{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.03125, "calib/avg_num_step_conf": 0.0703125, "calib/ece": 0.48125000000000007, "calib/final_conf_rate": 0.03125, "calib/format_rate": 0.01171875, "calib/frac_conf_gt_0.9": 0.25, "calib/gap": -0.008333333333333304, "calib/mean_conf": 0.73125, "calib/mu_c": 0.7250000000000001, "calib/mu_w": 0.7333333333333334, "calib/nonempty_final_conf_rate": 0.03125, "calib/nonempty_reasoning_rate": 0.03515625, "calib/nonempty_step_conf_rate": 0.015625, "calib/pce": 0.48125000000000007, "calib/std_conf": 0.19990231989649343, "calib/step_conf_rate": 0.015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 3065.0, "completions/max_terminated_length": 3065.0, "completions/mean_length": 655.78515625, "completions/mean_terminated_length": 729.9173583984375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.17658577859401703, "learning_rate": 0.0, "loss": 0.0118, "num_tokens": 298505.0, "reward": 0.01619849167764187, "reward_std": 0.03863148391246796, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.009296875447034836, "rewards/format_reward_step": 0.01171875, "rewards/stepwise_brier_reward": 0.010262716561555862, "step": 1 }, { "calib/answer_extract_rate": 0.03515625, "calib/avg_num_step_conf": 0.11328125, "calib/ece": 0.6925, "calib/final_conf_rate": 0.015625, "calib/format_rate": 0.015625, "calib/frac_conf_gt_0.9": 0.25, "calib/gap": -0.1233333333333333, "calib/mean_conf": 0.7925, "calib/mu_c": 0.7, "calib/mu_w": 0.8233333333333333, "calib/nonempty_final_conf_rate": 0.015625, "calib/nonempty_reasoning_rate": 0.04296875, "calib/nonempty_step_conf_rate": 0.02734375, "calib/pce": 0.6175, "calib/std_conf": 0.14889173919328097, "calib/step_conf_rate": 0.02734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2953.0, "completions/max_terminated_length": 2953.0, "completions/mean_length": 687.84375, "completions/mean_terminated_length": 762.2857055664062, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.30427029728889465, "learning_rate": 2.5000000000000004e-07, "loss": 0.0042, "num_tokens": 600921.0, "reward": 0.019744617864489555, "reward_std": 0.055846214294433594, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.007027734536677599, "rewards/format_reward_step": 0.015625, "rewards/stepwise_brier_reward": 0.011798003688454628, "step": 2 }, { "calib/answer_extract_rate": 0.015625, "calib/avg_num_step_conf": 0.0390625, "calib/ece": 0.6499999999999999, "calib/final_conf_rate": 0.01171875, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.65, "calib/mu_c": NaN, "calib/mu_w": 0.65, "calib/nonempty_final_conf_rate": 0.01171875, "calib/nonempty_reasoning_rate": 0.01953125, "calib/nonempty_step_conf_rate": 0.01171875, "calib/pce": 0.6499999999999999, "calib/std_conf": 0.14719601443879743, "calib/step_conf_rate": 0.01171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2880.0, "completions/max_terminated_length": 2880.0, "completions/mean_length": 740.953125, "completions/mean_terminated_length": 803.7457885742188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.2135412096977234, "learning_rate": 5.000000000000001e-07, "loss": 0.0033, "num_tokens": 918901.0, "reward": 0.005034895613789558, "reward_std": 0.01424083486199379, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0054296874441206455, "rewards/format_reward_step": 0.0078125, "rewards/stepwise_brier_reward": 0.006155208684504032, "step": 3 }, { "calib/answer_extract_rate": 0.015625, "calib/avg_num_step_conf": 0.04296875, "calib/ece": 0.48333333333333334, "calib/final_conf_rate": 0.01171875, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.125, "calib/mean_conf": 0.8166666666666668, "calib/mu_c": 0.9, "calib/mu_w": 0.775, "calib/nonempty_final_conf_rate": 0.01171875, "calib/nonempty_reasoning_rate": 0.01953125, "calib/nonempty_step_conf_rate": 0.01171875, "calib/pce": 0.48333333333333334, "calib/std_conf": 0.11785113019775792, "calib/step_conf_rate": 0.01171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 3011.0, "completions/max_terminated_length": 3011.0, "completions/mean_length": 700.9375, "completions/mean_terminated_length": 815.6363525390625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.1290050894021988, "learning_rate": 7.5e-07, "loss": 0.0077, "num_tokens": 1227549.0, "reward": 0.008362310007214546, "reward_std": 0.023652182891964912, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.004609375260770321, "rewards/format_reward_step": 0.0078125, "rewards/stepwise_brier_reward": 0.005480488762259483, "step": 4 }, { "calib/answer_extract_rate": 0.03125, "calib/avg_num_step_conf": 0.06640625, "calib/ece": 0.6642857142857141, "calib/final_conf_rate": 0.02734375, "calib/format_rate": 0.015625, "calib/frac_conf_gt_0.9": 0.14285714285714285, "calib/gap": 0.050000000000000044, "calib/mean_conf": 0.8071428571428572, "calib/mu_c": 0.85, "calib/mu_w": 0.7999999999999999, "calib/nonempty_final_conf_rate": 0.02734375, "calib/nonempty_reasoning_rate": 0.03125, "calib/nonempty_step_conf_rate": 0.015625, "calib/pce": 0.6642857142857141, "calib/std_conf": 0.13477115902938006, "calib/step_conf_rate": 0.015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 3037.0, "completions/max_terminated_length": 3037.0, "completions/mean_length": 680.9140625, "completions/mean_terminated_length": 761.196533203125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.22762255370616913, "learning_rate": 1.0000000000000002e-06, "loss": -0.0011, "num_tokens": 1531591.0, "reward": 0.01147377397865057, "reward_std": 0.03245273604989052, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.008144531399011612, "rewards/format_reward_step": 0.015625, "rewards/stepwise_brier_reward": 0.0077310362830758095, "step": 5 }, { "calib/answer_extract_rate": 0.046875, "calib/avg_num_step_conf": 0.09375, "calib/ece": 0.6666666666666666, "calib/final_conf_rate": 0.03515625, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.08750000000000002, "calib/mean_conf": 0.7777777777777778, "calib/mu_c": 0.7, "calib/mu_w": 0.7875, "calib/nonempty_final_conf_rate": 0.03515625, "calib/nonempty_reasoning_rate": 0.05078125, "calib/nonempty_step_conf_rate": 0.02734375, "calib/pce": 0.6666666666666666, "calib/std_conf": 0.10030816714037662, "calib/step_conf_rate": 0.02734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3011.0, "completions/max_terminated_length": 3011.0, "completions/mean_length": 616.640625, "completions/mean_terminated_length": 677.5107421875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.23622936010360718, "learning_rate": 1.25e-06, "loss": 0.0143, "num_tokens": 1818443.0, "reward": 0.019612066447734833, "reward_std": 0.04767308384180069, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.012529296800494194, "rewards/format_reward_step": 0.0234375, "rewards/stepwise_brier_reward": 0.012764675542712212, "step": 6 }, { "calib/answer_extract_rate": 0.0234375, "calib/avg_num_step_conf": 0.06640625, "calib/ece": 0.53, "calib/final_conf_rate": 0.01953125, "calib/format_rate": 0.015625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.033333333333333326, "calib/mean_conf": 0.5700000000000001, "calib/mu_c": 0.55, "calib/mu_w": 0.5833333333333334, "calib/nonempty_final_conf_rate": 0.01953125, "calib/nonempty_reasoning_rate": 0.0234375, "calib/nonempty_step_conf_rate": 0.01953125, "calib/pce": 0.3500000000000001, "calib/std_conf": 0.24000000000000002, "calib/step_conf_rate": 0.01953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15234375, "completions/max_length": 3033.0, "completions/max_terminated_length": 3033.0, "completions/mean_length": 641.45703125, "completions/mean_terminated_length": 756.741943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.2200292944908142, "learning_rate": 1.5e-06, "loss": 0.0016, "num_tokens": 2113120.0, "reward": 0.02059093490242958, "reward_std": 0.05823996290564537, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.009023437276482582, "rewards/format_reward_step": 0.015625, "rewards/stepwise_brier_reward": 0.011191869154572487, "step": 7 }, { "calib/answer_extract_rate": 0.0234375, "calib/avg_num_step_conf": 0.046875, "calib/ece": 0.15000000000000002, "calib/final_conf_rate": 0.01171875, "calib/format_rate": 0.01171875, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.85, "calib/mu_c": 0.85, "calib/mu_w": NaN, "calib/nonempty_final_conf_rate": 0.01171875, "calib/nonempty_reasoning_rate": 0.02734375, "calib/nonempty_step_conf_rate": 0.015625, "calib/pce": 0.0, "calib/std_conf": 0.04082482904638629, "calib/step_conf_rate": 0.015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3039.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 683.51171875, "completions/mean_terminated_length": 754.2198486328125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.3492337167263031, "learning_rate": 1.75e-06, "loss": 0.0032, "num_tokens": 2417651.0, "reward": 0.021268287673592567, "reward_std": 0.06015579774975777, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.011435546912252903, "rewards/format_reward_step": 0.01171875, "rewards/stepwise_brier_reward": 0.0106395548209548, "step": 8 }, { "calib/answer_extract_rate": 0.0390625, "calib/avg_num_step_conf": 0.046875, "calib/ece": 0.3357142857142857, "calib/final_conf_rate": 0.02734375, "calib/format_rate": 0.01171875, "calib/frac_conf_gt_0.9": 0.14285714285714285, "calib/gap": 0.033333333333333326, "calib/mean_conf": 0.7642857142857142, "calib/mu_c": 0.7833333333333333, "calib/mu_w": 0.75, "calib/nonempty_final_conf_rate": 0.02734375, "calib/nonempty_reasoning_rate": 0.0390625, "calib/nonempty_step_conf_rate": 0.015625, "calib/pce": 0.3357142857142857, "calib/std_conf": 0.12737538928662148, "calib/step_conf_rate": 0.015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 2833.0, "completions/max_terminated_length": 2833.0, "completions/mean_length": 603.69921875, "completions/mean_terminated_length": 677.8377075195312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.32757800817489624, "learning_rate": 2.0000000000000003e-06, "loss": 0.0073, "num_tokens": 2702774.0, "reward": 0.022961322218179703, "reward_std": 0.06494442373514175, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.009033203125, "rewards/format_reward_step": 0.01171875, "rewards/stepwise_brier_reward": 0.006591379642486572, "step": 9 }, { "calib/answer_extract_rate": 0.02734375, "calib/avg_num_step_conf": 0.0703125, "calib/ece": 0.5214285714285714, "calib/final_conf_rate": 0.02734375, "calib/format_rate": 0.01953125, "calib/frac_conf_gt_0.9": 0.2857142857142857, "calib/gap": -0.009999999999999898, "calib/mean_conf": 0.807142857142857, "calib/mu_c": 0.8, "calib/mu_w": 0.8099999999999999, "calib/nonempty_final_conf_rate": 0.02734375, "calib/nonempty_reasoning_rate": 0.03125, "calib/nonempty_step_conf_rate": 0.0234375, "calib/pce": 0.5214285714285714, "calib/std_conf": 0.11473127431577862, "calib/step_conf_rate": 0.0234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3056.0, "completions/max_terminated_length": 3056.0, "completions/mean_length": 672.26171875, "completions/mean_terminated_length": 729.2330322265625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.25926557183265686, "learning_rate": 2.25e-06, "loss": -0.0039, "num_tokens": 3004713.0, "reward": 0.018368151038885117, "reward_std": 0.05195297300815582, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.009648437611758709, "rewards/format_reward_step": 0.01953125, "rewards/stepwise_brier_reward": 0.015113226138055325, "step": 10 }, { "calib/answer_extract_rate": 0.0546875, "calib/avg_num_step_conf": 0.23046875, "calib/ece": 0.16153846153846155, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.07692307692307693, "calib/gap": 0.014999999999999902, "calib/mean_conf": 0.7615384615384614, "calib/mu_c": 0.7649999999999999, "calib/mu_w": 0.75, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.0625, "calib/nonempty_step_conf_rate": 0.05078125, "calib/pce": 0.07692307692307696, "calib/std_conf": 0.12883180172649406, "calib/step_conf_rate": 0.05078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 2961.0, "completions/max_terminated_length": 2961.0, "completions/mean_length": 706.15625, "completions/mean_terminated_length": 792.877197265625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.32639095187187195, "learning_rate": 2.5e-06, "loss": 0.0305, "num_tokens": 3313009.0, "reward": 0.07463431358337402, "reward_std": 0.18631576001644135, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.03682617098093033, "rewards/format_reward_step": 0.04296875, "rewards/stepwise_brier_reward": 0.03582242131233215, "step": 11 }, { "calib/answer_extract_rate": 0.09765625, "calib/avg_num_step_conf": 0.37890625, "calib/ece": 0.2871428571428571, "calib/final_conf_rate": 0.08203125, "calib/format_rate": 0.06640625, "calib/frac_conf_gt_0.9": 0.09523809523809523, "calib/gap": 0.03277777777777757, "calib/mean_conf": 0.7442857142857142, "calib/mu_c": 0.7583333333333332, "calib/mu_w": 0.7255555555555556, "calib/nonempty_final_conf_rate": 0.08203125, "calib/nonempty_reasoning_rate": 0.10546875, "calib/nonempty_step_conf_rate": 0.08984375, "calib/pce": 0.22999999999999998, "calib/std_conf": 0.13692442922914774, "calib/step_conf_rate": 0.08984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2966.0, "completions/max_terminated_length": 2966.0, "completions/mean_length": 563.20703125, "completions/mean_terminated_length": 600.7542114257812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.4486089050769806, "learning_rate": 2.7500000000000004e-06, "loss": -0.0015, "num_tokens": 3584406.0, "reward": 0.1078411191701889, "reward_std": 0.16879743337631226, "rewards/accuracy_reward_step": 0.0625, "rewards/final_brier_reward_step": 0.04987538978457451, "rewards/format_reward_step": 0.06640625, "rewards/stepwise_brier_reward": 0.05505118519067764, "step": 12 }, { "calib/answer_extract_rate": 0.09765625, "calib/avg_num_step_conf": 0.7265625, "calib/ece": 0.6033333333333333, "calib/final_conf_rate": 0.09375, "calib/format_rate": 0.05859375, "calib/frac_conf_gt_0.9": 0.125, "calib/gap": 0.06600000000000006, "calib/mean_conf": 0.7699999999999999, "calib/mu_c": 0.825, "calib/mu_w": 0.7589999999999999, "calib/nonempty_final_conf_rate": 0.09375, "calib/nonempty_reasoning_rate": 0.140625, "calib/nonempty_step_conf_rate": 0.1171875, "calib/pce": 0.6033333333333333, "calib/std_conf": 0.16830032679706836, "calib/step_conf_rate": 0.1171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3063.0, "completions/max_terminated_length": 3063.0, "completions/mean_length": 650.65625, "completions/mean_terminated_length": 699.8656005859375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.27529487013816833, "learning_rate": 3e-06, "loss": 0.0526, "num_tokens": 3878606.0, "reward": 0.05349308252334595, "reward_std": 0.12776944041252136, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.029758203774690628, "rewards/format_reward_step": 0.05859375, "rewards/stepwise_brier_reward": 0.03726842254400253, "step": 13 }, { "calib/answer_extract_rate": 0.25, "calib/avg_num_step_conf": 1.13671875, "calib/ece": 0.4633333333333334, "calib/final_conf_rate": 0.19921875, "calib/format_rate": 0.1484375, "calib/frac_conf_gt_0.9": 0.058823529411764705, "calib/gap": 0.002000000000000224, "calib/mean_conf": 0.733921568627451, "calib/mu_c": 0.7353333333333334, "calib/mu_w": 0.7333333333333332, "calib/nonempty_final_conf_rate": 0.19921875, "calib/nonempty_reasoning_rate": 0.29296875, "calib/nonempty_step_conf_rate": 0.25390625, "calib/pce": 0.4515686274509805, "calib/std_conf": 0.14506358891504084, "calib/step_conf_rate": 0.25390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2898.0, "completions/max_terminated_length": 2898.0, "completions/mean_length": 566.8515625, "completions/mean_terminated_length": 594.7294921875, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.014933333333333333, "grad_norm": 0.6952277421951294, "learning_rate": 3.2500000000000002e-06, "loss": 0.0509, "num_tokens": 4152160.0, "reward": 0.16866666078567505, "reward_std": 0.2768186032772064, "rewards/accuracy_reward_step": 0.08203125, "rewards/final_brier_reward_step": 0.0842718705534935, "rewards/format_reward_step": 0.1484375, "rewards/stepwise_brier_reward": 0.11862284690141678, "step": 14 }, { "calib/answer_extract_rate": 0.41015625, "calib/avg_num_step_conf": 1.859375, "calib/ece": 0.41095744680851054, "calib/final_conf_rate": 0.3671875, "calib/format_rate": 0.3046875, "calib/frac_conf_gt_0.9": 0.11702127659574468, "calib/gap": 0.019162227602905424, "calib/mean_conf": 0.7588297872340425, "calib/mu_c": 0.7708571428571429, "calib/mu_w": 0.7516949152542375, "calib/nonempty_final_conf_rate": 0.3671875, "calib/nonempty_reasoning_rate": 0.48046875, "calib/nonempty_step_conf_rate": 0.41796875, "calib/pce": 0.39872340425531905, "calib/std_conf": 0.14718533968810774, "calib/step_conf_rate": 0.41796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2689.0, "completions/max_terminated_length": 2689.0, "completions/mean_length": 483.1328125, "completions/mean_terminated_length": 490.8016052246094, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.8166272044181824, "learning_rate": 3.5e-06, "loss": 0.0548, "num_tokens": 4406762.0, "reward": 0.3417610228061676, "reward_std": 0.5316358208656311, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.1889699250459671, "rewards/format_reward_step": 0.3046875, "rewards/stepwise_brier_reward": 0.22660425305366516, "step": 15 }, { "calib/answer_extract_rate": 0.640625, "calib/avg_num_step_conf": 3.7421875, "calib/ece": 0.4356962025316456, "calib/final_conf_rate": 0.6171875, "calib/format_rate": 0.515625, "calib/frac_conf_gt_0.9": 0.06962025316455696, "calib/gap": 0.040874455732946235, "calib/mean_conf": 0.7439240506329114, "calib/mu_c": 0.7713461538461538, "calib/mu_w": 0.7304716981132076, "calib/nonempty_final_conf_rate": 0.6171875, "calib/nonempty_reasoning_rate": 0.765625, "calib/nonempty_step_conf_rate": 0.703125, "calib/pce": 0.425253164556962, "calib/std_conf": 0.15553434006853403, "calib/step_conf_rate": 0.703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2831.0, "completions/max_terminated_length": 2831.0, "completions/mean_length": 475.28125, "completions/mean_terminated_length": 479.02362060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.017066666666666667, "grad_norm": 0.7212015986442566, "learning_rate": 3.7500000000000005e-06, "loss": 0.0315, "num_tokens": 4660322.0, "reward": 0.5336143374443054, "reward_std": 0.5757545828819275, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.31148359179496765, "rewards/format_reward_step": 0.515625, "rewards/stepwise_brier_reward": 0.39899012446403503, "step": 16 }, { "calib/answer_extract_rate": 0.87109375, "calib/avg_num_step_conf": 4.921875, "calib/ece": 0.3467136150234741, "calib/final_conf_rate": 0.83203125, "calib/format_rate": 0.79296875, "calib/frac_conf_gt_0.9": 0.07511737089201878, "calib/gap": 0.039069767441860526, "calib/mean_conf": 0.7396713615023475, "calib/mu_c": 0.7633333333333332, "calib/mu_w": 0.7242635658914727, "calib/nonempty_final_conf_rate": 0.83203125, "calib/nonempty_reasoning_rate": 0.921875, "calib/nonempty_step_conf_rate": 0.89453125, "calib/pce": 0.3460093896713614, "calib/std_conf": 0.15670427570716403, "calib/step_conf_rate": 0.89453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1576.0, "completions/max_terminated_length": 1576.0, "completions/mean_length": 385.89453125, "completions/mean_terminated_length": 387.4078674316406, "completions/min_length": 0.0, "completions/min_terminated_length": 26.0, "epoch": 0.018133333333333335, "grad_norm": 1.0878597497940063, "learning_rate": 4.000000000000001e-06, "loss": 0.0649, "num_tokens": 4885679.0, "reward": 0.825513482093811, "reward_std": 0.5877258777618408, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.5036300420761108, "rewards/format_reward_step": 0.79296875, "rewards/stepwise_brier_reward": 0.6026062965393066, "step": 17 }, { "calib/answer_extract_rate": 0.8828125, "calib/avg_num_step_conf": 4.65234375, "calib/ece": 0.3434090909090908, "calib/final_conf_rate": 0.859375, "calib/format_rate": 0.80859375, "calib/frac_conf_gt_0.9": 0.03636363636363636, "calib/gap": 0.035937499999999956, "calib/mean_conf": 0.7615909090909091, "calib/mu_c": 0.7825, "calib/mu_w": 0.7465625, "calib/nonempty_final_conf_rate": 0.859375, "calib/nonempty_reasoning_rate": 0.95703125, "calib/nonempty_step_conf_rate": 0.90625, "calib/pce": 0.3434090909090908, "calib/std_conf": 0.10826615314748822, "calib/step_conf_rate": 0.90625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2957.0, "completions/max_terminated_length": 2957.0, "completions/mean_length": 393.1171875, "completions/mean_terminated_length": 393.1171875, "completions/min_length": 9.0, "completions/min_terminated_length": 9.0, "epoch": 0.0192, "grad_norm": 0.529790461063385, "learning_rate": 4.25e-06, "loss": 0.0308, "num_tokens": 5120077.0, "reward": 0.8530287742614746, "reward_std": 0.5156837701797485, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.5240890979766846, "rewards/format_reward_step": 0.80859375, "rewards/stepwise_brier_reward": 0.5873744487762451, "step": 18 }, { "calib/answer_extract_rate": 0.9453125, "calib/avg_num_step_conf": 4.90234375, "calib/ece": 0.32182203389830516, "calib/final_conf_rate": 0.921875, "calib/format_rate": 0.859375, "calib/frac_conf_gt_0.9": 0.038135593220338986, "calib/gap": 0.03419046206333909, "calib/mean_conf": 0.725635593220339, "calib/mu_c": 0.7457731958762887, "calib/mu_w": 0.7115827338129496, "calib/nonempty_final_conf_rate": 0.921875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.3182203389830509, "calib/std_conf": 0.13758209661650694, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 889.0, "completions/max_terminated_length": 889.0, "completions/mean_length": 299.90625, "completions/mean_terminated_length": 302.2677001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 32.0, "epoch": 0.020266666666666665, "grad_norm": 0.6062198877334595, "learning_rate": 4.5e-06, "loss": -0.0175, "num_tokens": 5324653.0, "reward": 0.93487948179245, "reward_std": 0.5158452987670898, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.5712933540344238, "rewards/format_reward_step": 0.859375, "rewards/stepwise_brier_reward": 0.6750562787055969, "step": 19 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 5.3046875, "calib/ece": 0.22338645418326694, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.02390438247011952, "calib/gap": 0.01116274634456449, "calib/mean_conf": 0.7054581673306772, "calib/mu_c": 0.7112396694214876, "calib/mu_w": 0.7000769230769232, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.22338645418326694, "calib/std_conf": 0.1201198324442809, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1251.0, "completions/max_terminated_length": 1251.0, "completions/mean_length": 308.921875, "completions/mean_terminated_length": 310.13336181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.021333333333333333, "grad_norm": 0.4974098205566406, "learning_rate": 4.75e-06, "loss": -0.011, "num_tokens": 5531649.0, "reward": 1.0871165990829468, "reward_std": 0.47273990511894226, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.6622281074523926, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.7521353363990784, "step": 20 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 5.59765625, "calib/ece": 0.20835294117647063, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": 0.0202500000000001, "calib/mean_conf": 0.6761960784313724, "calib/mu_c": 0.6869166666666667, "calib/mu_w": 0.6666666666666666, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2069803921568628, "calib/std_conf": 0.11498695912552442, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 673.0, "completions/max_terminated_length": 673.0, "completions/mean_length": 296.0234375, "completions/mean_terminated_length": 298.3543395996094, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.0224, "grad_norm": 0.591964840888977, "learning_rate": 5e-06, "loss": 0.032, "num_tokens": 5733431.0, "reward": 1.120538592338562, "reward_std": 0.46937960386276245, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.7012964487075806, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8076863288879395, "step": 21 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 5.44140625, "calib/ece": 0.184404761904762, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.049787878787878714, "calib/mean_conf": 0.657420634920635, "calib/mu_c": 0.6835, "calib/mu_w": 0.6337121212121213, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.1828174603174604, "calib/std_conf": 0.1132987239294482, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 922.0, "completions/max_terminated_length": 922.0, "completions/mean_length": 289.27734375, "completions/mean_terminated_length": 291.55511474609375, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.023466666666666667, "grad_norm": 1.960092544555664, "learning_rate": 4.9722222222222224e-06, "loss": -0.025, "num_tokens": 5932342.0, "reward": 1.1121745109558105, "reward_std": 0.41868239641189575, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.7098742723464966, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.7664496898651123, "step": 22 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 5.30859375, "calib/ece": 0.17490196078431378, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.016686335403726882, "calib/mean_conf": 0.6258823529411763, "calib/mu_c": 0.6350434782608697, "calib/mu_w": 0.6183571428571428, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.17490196078431378, "calib/std_conf": 0.0993108897645984, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 826.0, "completions/max_terminated_length": 826.0, "completions/mean_length": 297.53515625, "completions/mean_terminated_length": 299.8779602050781, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.024533333333333334, "grad_norm": 0.648651659488678, "learning_rate": 4.944444444444445e-06, "loss": 0.0017, "num_tokens": 6135487.0, "reward": 1.103767991065979, "reward_std": 0.4504122734069824, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.7110418081283569, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8008010387420654, "step": 23 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 5.87890625, "calib/ece": 0.2616470588235294, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0043316624895572975, "calib/mean_conf": 0.5706666666666667, "calib/mu_c": 0.5735714285714286, "calib/mu_w": 0.5692397660818713, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.25145098039215685, "calib/std_conf": 0.10516815045098513, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2029.0, "completions/max_terminated_length": 2029.0, "completions/mean_length": 316.91796875, "completions/mean_terminated_length": 318.1607971191406, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.0256, "grad_norm": 1.1349138021469116, "learning_rate": 4.9166666666666665e-06, "loss": 0.0303, "num_tokens": 6344170.0, "reward": 0.9788568615913391, "reward_std": 0.4376360774040222, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.7089694738388062, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7865509986877441, "step": 24 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.01953125, "calib/ece": 0.03484374999999991, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.042696697796496375, "calib/mean_conf": 0.502421875, "calib/mu_c": 0.5239370078740158, "calib/mu_w": 0.4812403100775194, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.020585937499999887, "calib/std_conf": 0.09926484534559239, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 692.0, "completions/max_terminated_length": 692.0, "completions/mean_length": 320.06640625, "completions/mean_terminated_length": 322.58660888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.02666666666666667, "grad_norm": 0.5203309059143066, "learning_rate": 4.888888888888889e-06, "loss": 0.0108, "num_tokens": 6552371.0, "reward": 1.1763631105422974, "reward_std": 0.37089306116104126, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.7542519569396973, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.815698504447937, "step": 25 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 6.01953125, "calib/ece": 0.04555118110236215, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.017968206656731267, "calib/mean_conf": 0.45484251968503936, "calib/mu_c": 0.4641803278688525, "calib/mu_w": 0.4462121212121212, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.010039370078740138, "calib/std_conf": 0.10860141131004274, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 686.0, "completions/max_terminated_length": 686.0, "completions/mean_length": 323.80078125, "completions/mean_terminated_length": 326.35040283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.027733333333333332, "grad_norm": 0.5072885155677795, "learning_rate": 4.861111111111111e-06, "loss": 0.001, "num_tokens": 6763544.0, "reward": 1.1537525653839111, "reward_std": 0.31776195764541626, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.7410793304443359, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.829727053642273, "step": 26 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 5.90234375, "calib/ece": 0.07996062992125977, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02364525407478424, "calib/mean_conf": 0.4087007874015748, "calib/mu_c": 0.42257142857142854, "calib/mu_w": 0.3989261744966443, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.037637795275590524, "calib/std_conf": 0.11348577315259133, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 859.0, "completions/max_terminated_length": 859.0, "completions/mean_length": 327.40234375, "completions/mean_terminated_length": 329.9803161621094, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.0288, "grad_norm": 0.7500645518302917, "learning_rate": 4.833333333333333e-06, "loss": -0.0049, "num_tokens": 6975615.0, "reward": 1.0971195697784424, "reward_std": 0.34505730867385864, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.7501621246337891, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8506540656089783, "step": 27 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.1015625, "calib/ece": 0.2410196078431372, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.032722222222222264, "calib/mean_conf": 0.2934901960784314, "calib/mu_c": 0.30888888888888894, "calib/mu_w": 0.27616666666666667, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002549019607843136, "calib/std_conf": 0.13479386914390834, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1312.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 338.34765625, "completions/mean_terminated_length": 341.0118103027344, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.029866666666666666, "grad_norm": 0.5649840235710144, "learning_rate": 4.805555555555556e-06, "loss": 0.0344, "num_tokens": 7192216.0, "reward": 1.1840614080429077, "reward_std": 0.25355055928230286, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.6906328201293945, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8471675515174866, "step": 28 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.66796875, "calib/ece": 0.27051181102362204, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014085941381023315, "calib/mean_conf": 0.22161417322834648, "calib/mu_c": 0.2289344262295082, "calib/mu_w": 0.21484848484848487, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005905511811023622, "calib/std_conf": 0.12363328726673672, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1565.0, "completions/max_terminated_length": 1565.0, "completions/mean_length": 395.6953125, "completions/mean_terminated_length": 398.81103515625, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.030933333333333334, "grad_norm": 0.8290421962738037, "learning_rate": 4.777777777777778e-06, "loss": -0.0297, "num_tokens": 7423682.0, "reward": 1.122117042541504, "reward_std": 0.25590378046035767, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6661832332611084, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.854539692401886, "step": 29 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 5.703125, "calib/ece": 0.22226562499999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04535552942632587, "calib/mean_conf": 0.230859375, "calib/mu_c": 0.25619469026548675, "calib/mu_w": 0.21083916083916088, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005859374999999998, "calib/std_conf": 0.12486259537831726, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1061.0, "completions/max_terminated_length": 1061.0, "completions/mean_length": 351.91015625, "completions/mean_terminated_length": 354.68109130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.032, "grad_norm": 0.6820420622825623, "learning_rate": 4.75e-06, "loss": -0.0371, "num_tokens": 7643795.0, "reward": 1.1040761470794678, "reward_std": 0.2836735248565674, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.7090429663658142, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8357192277908325, "step": 30 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 6.03125, "calib/ece": 0.16719921875000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.07440250015863953, "calib/mean_conf": 0.23514453125, "calib/mu_c": 0.2796116504854369, "calib/mu_w": 0.20520915032679737, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.13737968674250123, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1105.0, "completions/max_terminated_length": 1105.0, "completions/mean_length": 369.02734375, "completions/mean_terminated_length": 371.9330749511719, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.03306666666666667, "grad_norm": 0.865591287612915, "learning_rate": 4.722222222222222e-06, "loss": -0.0197, "num_tokens": 7867218.0, "reward": 1.0817630290985107, "reward_std": 0.2301708161830902, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.7484901547431946, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8206971883773804, "step": 31 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 5.3046875, "calib/ece": 0.2751764705882353, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.024264597191426485, "calib/mean_conf": 0.22443137254901963, "calib/mu_c": 0.2369918699186992, "calib/mu_w": 0.2127272727272727, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.008627450980392156, "calib/std_conf": 0.11848210833773733, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 841.0, "completions/max_terminated_length": 841.0, "completions/mean_length": 348.44921875, "completions/mean_terminated_length": 351.1929016113281, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.034133333333333335, "grad_norm": 1.52852463722229, "learning_rate": 4.694444444444445e-06, "loss": 0.0016, "num_tokens": 8086165.0, "reward": 1.1227799654006958, "reward_std": 0.20674574375152588, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.6792035102844238, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8124001026153564, "step": 32 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 5.4453125, "calib/ece": 0.307578125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009687500000000016, "calib/mean_conf": 0.192421875, "calib/mu_c": 0.197265625, "calib/mu_w": 0.18757812499999998, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.10805269326344613, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1120.0, "completions/max_terminated_length": 1120.0, "completions/mean_length": 367.27734375, "completions/mean_terminated_length": 370.1692810058594, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.0352, "grad_norm": 1.088653326034546, "learning_rate": 4.666666666666667e-06, "loss": 0.01, "num_tokens": 8310100.0, "reward": 1.1242845058441162, "reward_std": 0.20601904392242432, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6485640406608582, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8000099658966064, "step": 33 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 5.375, "calib/ece": 0.35748031496062993, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01682452642073773, "calib/mean_conf": 0.18188976377952756, "calib/mu_c": 0.18970588235294117, "calib/mu_w": 0.17288135593220344, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0019685039370078744, "calib/std_conf": 0.10140145740822037, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2223.0, "completions/max_terminated_length": 2223.0, "completions/mean_length": 362.05859375, "completions/mean_terminated_length": 363.47845458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.03626666666666667, "grad_norm": 1.0133696794509888, "learning_rate": 4.638888888888889e-06, "loss": 0.056, "num_tokens": 8530939.0, "reward": 1.1375608444213867, "reward_std": 0.2840504050254822, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.6156054735183716, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7987202405929565, "step": 34 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 4.53125, "calib/ece": 0.3446484374999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04685051958433248, "calib/mean_conf": 0.19988281250000003, "calib/mu_c": 0.22129496402877696, "calib/mu_w": 0.17444444444444449, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.0007812499999999998, "calib/std_conf": 0.11171770514153001, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1132.0, "completions/max_terminated_length": 1132.0, "completions/mean_length": 383.30078125, "completions/mean_terminated_length": 386.31890869140625, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.037333333333333336, "grad_norm": 1.2451777458190918, "learning_rate": 4.611111111111112e-06, "loss": 0.0221, "num_tokens": 8761360.0, "reward": 1.143646478652954, "reward_std": 0.25098735094070435, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.6174097657203674, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.7819544076919556, "step": 35 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 3.7578125, "calib/ece": 0.459764705882353, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.06412857142857142, "calib/mean_conf": 0.22650980392156858, "calib/mu_c": 0.24662857142857142, "calib/mu_w": 0.1825, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.09234618850463783, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 851.0, "completions/max_terminated_length": 851.0, "completions/mean_length": 308.08984375, "completions/mean_terminated_length": 310.5157470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.0384, "grad_norm": 1.2867798805236816, "learning_rate": 4.583333333333333e-06, "loss": 0.0763, "num_tokens": 8965983.0, "reward": 1.2754257917404175, "reward_std": 0.25121673941612244, "rewards/accuracy_reward_step": 0.68359375, "rewards/final_brier_reward_step": 0.5783679485321045, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8184047937393188, "step": 36 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 3.9296875, "calib/ece": 0.23402390438247014, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0461296083684144, "calib/mean_conf": 0.29984063745019923, "calib/mu_c": 0.3213432835820896, "calib/mu_w": 0.2752136752136752, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0, "calib/std_conf": 0.11390905762211623, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1939.0, "completions/max_terminated_length": 1939.0, "completions/mean_length": 381.1484375, "completions/mean_terminated_length": 384.14959716796875, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.039466666666666664, "grad_norm": 0.9557704329490662, "learning_rate": 4.555555555555556e-06, "loss": 0.0051, "num_tokens": 9193693.0, "reward": 1.1587154865264893, "reward_std": 0.2776384949684143, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.6743347644805908, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.7924422025680542, "step": 37 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 3.91796875, "calib/ece": 0.16411764705882348, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.07123686186186184, "calib/mean_conf": 0.40058823529411763, "calib/mu_c": 0.4315972222222222, "calib/mu_w": 0.36036036036036034, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.1357248046846623, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2757.0, "completions/max_terminated_length": 2757.0, "completions/mean_length": 414.10546875, "completions/mean_terminated_length": 415.72943115234375, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.04053333333333333, "grad_norm": 1.3092141151428223, "learning_rate": 4.527777777777778e-06, "loss": 0.0077, "num_tokens": 9429632.0, "reward": 1.2299902439117432, "reward_std": 0.3092259168624878, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.7314550876617432, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8133012056350708, "step": 38 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 3.94921875, "calib/ece": 0.05313725490196089, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04198345541803494, "calib/mean_conf": 0.5374509803921569, "calib/mu_c": 0.5550675675675676, "calib/mu_w": 0.5130841121495326, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.005098039215686271, "calib/std_conf": 0.10403323634178029, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2034.0, "completions/max_terminated_length": 2034.0, "completions/mean_length": 385.234375, "completions/mean_terminated_length": 386.7451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.0416, "grad_norm": 1.2867313623428345, "learning_rate": 4.5e-06, "loss": 0.0126, "num_tokens": 9657380.0, "reward": 1.2601454257965088, "reward_std": 0.2809554934501648, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.7519237995147705, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8304837942123413, "step": 39 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 3.66796875, "calib/ece": 0.018823529411764572, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.015337787212787224, "calib/mean_conf": 0.5733333333333333, "calib/mu_c": 0.58006993006993, "calib/mu_w": 0.5647321428571428, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.015686274509803786, "calib/std_conf": 0.06769790643696237, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1494.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 406.4296875, "completions/mean_terminated_length": 408.0235595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.042666666666666665, "grad_norm": 0.8483911752700806, "learning_rate": 4.472222222222223e-06, "loss": 0.0088, "num_tokens": 9891226.0, "reward": 1.2255644798278809, "reward_std": 0.35025548934936523, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.7437109351158142, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.786710798740387, "step": 40 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 3.38671875, "calib/ece": 0.17570281124498008, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.015512934879571727, "calib/mean_conf": 0.5889558232931728, "calib/mu_c": 0.5926315789473684, "calib/mu_w": 0.5771186440677967, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0008032128514056232, "calib/std_conf": 0.048558684491893225, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 1105.0, "completions/max_terminated_length": 1105.0, "completions/mean_length": 349.5546875, "completions/mean_terminated_length": 357.94403076171875, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.04373333333333333, "grad_norm": 2.9355227947235107, "learning_rate": 4.444444444444444e-06, "loss": -0.0397, "num_tokens": 10111000.0, "reward": 1.4171063899993896, "reward_std": 0.3127620220184326, "rewards/accuracy_reward_step": 0.7421875, "rewards/final_brier_reward_step": 0.7581347227096558, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.8005934953689575, "step": 41 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 3.39453125, "calib/ece": 0.021599999999999748, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005966930265995818, "calib/mean_conf": 0.5935999999999999, "calib/mu_c": 0.5961538461538461, "calib/mu_w": 0.5901869158878503, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.021599999999999748, "calib/std_conf": 0.03433715189120961, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 1590.0, "completions/max_terminated_length": 1590.0, "completions/mean_length": 330.984375, "completions/mean_terminated_length": 338.9280090332031, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.0448, "grad_norm": 2.04837965965271, "learning_rate": 4.416666666666667e-06, "loss": -0.0437, "num_tokens": 10323140.0, "reward": 1.219673752784729, "reward_std": 0.2220257967710495, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.7362304925918579, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7827968597412109, "step": 42 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 3.171875, "calib/ece": 0.032480314960630106, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008940748096656792, "calib/mean_conf": 0.5982283464566929, "calib/mu_c": 0.6015723270440252, "calib/mu_w": 0.5926315789473684, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.002362204724409446, "calib/std_conf": 0.03446456333122836, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1652.0, "completions/max_terminated_length": 1652.0, "completions/mean_length": 411.0390625, "completions/mean_terminated_length": 411.0390625, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.04586666666666667, "grad_norm": 0.8822596073150635, "learning_rate": 4.388888888888889e-06, "loss": 0.0455, "num_tokens": 10556630.0, "reward": 1.2898056507110596, "reward_std": 0.2994333803653717, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.742919921875, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.8015077114105225, "step": 43 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 2.828125, "calib/ece": 0.08119999999999976, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006368772470467632, "calib/mean_conf": 0.6075999999999999, "calib/mu_c": 0.6106060606060606, "calib/mu_w": 0.604237288135593, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.08039999999999976, "calib/std_conf": 0.03636261816756323, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1850.0, "completions/max_terminated_length": 1850.0, "completions/mean_length": 421.0234375, "completions/mean_terminated_length": 429.4103698730469, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.046933333333333334, "grad_norm": 0.9918916821479797, "learning_rate": 4.361111111111112e-06, "loss": -0.0099, "num_tokens": 10793772.0, "reward": 1.165583610534668, "reward_std": 0.25042974948883057, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.7213085889816284, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.7712799310684204, "step": 44 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 3.01953125, "calib/ece": 0.053149606299212726, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011876693766937674, "calib/mean_conf": 0.6137795275590551, "calib/mu_c": 0.6179878048780487, "calib/mu_w": 0.606111111111111, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.01062992125984252, "calib/std_conf": 0.04099028373171769, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2232.0, "completions/max_terminated_length": 2232.0, "completions/mean_length": 379.125, "completions/mean_terminated_length": 382.1102294921875, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.048, "grad_norm": 1.1430001258850098, "learning_rate": 4.333333333333334e-06, "loss": -0.0041, "num_tokens": 11018916.0, "reward": 1.3244693279266357, "reward_std": 0.2840193510055542, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.7614257335662842, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8187761306762695, "step": 45 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 2.796875, "calib/ece": 0.07094861660079053, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009422492401215954, "calib/mean_conf": 0.6266798418972331, "calib/mu_c": 0.6308510638297873, "calib/mu_w": 0.6214285714285713, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.07015810276679844, "calib/std_conf": 0.054714894070779146, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2988.0, "completions/max_terminated_length": 2988.0, "completions/mean_length": 455.6484375, "completions/mean_terminated_length": 459.2362060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.04906666666666667, "grad_norm": 0.6162858009338379, "learning_rate": 4.305555555555556e-06, "loss": 0.0113, "num_tokens": 11263370.0, "reward": 1.2110141515731812, "reward_std": 0.29757267236709595, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.7264159917831421, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.8005996346473694, "step": 46 }, { "calib/answer_extract_rate": 0.953125, "calib/avg_num_step_conf": 2.74609375, "calib/ece": 0.040778688524590156, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010471243601324898, "calib/mean_conf": 0.6301229508196721, "calib/mu_c": 0.633641975308642, "calib/mu_w": 0.6231707317073171, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.003483606557377053, "calib/std_conf": 0.05165682438987863, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2842.0, "completions/max_terminated_length": 2842.0, "completions/mean_length": 423.41796875, "completions/mean_terminated_length": 435.3212585449219, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.050133333333333335, "grad_norm": 0.7916970252990723, "learning_rate": 4.277777777777778e-06, "loss": -0.0065, "num_tokens": 11500781.0, "reward": 1.2935917377471924, "reward_std": 0.2790271043777466, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.7379980087280273, "rewards/format_reward_step": 0.94921875, "rewards/stepwise_brier_reward": 0.787433385848999, "step": 47 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 2.3671875, "calib/ece": 0.04126984126984135, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.018509803921568646, "calib/mean_conf": 0.6365079365079366, "calib/mu_c": 0.644, "calib/mu_w": 0.6254901960784314, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.04126984126984135, "calib/std_conf": 0.05248306545195376, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2133.0, "completions/max_terminated_length": 2133.0, "completions/mean_length": 358.8046875, "completions/mean_terminated_length": 365.95220947265625, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.0512, "grad_norm": 0.8702788352966309, "learning_rate": 4.25e-06, "loss": -0.0334, "num_tokens": 11719363.0, "reward": 1.254289150238037, "reward_std": 0.3980778753757477, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.7466015815734863, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.7895784378051758, "step": 48 }, { "calib/answer_extract_rate": 0.9609375, "calib/avg_num_step_conf": 2.42578125, "calib/ece": 0.07601626016260174, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01412429378531077, "calib/mean_conf": 0.6434959349593495, "calib/mu_c": 0.6474576271186441, "calib/mu_w": 0.6333333333333333, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.0575443800429925, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2049.0, "completions/max_terminated_length": 2049.0, "completions/mean_length": 372.671875, "completions/mean_terminated_length": 386.2510070800781, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.05226666666666667, "grad_norm": 0.6196070909500122, "learning_rate": 4.222222222222223e-06, "loss": -0.0149, "num_tokens": 11942343.0, "reward": 1.3646574020385742, "reward_std": 0.31304365396499634, "rewards/accuracy_reward_step": 0.69140625, "rewards/final_brier_reward_step": 0.7571874856948853, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.7973794937133789, "step": 49 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 2.33203125, "calib/ece": 0.06333333333333331, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0007627504553734399, "calib/mean_conf": 0.6543137254901961, "calib/mu_c": 0.6540983606557377, "calib/mu_w": 0.6548611111111111, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.05531566816865047, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1421.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 369.453125, "completions/mean_terminated_length": 372.3622131347656, "completions/min_length": 0.0, "completions/min_terminated_length": 129.0, "epoch": 0.05333333333333334, "grad_norm": 1.208615779876709, "learning_rate": 4.194444444444445e-06, "loss": 0.0162, "num_tokens": 12165323.0, "reward": 1.405625343322754, "reward_std": 0.3000626266002655, "rewards/accuracy_reward_step": 0.71484375, "rewards/final_brier_reward_step": 0.7803418040275574, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8071305751800537, "step": 50 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 2.48828125, "calib/ece": 0.1295180722891566, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.052451690821256225, "calib/mean_conf": 0.6524096385542169, "calib/mu_c": 0.6669444444444446, "calib/mu_w": 0.6144927536231883, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.02951807228915665, "calib/std_conf": 0.06596683531073422, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2124.0, "completions/max_terminated_length": 2124.0, "completions/mean_length": 397.078125, "completions/mean_terminated_length": 408.2409362792969, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.0544, "grad_norm": 0.672381579875946, "learning_rate": 4.166666666666667e-06, "loss": -0.0342, "num_tokens": 12399311.0, "reward": 1.3945393562316895, "reward_std": 0.3444993793964386, "rewards/accuracy_reward_step": 0.703125, "rewards/final_brier_reward_step": 0.7891894578933716, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7982159852981567, "step": 51 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 2.3984375, "calib/ece": 0.14576612903225805, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.038650217706821643, "calib/mean_conf": 0.6502016129032258, "calib/mu_c": 0.6584615384615385, "calib/mu_w": 0.6198113207547169, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.004838709677419377, "calib/std_conf": 0.06342031828674537, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 3009.0, "completions/max_terminated_length": 3009.0, "completions/mean_length": 407.47265625, "completions/mean_terminated_length": 417.25201416015625, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.055466666666666664, "grad_norm": 0.7831748723983765, "learning_rate": 4.138888888888889e-06, "loss": -0.0284, "num_tokens": 12634616.0, "reward": 1.4558947086334229, "reward_std": 0.2825433611869812, "rewards/accuracy_reward_step": 0.76171875, "rewards/final_brier_reward_step": 0.7937792539596558, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.8032076954841614, "step": 52 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 2.515625, "calib/ece": 0.048795180722891435, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02941331923890056, "calib/mean_conf": 0.6475903614457831, "calib/mu_c": 0.656686046511628, "calib/mu_w": 0.6272727272727274, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0028112449799196767, "calib/std_conf": 0.06642186631500918, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 1479.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 412.5546875, "completions/mean_terminated_length": 422.4560241699219, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.05653333333333333, "grad_norm": 1.552283763885498, "learning_rate": 4.111111111111111e-06, "loss": -0.0257, "num_tokens": 12869094.0, "reward": 1.3601384162902832, "reward_std": 0.2909051179885864, "rewards/accuracy_reward_step": 0.671875, "rewards/final_brier_reward_step": 0.7710058689117432, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.8219792246818542, "step": 53 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 2.5234375, "calib/ece": 0.1659448818897638, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.019533862876254138, "calib/mean_conf": 0.6529527559055118, "calib/mu_c": 0.6564903846153846, "calib/mu_w": 0.6369565217391304, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.05682842782393699, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1038.0, "completions/max_terminated_length": 1038.0, "completions/mean_length": 361.6796875, "completions/mean_terminated_length": 365.9683837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.0576, "grad_norm": 1.2853453159332275, "learning_rate": 4.083333333333334e-06, "loss": 0.0224, "num_tokens": 13090956.0, "reward": 1.529407024383545, "reward_std": 0.27310827374458313, "rewards/accuracy_reward_step": 0.8125, "rewards/final_brier_reward_step": 0.820263683795929, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8302257657051086, "step": 54 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 2.43359375, "calib/ece": 0.15301204819277095, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04438768870380028, "calib/mean_conf": 0.657429718875502, "calib/mu_c": 0.6775735294117647, "calib/mu_w": 0.6331858407079645, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13212851405622492, "calib/std_conf": 0.05786547965119041, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 1458.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 365.26953125, "completions/mean_terminated_length": 375.53814697265625, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.058666666666666666, "grad_norm": 0.9097878932952881, "learning_rate": 4.055555555555556e-06, "loss": -0.028, "num_tokens": 13315329.0, "reward": 1.1907155513763428, "reward_std": 0.36655521392822266, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.7376757860183716, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7734479904174805, "step": 55 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 2.35546875, "calib/ece": 0.1114919354838711, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.025295360042241755, "calib/mean_conf": 0.6719758064516128, "calib/mu_c": 0.6830935251798562, "calib/mu_w": 0.6577981651376145, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.1114919354838711, "calib/std_conf": 0.05149897645548441, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 1846.0, "completions/max_terminated_length": 1846.0, "completions/mean_length": 361.5859375, "completions/mean_terminated_length": 373.25, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.05973333333333333, "grad_norm": 1.6511852741241455, "learning_rate": 4.027777777777779e-06, "loss": -0.0633, "num_tokens": 13537775.0, "reward": 1.2042827606201172, "reward_std": 0.3192186653614044, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7275683879852295, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.802619218826294, "step": 56 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 2.1640625, "calib/ece": 0.06592741935483865, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02264263547386014, "calib/mean_conf": 0.6816532258064517, "calib/mu_c": 0.688135593220339, "calib/mu_w": 0.6654929577464789, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.016935483870967723, "calib/std_conf": 0.05095943110579659, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2458.0, "completions/max_terminated_length": 2458.0, "completions/mean_length": 342.28125, "completions/mean_terminated_length": 350.4960021972656, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.0608, "grad_norm": 0.83037930727005, "learning_rate": 4.000000000000001e-06, "loss": -0.0082, "num_tokens": 13755231.0, "reward": 1.3770034313201904, "reward_std": 0.3410765826702118, "rewards/accuracy_reward_step": 0.69140625, "rewards/final_brier_reward_step": 0.7762597799301147, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.8023691177368164, "step": 57 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 2.1953125, "calib/ece": 0.11526104417670681, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0002533894958537397, "calib/mean_conf": 0.6791164658634538, "calib/mu_c": 0.6792253521126762, "calib/mu_w": 0.6789719626168225, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.11204819277108433, "calib/std_conf": 0.050356385416037486, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 1757.0, "completions/max_terminated_length": 1757.0, "completions/mean_length": 397.59765625, "completions/mean_terminated_length": 408.77508544921875, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.06186666666666667, "grad_norm": 0.8900550603866577, "learning_rate": 3.972222222222223e-06, "loss": -0.0146, "num_tokens": 13986376.0, "reward": 1.205758810043335, "reward_std": 0.3948570489883423, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7204296588897705, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7743635773658752, "step": 58 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 2.234375, "calib/ece": 0.10098425196850404, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.013437500000000102, "calib/mean_conf": 0.6915354330708663, "calib/mu_c": 0.6865625, "calib/mu_w": 0.7000000000000001, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0812992125984253, "calib/std_conf": 0.0657872130882196, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1243.0, "completions/max_terminated_length": 1243.0, "completions/mean_length": 334.8984375, "completions/mean_terminated_length": 337.5354309082031, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.06293333333333333, "grad_norm": 1.1755027770996094, "learning_rate": 3.944444444444445e-06, "loss": -0.025, "num_tokens": 14201398.0, "reward": 1.300520420074463, "reward_std": 0.29623836278915405, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.7466113567352295, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8119841814041138, "step": 59 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 2.3359375, "calib/ece": 0.13963414634146354, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.027178082191780972, "calib/mean_conf": 0.733130081300813, "calib/mu_c": 0.7441780821917808, "calib/mu_w": 0.7169999999999999, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13963414634146354, "calib/std_conf": 0.05512328523219692, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2096.0, "completions/max_terminated_length": 2096.0, "completions/mean_length": 365.56640625, "completions/mean_terminated_length": 377.3588562011719, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.064, "grad_norm": 0.7373173236846924, "learning_rate": 3.916666666666667e-06, "loss": -0.0372, "num_tokens": 14426879.0, "reward": 1.2209932804107666, "reward_std": 0.359197199344635, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.7200488448143005, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.7782503366470337, "step": 60 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 2.3515625, "calib/ece": 0.0958984375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": 0.006229542447245384, "calib/mean_conf": 0.7716796875, "calib/mu_c": 0.7736994219653177, "calib/mu_w": 0.7674698795180723, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0958984375, "calib/std_conf": 0.03682611878683857, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 811.0, "completions/max_terminated_length": 811.0, "completions/mean_length": 303.28125, "completions/mean_terminated_length": 305.6692810058594, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.06506666666666666, "grad_norm": 0.8471395969390869, "learning_rate": 3.88888888888889e-06, "loss": -0.0103, "num_tokens": 14631623.0, "reward": 1.3641399145126343, "reward_std": 0.2312508374452591, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.7730761766433716, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8072821497917175, "step": 61 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 2.64453125, "calib/ece": 0.10040485829959504, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.004048582995951417, "calib/gap": 0.02245315698315198, "calib/mean_conf": 0.8048582995951415, "calib/mu_c": 0.8114942528735628, "calib/mu_w": 0.7890410958904108, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10040485829959504, "calib/std_conf": 0.05580366248649091, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 1762.0, "completions/max_terminated_length": 1762.0, "completions/mean_length": 361.671875, "completions/mean_terminated_length": 374.8502197265625, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.06613333333333334, "grad_norm": 0.7572021484375, "learning_rate": 3.861111111111112e-06, "loss": -0.0409, "num_tokens": 14854331.0, "reward": 1.3489103317260742, "reward_std": 0.442284494638443, "rewards/accuracy_reward_step": 0.6796875, "rewards/final_brier_reward_step": 0.758544921875, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.7754268050193787, "step": 62 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 2.84375, "calib/ece": 0.15887096774193551, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.040039492671071764, "calib/mean_conf": 0.8483870967741935, "calib/mu_c": 0.8608187134502924, "calib/mu_w": 0.8207792207792206, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.15887096774193551, "calib/std_conf": 0.056058539345105705, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 1618.0, "completions/max_terminated_length": 1618.0, "completions/mean_length": 406.0234375, "completions/mean_terminated_length": 419.1209411621094, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.0672, "grad_norm": 0.5545381307601929, "learning_rate": 3.833333333333334e-06, "loss": -0.0666, "num_tokens": 15089953.0, "reward": 1.3344875574111938, "reward_std": 0.36827802658081055, "rewards/accuracy_reward_step": 0.66796875, "rewards/final_brier_reward_step": 0.7504687309265137, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.7776375412940979, "step": 63 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 2.96484375, "calib/ece": 0.14575510204081638, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.036734693877551024, "calib/gap": 0.02861982223712889, "calib/mean_conf": 0.8722857142857144, "calib/mu_c": 0.8801123595505618, "calib/mu_w": 0.8514925373134329, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14575510204081638, "calib/std_conf": 0.051194706358994514, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 1259.0, "completions/max_terminated_length": 1259.0, "completions/mean_length": 358.65234375, "completions/mean_terminated_length": 373.231689453125, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.06826666666666667, "grad_norm": 0.6435424089431763, "learning_rate": 3.8055555555555556e-06, "loss": -0.0634, "num_tokens": 15308584.0, "reward": 1.3586997985839844, "reward_std": 0.38508233428001404, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.7549285292625427, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.7608799934387207, "step": 64 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 2.9296875, "calib/ece": 0.3197254901960785, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.19215686274509805, "calib/gap": 0.012923718110634086, "calib/mean_conf": 0.9001176470588237, "calib/mu_c": 0.9055405405405408, "calib/mu_w": 0.8926168224299067, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3197254901960785, "calib/std_conf": 0.0455939654842756, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1264.0, "completions/max_terminated_length": 1264.0, "completions/mean_length": 340.43359375, "completions/mean_terminated_length": 343.1141662597656, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.06933333333333333, "grad_norm": 0.9150872230529785, "learning_rate": 3.777777777777778e-06, "loss": -0.0012, "num_tokens": 15523799.0, "reward": 1.1812100410461426, "reward_std": 0.23995336890220642, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.6558824777603149, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7021381855010986, "step": 65 }, { "calib/answer_extract_rate": 0.9609375, "calib/avg_num_step_conf": 2.9296875, "calib/ece": 0.36089430894308955, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.21138211382113822, "calib/gap": 0.009404484662984824, "calib/mean_conf": 0.9015447154471546, "calib/mu_c": 0.9058646616541354, "calib/mu_w": 0.8964601769911505, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.36089430894308955, "calib/std_conf": 0.053746355827687775, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 1694.0, "completions/max_terminated_length": 1694.0, "completions/mean_length": 417.92578125, "completions/mean_terminated_length": 434.91461181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.0704, "grad_norm": 0.8764449954032898, "learning_rate": 3.7500000000000005e-06, "loss": -0.0824, "num_tokens": 15760180.0, "reward": 1.0901968479156494, "reward_std": 0.353530615568161, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5988469123840332, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.7005934715270996, "step": 66 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 2.796875, "calib/ece": 0.29043010752688203, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.4717741935483871, "calib/gap": 0.020243788091889425, "calib/mean_conf": 0.9275268817204302, "calib/mu_c": 0.9348734177215192, "calib/mu_w": 0.9146296296296298, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.29043010752688203, "calib/std_conf": 0.057126331373145836, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 1463.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 385.08984375, "completions/mean_terminated_length": 397.5120849609375, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.07146666666666666, "grad_norm": 0.8649067878723145, "learning_rate": 3.7222222222222225e-06, "loss": -0.0527, "num_tokens": 15986811.0, "reward": 1.2355461120605469, "reward_std": 0.30521702766418457, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.6689639091491699, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.7480067014694214, "step": 67 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 2.7109375, "calib/ece": 0.3118623481781378, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.7489878542510121, "calib/gap": 0.011363248471056742, "calib/mean_conf": 0.9515384615384617, "calib/mu_c": 0.9556329113924051, "calib/mu_w": 0.9442696629213484, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3118623481781378, "calib/std_conf": 0.04500181663518866, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 1954.0, "completions/max_terminated_length": 1954.0, "completions/mean_length": 389.63671875, "completions/mean_terminated_length": 402.20562744140625, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.07253333333333334, "grad_norm": 0.6878625750541687, "learning_rate": 3.694444444444445e-06, "loss": -0.0724, "num_tokens": 16213686.0, "reward": 1.222684383392334, "reward_std": 0.3363970220088959, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.6517175436019897, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.7326153516769409, "step": 68 }, { "calib/answer_extract_rate": 0.9609375, "calib/avg_num_step_conf": 2.8359375, "calib/ece": 0.3385216326530613, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.8489795918367347, "calib/gap": 0.03790612244897962, "calib/mean_conf": 0.9323355102040818, "calib/mu_c": 0.9474979591836736, "calib/mu_w": 0.909591836734694, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3354285714285715, "calib/std_conf": 0.13545859313203842, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 1884.0, "completions/max_terminated_length": 1884.0, "completions/mean_length": 456.0078125, "completions/mean_terminated_length": 474.5447082519531, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.0736, "grad_norm": 0.7570173144340515, "learning_rate": 3.6666666666666666e-06, "loss": -0.0322, "num_tokens": 16457960.0, "reward": 1.166851282119751, "reward_std": 0.4555240571498871, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6214951276779175, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.744727373123169, "step": 69 }, { "calib/answer_extract_rate": 0.93359375, "calib/avg_num_step_conf": 2.85546875, "calib/ece": 0.3566386554621849, "calib/final_conf_rate": 0.9296875, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.8781512605042017, "calib/gap": 0.010691489361701945, "calib/mean_conf": 0.9532773109243698, "calib/mu_c": 0.9575, "calib/mu_w": 0.9468085106382981, "calib/nonempty_final_conf_rate": 0.9296875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.352436974789916, "calib/std_conf": 0.06165925944350786, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1869.0, "completions/max_terminated_length": 1869.0, "completions/mean_length": 465.453125, "completions/mean_terminated_length": 496.4833679199219, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.07466666666666667, "grad_norm": 0.7547297477722168, "learning_rate": 3.638888888888889e-06, "loss": -0.091, "num_tokens": 16707148.0, "reward": 1.1299214363098145, "reward_std": 0.36550194025039673, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5958437323570251, "rewards/format_reward_step": 0.92578125, "rewards/stepwise_brier_reward": 0.7076860666275024, "step": 70 }, { "calib/answer_extract_rate": 0.953125, "calib/avg_num_step_conf": 3.25390625, "calib/ece": 0.3592358333333335, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.8583333333333333, "calib/gap": 0.032312013754566826, "calib/mean_conf": 0.9467358333333334, "calib/mu_c": 0.9600645390070922, "calib/mu_w": 0.9277525252525254, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3592358333333335, "calib/std_conf": 0.08266692486098791, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2263.0, "completions/max_terminated_length": 2263.0, "completions/mean_length": 525.49609375, "completions/mean_terminated_length": 551.3401489257812, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.07573333333333333, "grad_norm": 0.654914915561676, "learning_rate": 3.6111111111111115e-06, "loss": -0.0712, "num_tokens": 16969123.0, "reward": 1.122267246246338, "reward_std": 0.48198401927948, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.5975936651229858, "rewards/format_reward_step": 0.9375, "rewards/stepwise_brier_reward": 0.7001317739486694, "step": 71 }, { "calib/answer_extract_rate": 0.94140625, "calib/avg_num_step_conf": 3.6015625, "calib/ece": 0.415502074688797, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.9377593360995851, "calib/gap": 0.02130618212197144, "calib/mean_conf": 0.9673692946058092, "calib/mu_c": 0.9769172932330827, "calib/mu_w": 0.9556111111111113, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.415502074688797, "calib/std_conf": 0.04390711304391313, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 1419.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 488.640625, "completions/mean_terminated_length": 519.053955078125, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.0768, "grad_norm": 0.5807398557662964, "learning_rate": 3.5833333333333335e-06, "loss": -0.0847, "num_tokens": 17221663.0, "reward": 1.066742181777954, "reward_std": 0.4397001266479492, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5541670918464661, "rewards/format_reward_step": 0.94140625, "rewards/stepwise_brier_reward": 0.7039468288421631, "step": 72 }, { "calib/answer_extract_rate": 0.94921875, "calib/avg_num_step_conf": 4.16796875, "calib/ece": 0.18308298755186742, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.9045643153526971, "calib/gap": 0.0503100000000003, "calib/mean_conf": 0.9410622406639007, "calib/mu_c": 0.9515000000000003, "calib/mu_w": 0.90119, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16580705394190892, "calib/std_conf": 0.1489545415964191, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 1762.0, "completions/max_terminated_length": 1762.0, "completions/mean_length": 518.40625, "completions/mean_terminated_length": 543.901611328125, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.07786666666666667, "grad_norm": 0.39967501163482666, "learning_rate": 3.555555555555556e-06, "loss": -0.0999, "num_tokens": 17484447.0, "reward": 1.4234883785247803, "reward_std": 0.34371453523635864, "rewards/accuracy_reward_step": 0.75390625, "rewards/final_brier_reward_step": 0.7605338096618652, "rewards/format_reward_step": 0.94140625, "rewards/stepwise_brier_reward": 0.7806982398033142, "step": 73 }, { "calib/answer_extract_rate": 0.48828125, "calib/avg_num_step_conf": 21.6328125, "calib/ece": 0.33212000000000014, "calib/final_conf_rate": 0.48828125, "calib/format_rate": 0.484375, "calib/frac_conf_gt_0.9": 0.968, "calib/gap": 0.01060416666666708, "calib/mean_conf": 0.9721200000000002, "calib/mu_c": 0.9759375000000002, "calib/mu_w": 0.9653333333333332, "calib/nonempty_final_conf_rate": 0.48828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.33212000000000014, "calib/std_conf": 0.027635223899943346, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.50390625, "completions/max_length": 1721.0, "completions/max_terminated_length": 1721.0, "completions/mean_length": 364.89453125, "completions/mean_terminated_length": 735.535400390625, "completions/min_length": 0.0, "completions/min_terminated_length": 214.0, "epoch": 0.07893333333333333, "grad_norm": 1.1979538202285767, "learning_rate": 3.5277777777777784e-06, "loss": -0.6675, "num_tokens": 17704828.0, "reward": 0.6136009693145752, "reward_std": 0.6024596095085144, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.32385730743408203, "rewards/format_reward_step": 0.484375, "rewards/stepwise_brier_reward": 0.3629390001296997, "step": 74 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 4.19921875, "calib/ece": 0.14183673469387784, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.8734693877551021, "calib/gap": 0.03819538670284972, "calib/mean_conf": 0.9622448979591839, "calib/mu_c": 0.9691044776119405, "calib/mu_w": 0.9309090909090908, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.14183673469387784, "calib/std_conf": 0.040721317995311763, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2474.0, "completions/max_terminated_length": 2474.0, "completions/mean_length": 550.30078125, "completions/mean_terminated_length": 575.0081176757812, "completions/min_length": 0.0, "completions/min_terminated_length": 220.0, "epoch": 0.08, "grad_norm": 0.5308865308761597, "learning_rate": 3.5e-06, "loss": -0.0702, "num_tokens": 17973497.0, "reward": 1.488100290298462, "reward_std": 0.3117517828941345, "rewards/accuracy_reward_step": 0.78515625, "rewards/final_brier_reward_step": 0.8048710823059082, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.8207836151123047, "step": 75 }, { "calib/answer_extract_rate": 0.9375, "calib/avg_num_step_conf": 3.640625, "calib/ece": 0.23583333333333345, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.8083333333333333, "calib/gap": 0.046936181032291024, "calib/mean_conf": 0.9441666666666667, "calib/mu_c": 0.9576608187134504, "calib/mu_w": 0.9107246376811594, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.23375000000000012, "calib/std_conf": 0.083706663746416, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2172.0, "completions/max_terminated_length": 2172.0, "completions/mean_length": 521.97265625, "completions/mean_terminated_length": 556.7708740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.08106666666666666, "grad_norm": 0.4699781537055969, "learning_rate": 3.4722222222222224e-06, "loss": -0.1131, "num_tokens": 18233218.0, "reward": 1.2967987060546875, "reward_std": 0.34049344062805176, "rewards/accuracy_reward_step": 0.66796875, "rewards/final_brier_reward_step": 0.7021620869636536, "rewards/format_reward_step": 0.92578125, "rewards/stepwise_brier_reward": 0.7406830787658691, "step": 76 }, { "calib/answer_extract_rate": 0.9296875, "calib/avg_num_step_conf": 3.87890625, "calib/ece": 0.19236092436974805, "calib/final_conf_rate": 0.9296875, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.8151260504201681, "calib/gap": 0.07027323970037491, "calib/mean_conf": 0.934025630252101, "calib/mu_c": 0.951741573033708, "calib/mu_w": 0.8814683333333331, "calib/nonempty_final_conf_rate": 0.9296875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.18924369747899172, "calib/std_conf": 0.11022834690195274, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 2780.0, "completions/max_terminated_length": 2780.0, "completions/mean_length": 582.9921875, "completions/mean_terminated_length": 624.460205078125, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.08213333333333334, "grad_norm": 0.4831450581550598, "learning_rate": 3.444444444444445e-06, "loss": -0.0916, "num_tokens": 18510168.0, "reward": 1.3424919843673706, "reward_std": 0.40851569175720215, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.7341254353523254, "rewards/format_reward_step": 0.92578125, "rewards/stepwise_brier_reward": 0.7501545548439026, "step": 77 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 3.93359375, "calib/ece": 0.27076612903225805, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.6975806451612904, "calib/gap": 0.05114699971289138, "calib/mean_conf": 0.923991935483871, "calib/mu_c": 0.9417283950617285, "calib/mu_w": 0.8905813953488371, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.27076612903225805, "calib/std_conf": 0.08716752465407296, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 1903.0, "completions/max_terminated_length": 1903.0, "completions/mean_length": 653.8671875, "completions/mean_terminated_length": 674.9596557617188, "completions/min_length": 0.0, "completions/min_terminated_length": 248.0, "epoch": 0.0832, "grad_norm": 0.5532412528991699, "learning_rate": 3.416666666666667e-06, "loss": -0.0544, "num_tokens": 18808622.0, "reward": 1.2611587047576904, "reward_std": 0.2908684015274048, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.6933706998825073, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.739143431186676, "step": 78 }, { "calib/answer_extract_rate": 0.93359375, "calib/avg_num_step_conf": 3.890625, "calib/ece": 0.24228991596638672, "calib/final_conf_rate": 0.9296875, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.8613445378151261, "calib/gap": 0.010500000000000287, "calib/mean_conf": 0.9483823529411766, "calib/mu_c": 0.9513823529411766, "calib/mu_w": 0.9408823529411763, "calib/nonempty_final_conf_rate": 0.9296875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.23819327731092452, "calib/std_conf": 0.08040606518685899, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 2889.0, "completions/max_terminated_length": 2889.0, "completions/mean_length": 601.4453125, "completions/mean_terminated_length": 644.2259521484375, "completions/min_length": 0.0, "completions/min_terminated_length": 239.0, "epoch": 0.08426666666666667, "grad_norm": 0.47690510749816895, "learning_rate": 3.3888888888888893e-06, "loss": -0.0922, "num_tokens": 19092008.0, "reward": 1.2916498184204102, "reward_std": 0.2730216085910797, "rewards/accuracy_reward_step": 0.66796875, "rewards/final_brier_reward_step": 0.6869811415672302, "rewards/format_reward_step": 0.9296875, "rewards/stepwise_brier_reward": 0.7488871812820435, "step": 79 }, { "calib/answer_extract_rate": 0.94921875, "calib/avg_num_step_conf": 3.9375, "calib/ece": 0.20814814814814825, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.9382716049382716, "calib/gap": 0.031239867354458717, "calib/mean_conf": 0.9653497942386833, "calib/mu_c": 0.9729347826086958, "calib/mu_w": 0.9416949152542371, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20814814814814825, "calib/std_conf": 0.04611870832058621, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 1914.0, "completions/max_terminated_length": 1914.0, "completions/mean_length": 533.3125, "completions/mean_terminated_length": 561.8436279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.08533333333333333, "grad_norm": 0.35896626114845276, "learning_rate": 3.3611111111111117e-06, "loss": -0.0762, "num_tokens": 19353736.0, "reward": 1.3742382526397705, "reward_std": 0.36067700386047363, "rewards/accuracy_reward_step": 0.71875, "rewards/final_brier_reward_step": 0.7424664497375488, "rewards/format_reward_step": 0.94921875, "rewards/stepwise_brier_reward": 0.7573330402374268, "step": 80 }, { "calib/answer_extract_rate": 0.921875, "calib/avg_num_step_conf": 4.140625, "calib/ece": 0.2328389830508477, "calib/final_conf_rate": 0.921875, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 0.9449152542372882, "calib/gap": 0.020306323185012243, "calib/mean_conf": 0.9680084745762714, "calib/mu_c": 0.9732571428571432, "calib/mu_w": 0.9529508196721309, "calib/nonempty_final_conf_rate": 0.921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22966101694915275, "calib/std_conf": 0.04510646459788818, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2588.0, "completions/max_terminated_length": 2588.0, "completions/mean_length": 591.33984375, "completions/mean_terminated_length": 641.453369140625, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.0864, "grad_norm": 0.40946197509765625, "learning_rate": 3.3333333333333333e-06, "loss": -0.1225, "num_tokens": 19634407.0, "reward": 1.3076531887054443, "reward_std": 0.453918993473053, "rewards/accuracy_reward_step": 0.68359375, "rewards/final_brier_reward_step": 0.6992918252944946, "rewards/format_reward_step": 0.91796875, "rewards/stepwise_brier_reward": 0.7304664850234985, "step": 81 }, { "calib/answer_extract_rate": 0.9296875, "calib/avg_num_step_conf": 4.56640625, "calib/ece": 0.2556302521008406, "calib/final_conf_rate": 0.9296875, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.9495798319327731, "calib/gap": 0.012528585144453652, "calib/mean_conf": 0.9697478991596642, "calib/mu_c": 0.97327485380117, "calib/mu_w": 0.9607462686567163, "calib/nonempty_final_conf_rate": 0.9296875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2534453781512608, "calib/std_conf": 0.03829259719303655, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 1848.0, "completions/max_terminated_length": 1848.0, "completions/mean_length": 566.63671875, "completions/mean_terminated_length": 609.4916381835938, "completions/min_length": 0.0, "completions/min_terminated_length": 250.0, "epoch": 0.08746666666666666, "grad_norm": 0.42007526755332947, "learning_rate": 3.3055555555555558e-06, "loss": -0.1233, "num_tokens": 19908058.0, "reward": 1.2855520248413086, "reward_std": 0.3105565309524536, "rewards/accuracy_reward_step": 0.66796875, "rewards/final_brier_reward_step": 0.686301589012146, "rewards/format_reward_step": 0.9296875, "rewards/stepwise_brier_reward": 0.7258551120758057, "step": 82 }, { "calib/answer_extract_rate": 0.88671875, "calib/avg_num_step_conf": 4.91015625, "calib/ece": 0.30422907488986817, "calib/final_conf_rate": 0.88671875, "calib/format_rate": 0.8828125, "calib/frac_conf_gt_0.9": 0.9383259911894273, "calib/gap": 0.011205123736493627, "calib/mean_conf": 0.9694273127753307, "calib/mu_c": 0.9731788079470202, "calib/mu_w": 0.9619736842105265, "calib/nonempty_final_conf_rate": 0.88671875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.30422907488986817, "calib/std_conf": 0.03488341505762858, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 2880.0, "completions/max_terminated_length": 2880.0, "completions/mean_length": 650.6796875, "completions/mean_terminated_length": 730.5877075195312, "completions/min_length": 0.0, "completions/min_terminated_length": 301.0, "epoch": 0.08853333333333334, "grad_norm": 0.4519554376602173, "learning_rate": 3.277777777777778e-06, "loss": -0.1525, "num_tokens": 20204936.0, "reward": 1.1441466808319092, "reward_std": 0.35974669456481934, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.6066093444824219, "rewards/format_reward_step": 0.8828125, "rewards/stepwise_brier_reward": 0.6508677005767822, "step": 83 }, { "calib/answer_extract_rate": 0.93359375, "calib/avg_num_step_conf": 6.29296875, "calib/ece": 0.25924769874476994, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.9372384937238494, "calib/gap": 0.03571067348678636, "calib/mean_conf": 0.9538066945606696, "calib/mu_c": 0.9641164705882355, "calib/mu_w": 0.9284057971014491, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.25087866108786616, "calib/std_conf": 0.11030594026286576, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 1956.0, "completions/max_terminated_length": 1956.0, "completions/mean_length": 640.3828125, "completions/mean_terminated_length": 685.9330444335938, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.0896, "grad_norm": 0.6918340921401978, "learning_rate": 3.2500000000000002e-06, "loss": -0.1044, "num_tokens": 20497834.0, "reward": 1.2782249450683594, "reward_std": 0.37417155504226685, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.6854082345962524, "rewards/format_reward_step": 0.9296875, "rewards/stepwise_brier_reward": 0.7139586210250854, "step": 84 }, { "calib/answer_extract_rate": 0.83984375, "calib/avg_num_step_conf": 8.09375, "calib/ece": 0.3080944186046515, "calib/final_conf_rate": 0.83984375, "calib/format_rate": 0.83984375, "calib/frac_conf_gt_0.9": 0.8744186046511628, "calib/gap": 0.064403890571753, "calib/mean_conf": 0.9185102325581397, "calib/mu_c": 0.9412762589928058, "calib/mu_w": 0.8768723684210528, "calib/nonempty_final_conf_rate": 0.83984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2900465116279073, "calib/std_conf": 0.1921334642412437, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15625, "completions/max_length": 2560.0, "completions/max_terminated_length": 2560.0, "completions/mean_length": 711.48046875, "completions/mean_terminated_length": 843.2361450195312, "completions/min_length": 0.0, "completions/min_terminated_length": 204.0, "epoch": 0.09066666666666667, "grad_norm": 0.534420907497406, "learning_rate": 3.2222222222222227e-06, "loss": -0.243, "num_tokens": 20810837.0, "reward": 1.078109860420227, "reward_std": 0.4689858555793762, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5794956684112549, "rewards/format_reward_step": 0.83984375, "rewards/stepwise_brier_reward": 0.6456356048583984, "step": 85 }, { "calib/answer_extract_rate": 0.69921875, "calib/avg_num_step_conf": 14.1796875, "calib/ece": 0.3012921348314609, "calib/final_conf_rate": 0.6953125, "calib/format_rate": 0.68359375, "calib/frac_conf_gt_0.9": 0.8707865168539326, "calib/gap": 0.03486478912708468, "calib/mean_conf": 0.9527528089887642, "calib/mu_c": 0.9647008547008551, "calib/mu_w": 0.9298360655737704, "calib/nonempty_final_conf_rate": 0.6953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.29837078651685417, "calib/std_conf": 0.06700457487296918, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.29296875, "completions/max_length": 2777.0, "completions/max_terminated_length": 2777.0, "completions/mean_length": 667.6796875, "completions/mean_terminated_length": 944.3425903320312, "completions/min_length": 0.0, "completions/min_terminated_length": 328.0, "epoch": 0.09173333333333333, "grad_norm": 0.47510606050491333, "learning_rate": 3.1944444444444443e-06, "loss": -0.3591, "num_tokens": 21110315.0, "reward": 0.8938664793968201, "reward_std": 0.6086052656173706, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.4769015610218048, "rewards/format_reward_step": 0.68359375, "rewards/stepwise_brier_reward": 0.5044753551483154, "step": 86 }, { "calib/answer_extract_rate": 0.45703125, "calib/avg_num_step_conf": 26.7734375, "calib/ece": 0.19243589743589756, "calib/final_conf_rate": 0.45703125, "calib/format_rate": 0.45703125, "calib/frac_conf_gt_0.9": 0.8803418803418803, "calib/gap": -0.02857770582793684, "calib/mean_conf": 0.9426923076923079, "calib/mu_c": 0.9370744680851066, "calib/mu_w": 0.9656521739130435, "calib/nonempty_final_conf_rate": 0.45703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16585470085470097, "calib/std_conf": 0.11494989332320599, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5234375, "completions/max_length": 2528.0, "completions/max_terminated_length": 2528.0, "completions/mean_length": 482.16796875, "completions/mean_terminated_length": 1011.7622680664062, "completions/min_length": 0.0, "completions/min_terminated_length": 417.0, "epoch": 0.0928, "grad_norm": 0.7267798185348511, "learning_rate": 3.1666666666666667e-06, "loss": -0.5939, "num_tokens": 21362286.0, "reward": 0.6898885369300842, "reward_std": 0.7735934853553772, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.36581942439079285, "rewards/format_reward_step": 0.45703125, "rewards/stepwise_brier_reward": 0.3763526976108551, "step": 87 }, { "calib/answer_extract_rate": 0.69140625, "calib/avg_num_step_conf": 19.7890625, "calib/ece": 0.2280594444444446, "calib/final_conf_rate": 0.703125, "calib/format_rate": 0.6875, "calib/frac_conf_gt_0.9": 0.8333333333333334, "calib/gap": 0.11028449519230799, "calib/mean_conf": 0.9286072222222223, "calib/mu_c": 0.9604671875000002, "calib/mu_w": 0.8501826923076922, "calib/nonempty_final_conf_rate": 0.703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2227777777777779, "calib/std_conf": 0.14219734175924337, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.296875, "completions/max_length": 2350.0, "completions/max_terminated_length": 2350.0, "completions/mean_length": 742.12109375, "completions/mean_terminated_length": 1055.461181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 539.0, "epoch": 0.09386666666666667, "grad_norm": 0.6653816103935242, "learning_rate": 3.138888888888889e-06, "loss": -0.3913, "num_tokens": 21685157.0, "reward": 0.9765293002128601, "reward_std": 0.737666666507721, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5399715900421143, "rewards/format_reward_step": 0.6875, "rewards/stepwise_brier_reward": 0.5511740446090698, "step": 88 }, { "calib/answer_extract_rate": 0.8046875, "calib/avg_num_step_conf": 11.9375, "calib/ece": 0.4115776699029128, "calib/final_conf_rate": 0.8046875, "calib/format_rate": 0.796875, "calib/frac_conf_gt_0.9": 0.7330097087378641, "calib/gap": 0.04470332577475444, "calib/mean_conf": 0.9261407766990293, "calib/mu_c": 0.9474074074074077, "calib/mu_w": 0.9027040816326533, "calib/nonempty_final_conf_rate": 0.8046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.40672330097087395, "calib/std_conf": 0.10845465513640232, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1953125, "completions/max_length": 2870.0, "completions/max_terminated_length": 2870.0, "completions/mean_length": 854.83203125, "completions/mean_terminated_length": 1062.3155517578125, "completions/min_length": 0.0, "completions/min_terminated_length": 522.0, "epoch": 0.09493333333333333, "grad_norm": 0.49867215752601624, "learning_rate": 3.1111111111111116e-06, "loss": -0.2103, "num_tokens": 22035922.0, "reward": 0.8709223866462708, "reward_std": 0.5263679027557373, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.4772096872329712, "rewards/format_reward_step": 0.796875, "rewards/stepwise_brier_reward": 0.5230201482772827, "step": 89 }, { "calib/answer_extract_rate": 0.90625, "calib/avg_num_step_conf": 9.1328125, "calib/ece": 0.28369098712446356, "calib/final_conf_rate": 0.91015625, "calib/format_rate": 0.890625, "calib/frac_conf_gt_0.9": 0.8369098712446352, "calib/gap": 0.04560380479735304, "calib/mean_conf": 0.9489270386266095, "calib/mu_c": 0.9641935483870969, "calib/mu_w": 0.9185897435897439, "calib/nonempty_final_conf_rate": 0.91015625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.28369098712446356, "calib/std_conf": 0.06361875987449349, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2248.0, "completions/max_terminated_length": 2248.0, "completions/mean_length": 852.30859375, "completions/mean_terminated_length": 932.4402465820312, "completions/min_length": 0.0, "completions/min_terminated_length": 495.0, "epoch": 0.096, "grad_norm": 0.40822866559028625, "learning_rate": 3.0833333333333336e-06, "loss": -0.0858, "num_tokens": 22380473.0, "reward": 1.1848654747009277, "reward_std": 0.36280542612075806, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6330581903457642, "rewards/format_reward_step": 0.890625, "rewards/stepwise_brier_reward": 0.6952204704284668, "step": 90 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 8.44140625, "calib/ece": 0.20299595141700405, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.6639676113360324, "calib/gap": 0.08791293213828433, "calib/mean_conf": 0.9110931174089069, "calib/mu_c": 0.9363636363636364, "calib/mu_w": 0.8484507042253521, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20076923076923076, "calib/std_conf": 0.10060555232743204, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2042.0, "completions/max_terminated_length": 2042.0, "completions/mean_length": 860.7890625, "completions/mean_terminated_length": 892.1538696289062, "completions/min_length": 0.0, "completions/min_terminated_length": 470.0, "epoch": 0.09706666666666666, "grad_norm": 0.7437296509742737, "learning_rate": 3.055555555555556e-06, "loss": -0.0689, "num_tokens": 22731587.0, "reward": 1.355747938156128, "reward_std": 0.3515569865703583, "rewards/accuracy_reward_step": 0.6875, "rewards/final_brier_reward_step": 0.750420331954956, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.787775993347168, "step": 91 }, { "calib/answer_extract_rate": 0.94921875, "calib/avg_num_step_conf": 8.22265625, "calib/ece": 0.1214344262295082, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.5860655737704918, "calib/gap": 0.138848484848485, "calib/mean_conf": 0.8853688524590165, "calib/mu_c": 0.9166666666666666, "calib/mu_w": 0.7778181818181816, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11610655737704918, "calib/std_conf": 0.13771952376679206, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2596.0, "completions/max_terminated_length": 2596.0, "completions/mean_length": 843.82421875, "completions/mean_terminated_length": 885.32373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 412.0, "epoch": 0.09813333333333334, "grad_norm": 0.5318933129310608, "learning_rate": 3.0277777777777776e-06, "loss": -0.0645, "num_tokens": 23077366.0, "reward": 1.4378492832183838, "reward_std": 0.3149486482143402, "rewards/accuracy_reward_step": 0.73828125, "rewards/final_brier_reward_step": 0.8006671667098999, "rewards/format_reward_step": 0.9453125, "rewards/stepwise_brier_reward": 0.8188130259513855, "step": 92 }, { "calib/answer_extract_rate": 0.94921875, "calib/avg_num_step_conf": 8.53125, "calib/ece": 0.174672131147541, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.5942622950819673, "calib/gap": 0.07115423901940765, "calib/mean_conf": 0.9041803278688525, "calib/mu_c": 0.923426966292135, "calib/mu_w": 0.8522727272727274, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.174672131147541, "calib/std_conf": 0.09896878265835195, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2549.0, "completions/max_terminated_length": 2549.0, "completions/mean_length": 825.390625, "completions/mean_terminated_length": 862.4489135742188, "completions/min_length": 0.0, "completions/min_terminated_length": 467.0, "epoch": 0.0992, "grad_norm": 0.45298802852630615, "learning_rate": 3e-06, "loss": -0.0699, "num_tokens": 23417482.0, "reward": 1.3614253997802734, "reward_std": 0.35973337292671204, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.7525163888931274, "rewards/format_reward_step": 0.94921875, "rewards/stepwise_brier_reward": 0.7797309160232544, "step": 93 }, { "calib/answer_extract_rate": 0.94921875, "calib/avg_num_step_conf": 8.78515625, "calib/ece": 0.23654320987654331, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.6337448559670782, "calib/gap": 0.10949923312883447, "calib/mean_conf": 0.9073251028806585, "calib/mu_c": 0.9433742331288344, "calib/mu_w": 0.8338749999999999, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23654320987654331, "calib/std_conf": 0.10313125036546462, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 2435.0, "completions/max_terminated_length": 2435.0, "completions/mean_length": 847.62890625, "completions/mean_terminated_length": 892.9752807617188, "completions/min_length": 0.0, "completions/min_terminated_length": 486.0, "epoch": 0.10026666666666667, "grad_norm": 0.4858059883117676, "learning_rate": 2.9722222222222225e-06, "loss": -0.0611, "num_tokens": 23766195.0, "reward": 1.279847264289856, "reward_std": 0.2897565960884094, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.7145347595214844, "rewards/format_reward_step": 0.94140625, "rewards/stepwise_brier_reward": 0.7668822407722473, "step": 94 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 9.16015625, "calib/ece": 0.22775100401606432, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.6305220883534136, "calib/gap": 0.11558441558441579, "calib/mean_conf": 0.8904016064257029, "calib/mu_c": 0.9293939393939395, "calib/mu_w": 0.8138095238095238, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22775100401606432, "calib/std_conf": 0.13193584663745286, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 3007.0, "completions/max_terminated_length": 3007.0, "completions/mean_length": 870.45703125, "completions/mean_terminated_length": 894.9276733398438, "completions/min_length": 0.0, "completions/min_terminated_length": 559.0, "epoch": 0.10133333333333333, "grad_norm": 0.44509634375572205, "learning_rate": 2.944444444444445e-06, "loss": -0.0252, "num_tokens": 24118200.0, "reward": 1.3045127391815186, "reward_std": 0.35857391357421875, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.7381042838096619, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.774654746055603, "step": 95 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 8.953125, "calib/ece": 0.1099601593625499, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.7808764940239044, "calib/gap": 0.15298029556650283, "calib/mean_conf": 0.9187250996015938, "calib/mu_c": 0.9479802955665028, "calib/mu_w": 0.7949999999999999, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1099601593625499, "calib/std_conf": 0.12109053002462027, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2716.0, "completions/max_terminated_length": 2716.0, "completions/mean_length": 864.4453125, "completions/mean_terminated_length": 881.6653442382812, "completions/min_length": 0.0, "completions/min_terminated_length": 452.0, "epoch": 0.1024, "grad_norm": 0.5172838568687439, "learning_rate": 2.916666666666667e-06, "loss": -0.0033, "num_tokens": 24468354.0, "reward": 1.5302934646606445, "reward_std": 0.2607869803905487, "rewards/accuracy_reward_step": 0.79296875, "rewards/final_brier_reward_step": 0.8489906191825867, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8591302633285522, "step": 96 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 8.375, "calib/ece": 0.2555465587044535, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03263424066636922, "calib/mean_conf": 0.9276113360323887, "calib/mu_c": 0.9383132530120483, "calib/mu_w": 0.905679012345679, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2555465587044535, "calib/std_conf": 0.09808470866513047, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2167.0, "completions/max_terminated_length": 2167.0, "completions/mean_length": 834.4140625, "completions/mean_terminated_length": 861.3306274414062, "completions/min_length": 0.0, "completions/min_terminated_length": 428.0, "epoch": 0.10346666666666667, "grad_norm": 0.4137607514858246, "learning_rate": 2.888888888888889e-06, "loss": -0.0363, "num_tokens": 24810076.0, "reward": 1.275789737701416, "reward_std": 0.3736746907234192, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.6937867403030396, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.7358976602554321, "step": 97 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 8.05859375, "calib/ece": 0.3240408163265308, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.8448979591836735, "calib/gap": 0.058419754825982606, "calib/mean_conf": 0.9403673469387757, "calib/mu_c": 0.9627814569536424, "calib/mu_w": 0.9043617021276598, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3240408163265308, "calib/std_conf": 0.1038300601114975, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2224.0, "completions/max_terminated_length": 2224.0, "completions/mean_length": 822.93359375, "completions/mean_terminated_length": 856.3861694335938, "completions/min_length": 0.0, "completions/min_terminated_length": 493.0, "epoch": 0.10453333333333334, "grad_norm": 0.44194352626800537, "learning_rate": 2.861111111111111e-06, "loss": -0.0363, "num_tokens": 25149971.0, "reward": 1.183180332183838, "reward_std": 0.3613991141319275, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.6462027430534363, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.6996912360191345, "step": 98 }, { "calib/answer_extract_rate": 0.91796875, "calib/avg_num_step_conf": 7.71875, "calib/ece": 0.4068510638297875, "calib/final_conf_rate": 0.91796875, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 0.7404255319148936, "calib/gap": 0.07175891046073613, "calib/mean_conf": 0.9132340425531916, "calib/mu_c": 0.9486554621848742, "calib/mu_w": 0.876896551724138, "calib/nonempty_final_conf_rate": 0.91796875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4068510638297875, "calib/std_conf": 0.12783203953026123, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2543.0, "completions/max_terminated_length": 2543.0, "completions/mean_length": 853.5234375, "completions/mean_terminated_length": 925.85595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 392.0, "epoch": 0.1056, "grad_norm": 0.5108424425125122, "learning_rate": 2.8333333333333335e-06, "loss": -0.1032, "num_tokens": 25497313.0, "reward": 0.9885514974594116, "reward_std": 0.41032248735427856, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5544949769973755, "rewards/format_reward_step": 0.91796875, "rewards/stepwise_brier_reward": 0.6186538338661194, "step": 99 }, { "calib/answer_extract_rate": 0.953125, "calib/avg_num_step_conf": 8.37109375, "calib/ece": 0.2564754098360657, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.7663934426229508, "calib/gap": 0.07421036585365859, "calib/mean_conf": 0.9097540983606558, "calib/mu_c": 0.9340853658536585, "calib/mu_w": 0.859875, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.247049180327869, "calib/std_conf": 0.15833644980124587, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2507.0, "completions/max_terminated_length": 2507.0, "completions/mean_length": 897.53515625, "completions/mean_terminated_length": 937.8326416015625, "completions/min_length": 0.0, "completions/min_terminated_length": 505.0, "epoch": 0.10666666666666667, "grad_norm": 0.47045427560806274, "learning_rate": 2.805555555555556e-06, "loss": -0.0633, "num_tokens": 25857530.0, "reward": 1.2721636295318604, "reward_std": 0.27989959716796875, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.6965453028678894, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.7518138885498047, "step": 100 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 8.61328125, "calib/ece": 0.30084337349397616, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.8674698795180723, "calib/gap": 0.05504682843763298, "calib/mean_conf": 0.9514457831325303, "calib/mu_c": 0.9706790123456792, "calib/mu_w": 0.9156321839080462, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.30084337349397616, "calib/std_conf": 0.07664866694775996, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 1815.0, "completions/max_terminated_length": 1815.0, "completions/mean_length": 904.30859375, "completions/mean_terminated_length": 929.7308959960938, "completions/min_length": 0.0, "completions/min_terminated_length": 516.0, "epoch": 0.10773333333333333, "grad_norm": 0.6130152344703674, "learning_rate": 2.7777777777777783e-06, "loss": -0.035, "num_tokens": 26219065.0, "reward": 1.2515060901641846, "reward_std": 0.4860761761665344, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.682148814201355, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7214141488075256, "step": 101 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 9.16015625, "calib/ece": 0.26318548387096796, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.9233870967741935, "calib/gap": 0.037605394990366414, "calib/mean_conf": 0.9607661290322583, "calib/mu_c": 0.9721387283236997, "calib/mu_w": 0.9345333333333333, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26318548387096796, "calib/std_conf": 0.07202022664714358, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 1636.0, "completions/max_terminated_length": 1636.0, "completions/mean_length": 842.8203125, "completions/mean_terminated_length": 870.008056640625, "completions/min_length": 0.0, "completions/min_terminated_length": 458.0, "epoch": 0.1088, "grad_norm": 0.40492483973503113, "learning_rate": 2.7500000000000004e-06, "loss": -0.0512, "num_tokens": 26564563.0, "reward": 1.3133814334869385, "reward_std": 0.3594001531600952, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.707624614238739, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.7476516366004944, "step": 102 }, { "calib/answer_extract_rate": 0.9375, "calib/avg_num_step_conf": 9.2578125, "calib/ece": 0.364291666666667, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.9083333333333333, "calib/gap": 0.04623386922356032, "calib/mean_conf": 0.9601250000000002, "calib/mu_c": 0.9788111888111892, "calib/mu_w": 0.9325773195876289, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.364291666666667, "calib/std_conf": 0.062898603919324, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 2391.0, "completions/max_terminated_length": 2391.0, "completions/mean_length": 978.015625, "completions/mean_terminated_length": 1038.8880615234375, "completions/min_length": 0.0, "completions/min_terminated_length": 601.0, "epoch": 0.10986666666666667, "grad_norm": 0.36557236313819885, "learning_rate": 2.7222222222222224e-06, "loss": -0.09, "num_tokens": 26942527.0, "reward": 1.1214299201965332, "reward_std": 0.4106621742248535, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.6044878959655762, "rewards/format_reward_step": 0.9375, "rewards/stepwise_brier_reward": 0.6673686504364014, "step": 103 }, { "calib/answer_extract_rate": 0.9375, "calib/avg_num_step_conf": 10.42578125, "calib/ece": 0.3468333333333336, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.8458333333333333, "calib/gap": 0.06278422608319545, "calib/mean_conf": 0.9426666666666669, "calib/mu_c": 0.9680419580419585, "calib/mu_w": 0.905257731958763, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3468333333333336, "calib/std_conf": 0.1027880775619862, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2647.0, "completions/max_terminated_length": 2647.0, "completions/mean_length": 965.890625, "completions/mean_terminated_length": 1013.3933715820312, "completions/min_length": 0.0, "completions/min_terminated_length": 562.0, "epoch": 0.11093333333333333, "grad_norm": 0.43601280450820923, "learning_rate": 2.6944444444444444e-06, "loss": -0.0696, "num_tokens": 27319515.0, "reward": 1.1289482116699219, "reward_std": 0.4071006178855896, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.6174039244651794, "rewards/format_reward_step": 0.9375, "rewards/stepwise_brier_reward": 0.6716098785400391, "step": 104 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 11.06640625, "calib/ece": 0.30672653061224525, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.8571428571428571, "calib/gap": 0.013188949111713533, "calib/mean_conf": 0.9383346938775512, "calib/mu_c": 0.9426951219512197, "calib/mu_w": 0.9295061728395062, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2878367346938779, "calib/std_conf": 0.1234164106956118, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2276.0, "completions/max_terminated_length": 2276.0, "completions/mean_length": 984.0234375, "completions/mean_terminated_length": 1019.8785400390625, "completions/min_length": 0.0, "completions/min_terminated_length": 673.0, "epoch": 0.112, "grad_norm": 0.40076950192451477, "learning_rate": 2.666666666666667e-06, "loss": -0.034, "num_tokens": 27700225.0, "reward": 1.2418036460876465, "reward_std": 0.3676682710647583, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.6670179963111877, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.6878659725189209, "step": 105 }, { "calib/answer_extract_rate": 0.9140625, "calib/avg_num_step_conf": 10.40234375, "calib/ece": 0.280726495726496, "calib/final_conf_rate": 0.9140625, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.8717948717948718, "calib/gap": 0.04368254497002011, "calib/mean_conf": 0.9525213675213677, "calib/mu_c": 0.9667088607594939, "calib/mu_w": 0.9230263157894738, "calib/nonempty_final_conf_rate": 0.9140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2790170940170943, "calib/std_conf": 0.08555531007418866, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 2667.0, "completions/max_terminated_length": 2667.0, "completions/mean_length": 905.91015625, "completions/mean_terminated_length": 978.5358276367188, "completions/min_length": 0.0, "completions/min_terminated_length": 619.0, "epoch": 0.11306666666666666, "grad_norm": 0.37359946966171265, "learning_rate": 2.6388888888888893e-06, "loss": -0.0989, "num_tokens": 28059762.0, "reward": 1.206790566444397, "reward_std": 0.32431334257125854, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.6541393995285034, "rewards/format_reward_step": 0.9140625, "rewards/stepwise_brier_reward": 0.6845083832740784, "step": 106 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 10.4453125, "calib/ece": 0.31365461847389586, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.9156626506024096, "calib/gap": 0.030393784589186734, "calib/mean_conf": 0.9642570281124498, "calib/mu_c": 0.9748765432098767, "calib/mu_w": 0.94448275862069, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.31365461847389586, "calib/std_conf": 0.056457950738666114, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 1644.0, "completions/max_terminated_length": 1644.0, "completions/mean_length": 893.71484375, "completions/mean_terminated_length": 918.8392944335938, "completions/min_length": 0.0, "completions/min_terminated_length": 525.0, "epoch": 0.11413333333333334, "grad_norm": 0.31740349531173706, "learning_rate": 2.6111111111111113e-06, "loss": -0.0245, "num_tokens": 28416209.0, "reward": 1.2470124959945679, "reward_std": 0.4013622999191284, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.6662039160728455, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7353296875953674, "step": 107 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 9.640625, "calib/ece": 0.24119521912350617, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9083665338645418, "calib/gap": 0.03851799687010993, "calib/mean_conf": 0.9583266932270917, "calib/mu_c": 0.9692222222222225, "calib/mu_w": 0.9307042253521126, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24119521912350617, "calib/std_conf": 0.07658282528158045, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1755.0, "completions/max_terminated_length": 1755.0, "completions/mean_length": 881.37109375, "completions/mean_terminated_length": 898.9282836914062, "completions/min_length": 0.0, "completions/min_terminated_length": 484.0, "epoch": 0.1152, "grad_norm": 0.35262876749038696, "learning_rate": 2.5833333333333337e-06, "loss": -0.0307, "num_tokens": 28768112.0, "reward": 1.3585948944091797, "reward_std": 0.3439452350139618, "rewards/accuracy_reward_step": 0.703125, "rewards/final_brier_reward_step": 0.7341094017028809, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7614732980728149, "step": 108 }, { "calib/answer_extract_rate": 0.93359375, "calib/avg_num_step_conf": 8.421875, "calib/ece": 0.34769874476987467, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.7949790794979079, "calib/gap": 0.08079352517985616, "calib/mean_conf": 0.9292887029288707, "calib/mu_c": 0.9630935251798564, "calib/mu_w": 0.8823000000000002, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.34769874476987467, "calib/std_conf": 0.11007755120020576, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2230.0, "completions/max_terminated_length": 2230.0, "completions/mean_length": 827.61328125, "completions/mean_terminated_length": 882.7875366210938, "completions/min_length": 0.0, "completions/min_terminated_length": 535.0, "epoch": 0.11626666666666667, "grad_norm": 1.1841824054718018, "learning_rate": 2.5555555555555557e-06, "loss": -0.0787, "num_tokens": 29107621.0, "reward": 1.1153641939163208, "reward_std": 0.3671267330646515, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.6189414262771606, "rewards/format_reward_step": 0.93359375, "rewards/stepwise_brier_reward": 0.6782614588737488, "step": 109 }, { "calib/answer_extract_rate": 0.9296875, "calib/avg_num_step_conf": 8.17578125, "calib/ece": 0.23210084033613476, "calib/final_conf_rate": 0.9296875, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.9033613445378151, "calib/gap": 0.025639367816092107, "calib/mean_conf": 0.95436974789916, "calib/mu_c": 0.9612643678160923, "calib/mu_w": 0.9356250000000002, "calib/nonempty_final_conf_rate": 0.9296875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22768907563025242, "calib/std_conf": 0.08855173349505058, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 1379.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 713.41015625, "completions/mean_terminated_length": 764.15478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 311.0, "epoch": 0.11733333333333333, "grad_norm": 0.5043585896492004, "learning_rate": 2.5277777777777778e-06, "loss": -0.0858, "num_tokens": 29418214.0, "reward": 1.3116428852081299, "reward_std": 0.4207128584384918, "rewards/accuracy_reward_step": 0.6796875, "rewards/final_brier_reward_step": 0.7026492357254028, "rewards/format_reward_step": 0.9296875, "rewards/stepwise_brier_reward": 0.7506482005119324, "step": 110 }, { "calib/answer_extract_rate": 0.921875, "calib/avg_num_step_conf": 7.4375, "calib/ece": 0.22889830508474598, "calib/final_conf_rate": 0.921875, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.8601694915254238, "calib/gap": 0.11674563915647018, "calib/mean_conf": 0.936525423728814, "calib/mu_c": 0.970658682634731, "calib/mu_w": 0.8539130434782608, "calib/nonempty_final_conf_rate": 0.921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22889830508474598, "calib/std_conf": 0.12869868316556027, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 2474.0, "completions/max_terminated_length": 2474.0, "completions/mean_length": 727.78125, "completions/mean_terminated_length": 782.8235473632812, "completions/min_length": 0.0, "completions/min_terminated_length": 420.0, "epoch": 0.1184, "grad_norm": 0.3411725163459778, "learning_rate": 2.5e-06, "loss": -0.1006, "num_tokens": 29734974.0, "reward": 1.2854728698730469, "reward_std": 0.3727434575557709, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.712110161781311, "rewards/format_reward_step": 0.921875, "rewards/stepwise_brier_reward": 0.7395458221435547, "step": 111 }, { "calib/answer_extract_rate": 0.90625, "calib/avg_num_step_conf": 6.8046875, "calib/ece": 0.25693965517241396, "calib/final_conf_rate": 0.90625, "calib/format_rate": 0.90625, "calib/frac_conf_gt_0.9": 0.7327586206896551, "calib/gap": 0.10153679653679648, "calib/mean_conf": 0.9207327586206897, "calib/mu_c": 0.95487012987013, "calib/mu_w": 0.8533333333333335, "calib/nonempty_final_conf_rate": 0.90625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.25693965517241396, "calib/std_conf": 0.11295741169975192, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 1881.0, "completions/max_terminated_length": 1881.0, "completions/mean_length": 734.1171875, "completions/mean_terminated_length": 810.0603637695312, "completions/min_length": 0.0, "completions/min_terminated_length": 463.0, "epoch": 0.11946666666666667, "grad_norm": 0.35230952501296997, "learning_rate": 2.4722222222222226e-06, "loss": -0.1798, "num_tokens": 30053868.0, "reward": 1.2051305770874023, "reward_std": 0.3573354482650757, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.6736800670623779, "rewards/format_reward_step": 0.90625, "rewards/stepwise_brier_reward": 0.7044122815132141, "step": 112 }, { "calib/answer_extract_rate": 0.8828125, "calib/avg_num_step_conf": 6.85546875, "calib/ece": 0.2870353982300886, "calib/final_conf_rate": 0.8828125, "calib/format_rate": 0.87890625, "calib/frac_conf_gt_0.9": 0.7610619469026548, "calib/gap": 0.08733844189016637, "calib/mean_conf": 0.928628318584071, "calib/mu_c": 0.9599310344827588, "calib/mu_w": 0.8725925925925925, "calib/nonempty_final_conf_rate": 0.8828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2870353982300886, "calib/std_conf": 0.10107113894312923, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 1499.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 658.3359375, "completions/mean_terminated_length": 745.7256469726562, "completions/min_length": 0.0, "completions/min_terminated_length": 434.0, "epoch": 0.12053333333333334, "grad_norm": 0.39759573340415955, "learning_rate": 2.4444444444444447e-06, "loss": -0.0975, "num_tokens": 30350642.0, "reward": 1.137017846107483, "reward_std": 0.3805588483810425, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.6296113133430481, "rewards/format_reward_step": 0.87890625, "rewards/stepwise_brier_reward": 0.6716611385345459, "step": 113 }, { "calib/answer_extract_rate": 0.90234375, "calib/avg_num_step_conf": 6.89453125, "calib/ece": 0.17225108225108265, "calib/final_conf_rate": 0.90234375, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 0.8658008658008658, "calib/gap": 0.06126077348066328, "calib/mean_conf": 0.9558008658008661, "calib/mu_c": 0.9690607734806633, "calib/mu_w": 0.9078, "calib/nonempty_final_conf_rate": 0.90234375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.17225108225108265, "calib/std_conf": 0.07070537736877427, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2000.0, "completions/max_terminated_length": 2000.0, "completions/mean_length": 685.60546875, "completions/mean_terminated_length": 759.80517578125, "completions/min_length": 0.0, "completions/min_terminated_length": 366.0, "epoch": 0.1216, "grad_norm": 0.6093668937683105, "learning_rate": 2.4166666666666667e-06, "loss": -0.1186, "num_tokens": 30654221.0, "reward": 1.3461461067199707, "reward_std": 0.2577363848686218, "rewards/accuracy_reward_step": 0.70703125, "rewards/final_brier_reward_step": 0.7367730140686035, "rewards/format_reward_step": 0.90234375, "rewards/stepwise_brier_reward": 0.7219762802124023, "step": 114 }, { "calib/answer_extract_rate": 0.90234375, "calib/avg_num_step_conf": 6.59765625, "calib/ece": 0.334675324675325, "calib/final_conf_rate": 0.90234375, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 0.8961038961038961, "calib/gap": 0.0193078162771958, "calib/mean_conf": 0.9609090909090913, "calib/mu_c": 0.9680136986301373, "calib/mu_w": 0.9487058823529415, "calib/nonempty_final_conf_rate": 0.90234375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3317748917748921, "calib/std_conf": 0.05808297881632693, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 1601.0, "completions/max_terminated_length": 1601.0, "completions/mean_length": 697.33203125, "completions/mean_terminated_length": 772.8008422851562, "completions/min_length": 0.0, "completions/min_terminated_length": 432.0, "epoch": 0.12266666666666666, "grad_norm": 0.4685302674770355, "learning_rate": 2.388888888888889e-06, "loss": -0.1102, "num_tokens": 30961042.0, "reward": 1.121100664138794, "reward_std": 0.5016540288925171, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.599951982498169, "rewards/format_reward_step": 0.90234375, "rewards/stepwise_brier_reward": 0.6423114538192749, "step": 115 }, { "calib/answer_extract_rate": 0.953125, "calib/avg_num_step_conf": 6.71875, "calib/ece": 0.2640163934426233, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.8934426229508197, "calib/gap": 0.029723370429252882, "calib/mean_conf": 0.9605737704918036, "calib/mu_c": 0.9695882352941179, "calib/mu_w": 0.939864864864865, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2639344262295085, "calib/std_conf": 0.06025627210843981, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2797.0, "completions/max_terminated_length": 2797.0, "completions/mean_length": 721.35546875, "completions/mean_terminated_length": 750.6788330078125, "completions/min_length": 0.0, "completions/min_terminated_length": 257.0, "epoch": 0.12373333333333333, "grad_norm": 0.43929150700569153, "learning_rate": 2.361111111111111e-06, "loss": -0.0408, "num_tokens": 31273269.0, "reward": 1.2943837642669678, "reward_std": 0.3709738850593567, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.6938859224319458, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.7522628307342529, "step": 116 }, { "calib/answer_extract_rate": 0.92578125, "calib/avg_num_step_conf": 6.43359375, "calib/ece": 0.348649789029536, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.869198312236287, "calib/gap": 0.037708333333333344, "calib/mean_conf": 0.9562447257383969, "calib/mu_c": 0.9710416666666669, "calib/mu_w": 0.9333333333333336, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.348649789029536, "calib/std_conf": 0.0659163837386271, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 1868.0, "completions/max_terminated_length": 1868.0, "completions/mean_length": 687.9375, "completions/mean_terminated_length": 743.0885620117188, "completions/min_length": 0.0, "completions/min_terminated_length": 393.0, "epoch": 0.1248, "grad_norm": 0.35367223620414734, "learning_rate": 2.3333333333333336e-06, "loss": -0.118, "num_tokens": 31579021.0, "reward": 1.1236909627914429, "reward_std": 0.45129209756851196, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.6051425933837891, "rewards/format_reward_step": 0.92578125, "rewards/stepwise_brier_reward": 0.6641662120819092, "step": 117 }, { "calib/answer_extract_rate": 0.91015625, "calib/avg_num_step_conf": 6.203125, "calib/ece": 0.25660944206008607, "calib/final_conf_rate": 0.91015625, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.8454935622317596, "calib/gap": 0.052954268822813955, "calib/mean_conf": 0.9518884120171676, "calib/mu_c": 0.968024691358025, "calib/mu_w": 0.9150704225352111, "calib/nonempty_final_conf_rate": 0.91015625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.25660944206008607, "calib/std_conf": 0.07061863663345236, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 1695.0, "completions/max_terminated_length": 1695.0, "completions/mean_length": 668.14453125, "completions/mean_terminated_length": 734.0986938476562, "completions/min_length": 0.0, "completions/min_terminated_length": 416.0, "epoch": 0.12586666666666665, "grad_norm": 0.4721923768520355, "learning_rate": 2.305555555555556e-06, "loss": -0.1234, "num_tokens": 31877114.0, "reward": 1.2356454133987427, "reward_std": 0.41479313373565674, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.6732761859893799, "rewards/format_reward_step": 0.91015625, "rewards/stepwise_brier_reward": 0.7007163763046265, "step": 118 }, { "calib/answer_extract_rate": 0.890625, "calib/avg_num_step_conf": 5.796875, "calib/ece": 0.24078947368421072, "calib/final_conf_rate": 0.890625, "calib/format_rate": 0.890625, "calib/frac_conf_gt_0.9": 0.8114035087719298, "calib/gap": 0.08124145474432654, "calib/mean_conf": 0.9320175438596494, "calib/mu_c": 0.956603773584906, "calib/mu_w": 0.8753623188405795, "calib/nonempty_final_conf_rate": 0.890625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23771929824561422, "calib/std_conf": 0.13266618062523194, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2246.0, "completions/max_terminated_length": 2246.0, "completions/mean_length": 668.76171875, "completions/mean_terminated_length": 744.36083984375, "completions/min_length": 0.0, "completions/min_terminated_length": 383.0, "epoch": 0.12693333333333334, "grad_norm": 0.31884142756462097, "learning_rate": 2.277777777777778e-06, "loss": -0.1035, "num_tokens": 32176421.0, "reward": 1.2189477682113647, "reward_std": 0.46411794424057007, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.6684898138046265, "rewards/format_reward_step": 0.890625, "rewards/stepwise_brier_reward": 0.6981865763664246, "step": 119 }, { "calib/answer_extract_rate": 0.88671875, "calib/avg_num_step_conf": 5.7265625, "calib/ece": 0.28224669603524255, "calib/final_conf_rate": 0.88671875, "calib/format_rate": 0.88671875, "calib/frac_conf_gt_0.9": 0.8986784140969163, "calib/gap": 0.04297367016545117, "calib/mean_conf": 0.960660792951542, "calib/mu_c": 0.9744805194805198, "calib/mu_w": 0.9315068493150687, "calib/nonempty_final_conf_rate": 0.88671875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.28224669603524255, "calib/std_conf": 0.06332911311975102, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 1854.0, "completions/max_terminated_length": 1854.0, "completions/mean_length": 656.28125, "completions/mean_terminated_length": 736.877197265625, "completions/min_length": 0.0, "completions/min_terminated_length": 357.0, "epoch": 0.128, "grad_norm": 0.3318020701408386, "learning_rate": 2.25e-06, "loss": -0.0986, "num_tokens": 32474157.0, "reward": 1.182955265045166, "reward_std": 0.4572509527206421, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.635696530342102, "rewards/format_reward_step": 0.88671875, "rewards/stepwise_brier_reward": 0.6994906067848206, "step": 120 }, { "calib/answer_extract_rate": 0.8984375, "calib/avg_num_step_conf": 5.74609375, "calib/ece": 0.30278260869565243, "calib/final_conf_rate": 0.8984375, "calib/format_rate": 0.8984375, "calib/frac_conf_gt_0.9": 0.8782608695652174, "calib/gap": 0.04957500000000026, "calib/mean_conf": 0.9549565217391306, "calib/mu_c": 0.9722000000000003, "calib/mu_w": 0.922625, "calib/nonempty_final_conf_rate": 0.8984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.30278260869565243, "calib/std_conf": 0.07149185314539393, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2846.0, "completions/max_terminated_length": 2846.0, "completions/mean_length": 673.86328125, "completions/mean_terminated_length": 746.792236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 373.0, "epoch": 0.12906666666666666, "grad_norm": 0.36813440918922424, "learning_rate": 2.222222222222222e-06, "loss": -0.1444, "num_tokens": 32774762.0, "reward": 1.1619811058044434, "reward_std": 0.45201846957206726, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.6278820633888245, "rewards/format_reward_step": 0.8984375, "rewards/stepwise_brier_reward": 0.6890350580215454, "step": 121 }, { "calib/answer_extract_rate": 0.93359375, "calib/avg_num_step_conf": 5.96484375, "calib/ece": 0.2530962343096237, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.8577405857740585, "calib/gap": 0.07657684630738548, "calib/mean_conf": 0.9518410041841007, "calib/mu_c": 0.9749101796407189, "calib/mu_w": 0.8983333333333334, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2530962343096237, "calib/std_conf": 0.07904741329596059, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 2748.0, "completions/max_terminated_length": 2748.0, "completions/mean_length": 681.59765625, "completions/mean_terminated_length": 730.0794677734375, "completions/min_length": 0.0, "completions/min_terminated_length": 383.0, "epoch": 0.13013333333333332, "grad_norm": 0.4585769772529602, "learning_rate": 2.1944444444444445e-06, "loss": -0.0791, "num_tokens": 33079635.0, "reward": 1.2849833965301514, "reward_std": 0.2783547639846802, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.7015323638916016, "rewards/format_reward_step": 0.93359375, "rewards/stepwise_brier_reward": 0.7540565133094788, "step": 122 }, { "calib/answer_extract_rate": 0.90234375, "calib/avg_num_step_conf": 5.55078125, "calib/ece": 0.27935064935064957, "calib/final_conf_rate": 0.90234375, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 0.7229437229437229, "calib/gap": 0.0464257161892071, "calib/mean_conf": 0.9233333333333336, "calib/mu_c": 0.9392105263157896, "calib/mu_w": 0.8927848101265825, "calib/nonempty_final_conf_rate": 0.90234375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.27233766233766254, "calib/std_conf": 0.10917108645756951, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2970.0, "completions/max_terminated_length": 2970.0, "completions/mean_length": 742.8359375, "completions/mean_terminated_length": 823.2294311523438, "completions/min_length": 0.0, "completions/min_terminated_length": 310.0, "epoch": 0.1312, "grad_norm": 0.30729711055755615, "learning_rate": 2.166666666666667e-06, "loss": -0.1733, "num_tokens": 33398129.0, "reward": 1.1760551929473877, "reward_std": 0.47516506910324097, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.6438636779785156, "rewards/format_reward_step": 0.90234375, "rewards/stepwise_brier_reward": 0.6805557608604431, "step": 123 }, { "calib/answer_extract_rate": 0.90234375, "calib/avg_num_step_conf": 6.02734375, "calib/ece": 0.16931034482758647, "calib/final_conf_rate": 0.90625, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 0.8491379310344828, "calib/gap": 0.11721367521367587, "calib/mean_conf": 0.9451724137931036, "calib/mu_c": 0.9714444444444449, "calib/mu_w": 0.854230769230769, "calib/nonempty_final_conf_rate": 0.90625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16931034482758647, "calib/std_conf": 0.10047287126230067, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 1871.0, "completions/max_terminated_length": 1871.0, "completions/mean_length": 694.0625, "completions/mean_terminated_length": 765.862060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 391.0, "epoch": 0.13226666666666667, "grad_norm": 0.4728807210922241, "learning_rate": 2.138888888888889e-06, "loss": -0.1137, "num_tokens": 33705665.0, "reward": 1.3616459369659424, "reward_std": 0.38314908742904663, "rewards/accuracy_reward_step": 0.703125, "rewards/final_brier_reward_step": 0.7490648031234741, "rewards/format_reward_step": 0.90234375, "rewards/stepwise_brier_reward": 0.7750166654586792, "step": 124 }, { "calib/answer_extract_rate": 0.86328125, "calib/avg_num_step_conf": 6.12109375, "calib/ece": 0.21452488687782828, "calib/final_conf_rate": 0.86328125, "calib/format_rate": 0.86328125, "calib/frac_conf_gt_0.9": 0.7918552036199095, "calib/gap": 0.042329985352584365, "calib/mean_conf": 0.9408597285067877, "calib/mu_c": 0.9521604938271606, "calib/mu_w": 0.9098305084745762, "calib/nonempty_final_conf_rate": 0.86328125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21117647058823552, "calib/std_conf": 0.08976935381851739, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 2943.0, "completions/max_terminated_length": 2943.0, "completions/mean_length": 675.3046875, "completions/mean_terminated_length": 778.729736328125, "completions/min_length": 0.0, "completions/min_terminated_length": 313.0, "epoch": 0.13333333333333333, "grad_norm": 0.4558548331260681, "learning_rate": 2.1111111111111114e-06, "loss": -0.1445, "num_tokens": 34006391.0, "reward": 1.2194066047668457, "reward_std": 0.4491131007671356, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.6643987894058228, "rewards/format_reward_step": 0.86328125, "rewards/stepwise_brier_reward": 0.6722660064697266, "step": 125 }, { "calib/answer_extract_rate": 0.90625, "calib/avg_num_step_conf": 6.60546875, "calib/ece": 0.3132327586206898, "calib/final_conf_rate": 0.90625, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 0.6767241379310345, "calib/gap": 0.08548674606223583, "calib/mean_conf": 0.8980603448275863, "calib/mu_c": 0.933065693430657, "calib/mu_w": 0.8475789473684212, "calib/nonempty_final_conf_rate": 0.90625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3103879310344829, "calib/std_conf": 0.1507771127784722, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2082.0, "completions/max_terminated_length": 2082.0, "completions/mean_length": 750.828125, "completions/mean_terminated_length": 828.5, "completions/min_length": 0.0, "completions/min_terminated_length": 388.0, "epoch": 0.1344, "grad_norm": 0.34735116362571716, "learning_rate": 2.0833333333333334e-06, "loss": -0.1487, "num_tokens": 34327107.0, "reward": 1.1019737720489502, "reward_std": 0.4112528860569, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.6155366897583008, "rewards/format_reward_step": 0.90234375, "rewards/stepwise_brier_reward": 0.675259530544281, "step": 126 }, { "calib/answer_extract_rate": 0.828125, "calib/avg_num_step_conf": 6.5546875, "calib/ece": 0.2346226415094343, "calib/final_conf_rate": 0.828125, "calib/format_rate": 0.828125, "calib/frac_conf_gt_0.9": 0.7216981132075472, "calib/gap": 0.04985702614379106, "calib/mean_conf": 0.9101886792452832, "calib/mu_c": 0.9261805555555556, "calib/mu_w": 0.8763235294117645, "calib/nonempty_final_conf_rate": 0.828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23278301886792485, "calib/std_conf": 0.13095787261612948, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.171875, "completions/max_length": 2102.0, "completions/max_terminated_length": 2102.0, "completions/mean_length": 708.14453125, "completions/mean_terminated_length": 855.117919921875, "completions/min_length": 0.0, "completions/min_terminated_length": 456.0, "epoch": 0.13546666666666668, "grad_norm": 0.4462392330169678, "learning_rate": 2.0555555555555555e-06, "loss": -0.2065, "num_tokens": 34635104.0, "reward": 1.1081023216247559, "reward_std": 0.45589950680732727, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.60732102394104, "rewards/format_reward_step": 0.828125, "rewards/stepwise_brier_reward": 0.636517345905304, "step": 127 }, { "calib/answer_extract_rate": 0.85546875, "calib/avg_num_step_conf": 6.9296875, "calib/ece": 0.25981735159817365, "calib/final_conf_rate": 0.85546875, "calib/format_rate": 0.85546875, "calib/frac_conf_gt_0.9": 0.6940639269406392, "calib/gap": 0.13903062132811095, "calib/mean_conf": 0.8853881278538814, "calib/mu_c": 0.9374452554744526, "calib/mu_w": 0.7984146341463416, "calib/nonempty_final_conf_rate": 0.85546875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.25981735159817365, "calib/std_conf": 0.1690348915527457, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.14453125, "completions/max_length": 2035.0, "completions/max_terminated_length": 2035.0, "completions/mean_length": 780.609375, "completions/mean_terminated_length": 912.4931030273438, "completions/min_length": 0.0, "completions/min_terminated_length": 484.0, "epoch": 0.13653333333333334, "grad_norm": 0.46783876419067383, "learning_rate": 2.027777777777778e-06, "loss": -0.2206, "num_tokens": 34964644.0, "reward": 1.098130464553833, "reward_std": 0.49286070466041565, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.6286163926124573, "rewards/format_reward_step": 0.85546875, "rewards/stepwise_brier_reward": 0.6524767279624939, "step": 128 }, { "calib/answer_extract_rate": 0.88671875, "calib/avg_num_step_conf": 7.5625, "calib/ece": 0.18629955947136573, "calib/final_conf_rate": 0.88671875, "calib/format_rate": 0.88671875, "calib/frac_conf_gt_0.9": 0.6828193832599119, "calib/gap": 0.0761440185830432, "calib/mean_conf": 0.8869162995594716, "calib/mu_c": 0.908048780487805, "calib/mu_w": 0.8319047619047618, "calib/nonempty_final_conf_rate": 0.88671875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.17537444933920712, "calib/std_conf": 0.16700050660309387, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 2785.0, "completions/max_terminated_length": 2785.0, "completions/mean_length": 824.41015625, "completions/mean_terminated_length": 929.7312622070312, "completions/min_length": 0.0, "completions/min_terminated_length": 436.0, "epoch": 0.1376, "grad_norm": 0.38593611121177673, "learning_rate": 2.0000000000000003e-06, "loss": -0.2057, "num_tokens": 35301117.0, "reward": 1.2500499486923218, "reward_std": 0.3510856628417969, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.6872902512550354, "rewards/format_reward_step": 0.88671875, "rewards/stepwise_brier_reward": 0.7084317803382874, "step": 129 }, { "calib/answer_extract_rate": 0.91015625, "calib/avg_num_step_conf": 7.9375, "calib/ece": 0.1750214592274681, "calib/final_conf_rate": 0.91015625, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.7510729613733905, "calib/gap": 0.20511996336996352, "calib/mean_conf": 0.896051502145923, "calib/mu_c": 0.9532738095238097, "calib/mu_w": 0.7481538461538462, "calib/nonempty_final_conf_rate": 0.91015625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1750214592274681, "calib/std_conf": 0.17012451110542373, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 1702.0, "completions/max_terminated_length": 1702.0, "completions/mean_length": 816.72265625, "completions/mean_terminated_length": 897.3433227539062, "completions/min_length": 0.0, "completions/min_terminated_length": 361.0, "epoch": 0.13866666666666666, "grad_norm": 0.5547916889190674, "learning_rate": 1.9722222222222224e-06, "loss": -0.1407, "num_tokens": 35638526.0, "reward": 1.314697265625, "reward_std": 0.34050452709198, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.7479640245437622, "rewards/format_reward_step": 0.91015625, "rewards/stepwise_brier_reward": 0.7737981081008911, "step": 130 }, { "calib/answer_extract_rate": 0.83984375, "calib/avg_num_step_conf": 7.90234375, "calib/ece": 0.30102325581395367, "calib/final_conf_rate": 0.83984375, "calib/format_rate": 0.83984375, "calib/frac_conf_gt_0.9": 0.6, "calib/gap": 0.13081495098039242, "calib/mean_conf": 0.8468837209302328, "calib/mu_c": 0.9052941176470591, "calib/mu_w": 0.7744791666666667, "calib/nonempty_final_conf_rate": 0.83984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.29720930232558157, "calib/std_conf": 0.19433622378374746, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.16015625, "completions/max_length": 2657.0, "completions/max_terminated_length": 2657.0, "completions/mean_length": 865.578125, "completions/mean_terminated_length": 1030.641845703125, "completions/min_length": 0.0, "completions/min_terminated_length": 544.0, "epoch": 0.13973333333333332, "grad_norm": 0.3589726686477661, "learning_rate": 1.944444444444445e-06, "loss": -0.1978, "num_tokens": 35989362.0, "reward": 0.9909417629241943, "reward_std": 0.3934427499771118, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.58257657289505, "rewards/format_reward_step": 0.83984375, "rewards/stepwise_brier_reward": 0.603301465511322, "step": 131 }, { "calib/answer_extract_rate": 0.765625, "calib/avg_num_step_conf": 7.29296875, "calib/ece": 0.12122448979591849, "calib/final_conf_rate": 0.765625, "calib/format_rate": 0.765625, "calib/frac_conf_gt_0.9": 0.6887755102040817, "calib/gap": 0.20253333333333368, "calib/mean_conf": 0.8700000000000002, "calib/mu_c": 0.9175333333333336, "calib/mu_w": 0.715, "calib/nonempty_final_conf_rate": 0.765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11295918367346952, "calib/std_conf": 0.19551345274451482, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.234375, "completions/max_length": 2925.0, "completions/max_terminated_length": 2925.0, "completions/mean_length": 810.42578125, "completions/mean_terminated_length": 1058.5152587890625, "completions/min_length": 0.0, "completions/min_terminated_length": 522.0, "epoch": 0.1408, "grad_norm": 0.35102054476737976, "learning_rate": 1.916666666666667e-06, "loss": -0.2885, "num_tokens": 36325463.0, "reward": 1.1497254371643066, "reward_std": 0.6093935966491699, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.6461539268493652, "rewards/format_reward_step": 0.765625, "rewards/stepwise_brier_reward": 0.656593918800354, "step": 132 }, { "calib/answer_extract_rate": 0.72265625, "calib/avg_num_step_conf": 7.2421875, "calib/ece": 0.2776630434782611, "calib/final_conf_rate": 0.71875, "calib/format_rate": 0.71875, "calib/frac_conf_gt_0.9": 0.5760869565217391, "calib/gap": 0.10685245115665865, "calib/mean_conf": 0.8336413043478261, "calib/mu_c": 0.8806796116504858, "calib/mu_w": 0.7738271604938272, "calib/nonempty_final_conf_rate": 0.71875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.27576086956521767, "calib/std_conf": 0.20506176596130307, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.28125, "completions/max_length": 2892.0, "completions/max_terminated_length": 2892.0, "completions/mean_length": 848.58203125, "completions/mean_terminated_length": 1180.6358642578125, "completions/min_length": 0.0, "completions/min_terminated_length": 675.0, "epoch": 0.14186666666666667, "grad_norm": 0.3158020079135895, "learning_rate": 1.888888888888889e-06, "loss": -0.3075, "num_tokens": 36672084.0, "reward": 0.8508057594299316, "reward_std": 0.6803976893424988, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.4953535199165344, "rewards/format_reward_step": 0.71875, "rewards/stepwise_brier_reward": 0.5156407952308655, "step": 133 }, { "calib/answer_extract_rate": 0.67578125, "calib/avg_num_step_conf": 7.18359375, "calib/ece": 0.20092485549132955, "calib/final_conf_rate": 0.67578125, "calib/format_rate": 0.67578125, "calib/frac_conf_gt_0.9": 0.5028901734104047, "calib/gap": 0.20161716171617194, "calib/mean_conf": 0.7610404624277458, "calib/mu_c": 0.8449504950495053, "calib/mu_w": 0.6433333333333333, "calib/nonempty_final_conf_rate": 0.67578125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.18907514450867058, "calib/std_conf": 0.27433801264790336, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.32421875, "completions/max_length": 2559.0, "completions/max_terminated_length": 2559.0, "completions/mean_length": 866.5546875, "completions/mean_terminated_length": 1282.300537109375, "completions/min_length": 0.0, "completions/min_terminated_length": 624.0, "epoch": 0.14293333333333333, "grad_norm": 0.3706478476524353, "learning_rate": 1.8611111111111113e-06, "loss": -0.4715, "num_tokens": 37025914.0, "reward": 0.8468378782272339, "reward_std": 0.6407945156097412, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.5057078003883362, "rewards/format_reward_step": 0.67578125, "rewards/stepwise_brier_reward": 0.5274984836578369, "step": 134 }, { "calib/answer_extract_rate": 0.73828125, "calib/avg_num_step_conf": 8.42578125, "calib/ece": 0.1592592592592594, "calib/final_conf_rate": 0.73828125, "calib/format_rate": 0.73828125, "calib/frac_conf_gt_0.9": 0.6878306878306878, "calib/gap": 0.12027653003930394, "calib/mean_conf": 0.8639153439153441, "calib/mu_c": 0.897007299270073, "calib/mu_w": 0.7767307692307691, "calib/nonempty_final_conf_rate": 0.73828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1491534391534393, "calib/std_conf": 0.19491013592760018, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.26171875, "completions/max_length": 2817.0, "completions/max_terminated_length": 2817.0, "completions/mean_length": 903.921875, "completions/mean_terminated_length": 1224.3597412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 721.0, "epoch": 0.144, "grad_norm": 0.31145983934402466, "learning_rate": 1.8333333333333333e-06, "loss": -0.3567, "num_tokens": 37386238.0, "reward": 1.0521256923675537, "reward_std": 0.5641303062438965, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.5841398239135742, "rewards/format_reward_step": 0.73828125, "rewards/stepwise_brier_reward": 0.6042855978012085, "step": 135 }, { "calib/answer_extract_rate": 0.65234375, "calib/avg_num_step_conf": 7.8671875, "calib/ece": 0.13646706586826352, "calib/final_conf_rate": 0.65234375, "calib/format_rate": 0.65234375, "calib/frac_conf_gt_0.9": 0.5209580838323353, "calib/gap": 0.29955586080586094, "calib/mean_conf": 0.7503592814371257, "calib/mu_c": 0.8633653846153848, "calib/mu_w": 0.5638095238095239, "calib/nonempty_final_conf_rate": 0.65234375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1320359281437126, "calib/std_conf": 0.2748628230875383, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.34375, "completions/max_length": 3006.0, "completions/max_terminated_length": 3006.0, "completions/mean_length": 913.390625, "completions/mean_terminated_length": 1391.8333740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 785.0, "epoch": 0.14506666666666668, "grad_norm": 0.45474621653556824, "learning_rate": 1.8055555555555557e-06, "loss": -0.4577, "num_tokens": 37751594.0, "reward": 0.8631396293640137, "reward_std": 0.5096396207809448, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.5309988260269165, "rewards/format_reward_step": 0.65234375, "rewards/stepwise_brier_reward": 0.5046233534812927, "step": 136 }, { "calib/answer_extract_rate": 0.65625, "calib/avg_num_step_conf": 8.36328125, "calib/ece": 0.15392857142857158, "calib/final_conf_rate": 0.65625, "calib/format_rate": 0.65625, "calib/frac_conf_gt_0.9": 0.5476190476190477, "calib/gap": 0.16138763197586747, "calib/mean_conf": 0.8069047619047619, "calib/mu_c": 0.8558974358974362, "calib/mu_w": 0.6945098039215687, "calib/nonempty_final_conf_rate": 0.65625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1322023809523811, "calib/std_conf": 0.23754686851067733, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.34375, "completions/max_length": 2553.0, "completions/max_terminated_length": 2553.0, "completions/mean_length": 942.3671875, "completions/mean_terminated_length": 1435.9881591796875, "completions/min_length": 0.0, "completions/min_terminated_length": 798.0, "epoch": 0.14613333333333334, "grad_norm": 0.35132110118865967, "learning_rate": 1.777777777777778e-06, "loss": -0.4206, "num_tokens": 38122864.0, "reward": 0.910294234752655, "reward_std": 0.5566987991333008, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5172500014305115, "rewards/format_reward_step": 0.65625, "rewards/stepwise_brier_reward": 0.5160518884658813, "step": 137 }, { "calib/answer_extract_rate": 0.734375, "calib/avg_num_step_conf": 9.47265625, "calib/ece": 0.19737967914438515, "calib/final_conf_rate": 0.73046875, "calib/format_rate": 0.7265625, "calib/frac_conf_gt_0.9": 0.6737967914438503, "calib/gap": 0.1671767741935487, "calib/mean_conf": 0.8446524064171124, "calib/mu_c": 0.9000800000000002, "calib/mu_w": 0.7329032258064515, "calib/nonempty_final_conf_rate": 0.73046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18679144385026747, "calib/std_conf": 0.22550540274089256, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.265625, "completions/max_length": 2915.0, "completions/max_terminated_length": 2915.0, "completions/mean_length": 1003.65234375, "completions/mean_terminated_length": 1366.675537109375, "completions/min_length": 0.0, "completions/min_terminated_length": 584.0, "epoch": 0.1472, "grad_norm": 0.48376113176345825, "learning_rate": 1.75e-06, "loss": -0.3257, "num_tokens": 38507175.0, "reward": 0.9776197671890259, "reward_std": 0.5277310609817505, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5589753985404968, "rewards/format_reward_step": 0.7265625, "rewards/stepwise_brier_reward": 0.5487784743309021, "step": 138 }, { "calib/answer_extract_rate": 0.81640625, "calib/avg_num_step_conf": 10.5390625, "calib/ece": 0.2093779904306221, "calib/final_conf_rate": 0.81640625, "calib/format_rate": 0.81640625, "calib/frac_conf_gt_0.9": 0.7177033492822966, "calib/gap": 0.11303598858898434, "calib/mean_conf": 0.8696650717703351, "calib/mu_c": 0.9031972789115649, "calib/mu_w": 0.7901612903225805, "calib/nonempty_final_conf_rate": 0.81640625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1878468899521532, "calib/std_conf": 0.2074667962714477, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.18359375, "completions/max_length": 2460.0, "completions/max_terminated_length": 2460.0, "completions/mean_length": 1105.13671875, "completions/mean_terminated_length": 1353.6602783203125, "completions/min_length": 0.0, "completions/min_terminated_length": 751.0, "epoch": 0.14826666666666666, "grad_norm": 0.26218655705451965, "learning_rate": 1.7222222222222224e-06, "loss": -0.2185, "num_tokens": 38916226.0, "reward": 1.1242291927337646, "reward_std": 0.49970123171806335, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6268507838249207, "rewards/format_reward_step": 0.81640625, "rewards/stepwise_brier_reward": 0.6197774410247803, "step": 139 }, { "calib/answer_extract_rate": 0.80078125, "calib/avg_num_step_conf": 10.57421875, "calib/ece": 0.17700980392156873, "calib/final_conf_rate": 0.796875, "calib/format_rate": 0.796875, "calib/frac_conf_gt_0.9": 0.6470588235294118, "calib/gap": 0.15736183278223914, "calib/mean_conf": 0.8227941176470589, "calib/mu_c": 0.8675342465753425, "calib/mu_w": 0.7101724137931034, "calib/nonempty_final_conf_rate": 0.796875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14205882352941188, "calib/std_conf": 0.2532756708718664, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.203125, "completions/max_length": 2853.0, "completions/max_terminated_length": 2853.0, "completions/mean_length": 1113.84375, "completions/mean_terminated_length": 1397.7647705078125, "completions/min_length": 0.0, "completions/min_terminated_length": 733.0, "epoch": 0.14933333333333335, "grad_norm": 0.2385905385017395, "learning_rate": 1.6944444444444446e-06, "loss": -0.3415, "num_tokens": 39329426.0, "reward": 1.120464563369751, "reward_std": 0.5710935592651367, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.6254988312721252, "rewards/format_reward_step": 0.796875, "rewards/stepwise_brier_reward": 0.6308607459068298, "step": 140 }, { "calib/answer_extract_rate": 0.765625, "calib/avg_num_step_conf": 9.63671875, "calib/ece": 0.09076530612244921, "calib/final_conf_rate": 0.765625, "calib/format_rate": 0.765625, "calib/frac_conf_gt_0.9": 0.6224489795918368, "calib/gap": 0.277523923444976, "calib/mean_conf": 0.7995408163265308, "calib/mu_c": 0.8618421052631579, "calib/mu_w": 0.5843181818181818, "calib/nonempty_final_conf_rate": 0.765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05739795918367369, "calib/std_conf": 0.2624225704410439, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.234375, "completions/max_length": 2658.0, "completions/max_terminated_length": 2658.0, "completions/mean_length": 1123.88671875, "completions/mean_terminated_length": 1467.93359375, "completions/min_length": 0.0, "completions/min_terminated_length": 812.0, "epoch": 0.1504, "grad_norm": 0.3722957372665405, "learning_rate": 1.6666666666666667e-06, "loss": -0.3131, "num_tokens": 39747277.0, "reward": 1.155234694480896, "reward_std": 0.5611724853515625, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.6531496047973633, "rewards/format_reward_step": 0.765625, "rewards/stepwise_brier_reward": 0.6333895921707153, "step": 141 }, { "calib/answer_extract_rate": 0.81640625, "calib/avg_num_step_conf": 10.171875, "calib/ece": 0.15181818181818202, "calib/final_conf_rate": 0.81640625, "calib/format_rate": 0.81640625, "calib/frac_conf_gt_0.9": 0.5741626794258373, "calib/gap": 0.23373545621555436, "calib/mean_conf": 0.7832057416267943, "calib/mu_c": 0.8626086956521739, "calib/mu_w": 0.6288732394366195, "calib/nonempty_final_conf_rate": 0.81640625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13736842105263175, "calib/std_conf": 0.26833486321890476, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1796875, "completions/max_length": 2798.0, "completions/max_terminated_length": 2798.0, "completions/mean_length": 1136.8828125, "completions/mean_terminated_length": 1385.914306640625, "completions/min_length": 0.0, "completions/min_terminated_length": 826.0, "epoch": 0.15146666666666667, "grad_norm": 0.32148581743240356, "learning_rate": 1.638888888888889e-06, "loss": -0.2852, "num_tokens": 40166519.0, "reward": 1.1054890155792236, "reward_std": 0.48387736082077026, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.6477667689323425, "rewards/format_reward_step": 0.81640625, "rewards/stepwise_brier_reward": 0.6436101198196411, "step": 142 }, { "calib/answer_extract_rate": 0.81640625, "calib/avg_num_step_conf": 10.2578125, "calib/ece": 0.1689557416267945, "calib/final_conf_rate": 0.81640625, "calib/format_rate": 0.81640625, "calib/frac_conf_gt_0.9": 0.5598086124401914, "calib/gap": 0.2877036915793715, "calib/mean_conf": 0.7551590909090911, "calib/mu_c": 0.8556488970588235, "calib/mu_w": 0.567945205479452, "calib/nonempty_final_conf_rate": 0.81640625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13669856459330168, "calib/std_conf": 0.3021770587579477, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.18359375, "completions/max_length": 2989.0, "completions/max_terminated_length": 2989.0, "completions/mean_length": 1127.78125, "completions/mean_terminated_length": 1381.3970947265625, "completions/min_length": 0.0, "completions/min_terminated_length": 857.0, "epoch": 0.15253333333333333, "grad_norm": 0.2856874465942383, "learning_rate": 1.6111111111111113e-06, "loss": -0.2282, "num_tokens": 40585607.0, "reward": 1.1005263328552246, "reward_std": 0.429995059967041, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.6541682481765747, "rewards/format_reward_step": 0.81640625, "rewards/stepwise_brier_reward": 0.6422065496444702, "step": 143 }, { "calib/answer_extract_rate": 0.80078125, "calib/avg_num_step_conf": 10.16015625, "calib/ece": 0.12360975609756103, "calib/final_conf_rate": 0.80078125, "calib/format_rate": 0.80078125, "calib/frac_conf_gt_0.9": 0.6146341463414634, "calib/gap": 0.24458620689655175, "calib/mean_conf": 0.792, "calib/mu_c": 0.8635862068965517, "calib/mu_w": 0.619, "calib/nonempty_final_conf_rate": 0.80078125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10414634146341469, "calib/std_conf": 0.27203550700813073, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.19921875, "completions/max_length": 2964.0, "completions/max_terminated_length": 2964.0, "completions/mean_length": 1163.8046875, "completions/mean_terminated_length": 1453.3365478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 778.0, "epoch": 0.1536, "grad_norm": 0.7753714323043823, "learning_rate": 1.5833333333333333e-06, "loss": -0.3242, "num_tokens": 41010709.0, "reward": 1.1325209140777588, "reward_std": 0.5945384502410889, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.6510945558547974, "rewards/format_reward_step": 0.80078125, "rewards/stepwise_brier_reward": 0.6419569253921509, "step": 144 }, { "calib/answer_extract_rate": 0.83984375, "calib/avg_num_step_conf": 10.0625, "calib/ece": 0.16651162790697696, "calib/final_conf_rate": 0.83984375, "calib/format_rate": 0.83984375, "calib/frac_conf_gt_0.9": 0.5627906976744186, "calib/gap": 0.16140656262505015, "calib/mean_conf": 0.7806511627906978, "calib/mu_c": 0.8317006802721089, "calib/mu_w": 0.6702941176470587, "calib/nonempty_final_conf_rate": 0.83984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13172093023255835, "calib/std_conf": 0.2668917456460517, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.16015625, "completions/max_length": 2867.0, "completions/max_terminated_length": 2867.0, "completions/mean_length": 1172.73828125, "completions/mean_terminated_length": 1396.376708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 747.0, "epoch": 0.15466666666666667, "grad_norm": 0.3314751386642456, "learning_rate": 1.5555555555555558e-06, "loss": -0.1993, "num_tokens": 41436674.0, "reward": 1.1464898586273193, "reward_std": 0.45233646035194397, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6491436958312988, "rewards/format_reward_step": 0.83984375, "rewards/stepwise_brier_reward": 0.6548593640327454, "step": 145 }, { "calib/answer_extract_rate": 0.87109375, "calib/avg_num_step_conf": 9.67578125, "calib/ece": 0.21000000000000008, "calib/final_conf_rate": 0.87109375, "calib/format_rate": 0.87109375, "calib/frac_conf_gt_0.9": 0.484304932735426, "calib/gap": 0.1960435835351091, "calib/mean_conf": 0.7188789237668164, "calib/mu_c": 0.8111864406779663, "calib/mu_w": 0.6151428571428572, "calib/nonempty_final_conf_rate": 0.87109375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.199865470852018, "calib/std_conf": 0.30422009181563364, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12890625, "completions/max_length": 2949.0, "completions/max_terminated_length": 2949.0, "completions/mean_length": 1151.5625, "completions/mean_terminated_length": 1321.97314453125, "completions/min_length": 0.0, "completions/min_terminated_length": 799.0, "epoch": 0.15573333333333333, "grad_norm": 0.295030802488327, "learning_rate": 1.527777777777778e-06, "loss": -0.1696, "num_tokens": 41861730.0, "reward": 1.019819974899292, "reward_std": 0.4136025905609131, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6271792650222778, "rewards/format_reward_step": 0.87109375, "rewards/stepwise_brier_reward": 0.632733941078186, "step": 146 }, { "calib/answer_extract_rate": 0.90234375, "calib/avg_num_step_conf": 10.02734375, "calib/ece": 0.2022510822510824, "calib/final_conf_rate": 0.90234375, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 0.5497835497835498, "calib/gap": 0.18307345861158586, "calib/mean_conf": 0.7513419913419914, "calib/mu_c": 0.8258394160583943, "calib/mu_w": 0.6427659574468084, "calib/nonempty_final_conf_rate": 0.90234375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1802597402597404, "calib/std_conf": 0.2916076055181595, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2747.0, "completions/max_terminated_length": 2747.0, "completions/mean_length": 1135.1953125, "completions/mean_terminated_length": 1258.052001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 756.0, "epoch": 0.1568, "grad_norm": 0.2905469238758087, "learning_rate": 1.5e-06, "loss": -0.1829, "num_tokens": 42279060.0, "reward": 1.1319208145141602, "reward_std": 0.4488699436187744, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.6649765372276306, "rewards/format_reward_step": 0.90234375, "rewards/stepwise_brier_reward": 0.6961677074432373, "step": 147 }, { "calib/answer_extract_rate": 0.921875, "calib/avg_num_step_conf": 9.98046875, "calib/ece": 0.1297033898305086, "calib/final_conf_rate": 0.921875, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.5932203389830508, "calib/gap": 0.2493200309957383, "calib/mean_conf": 0.7858050847457629, "calib/mu_c": 0.8470786516853934, "calib/mu_w": 0.5977586206896551, "calib/nonempty_final_conf_rate": 0.921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0806355932203391, "calib/std_conf": 0.27004979950267227, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2720.0, "completions/max_terminated_length": 2720.0, "completions/mean_length": 1095.41015625, "completions/mean_terminated_length": 1188.2415771484375, "completions/min_length": 0.0, "completions/min_terminated_length": 546.0, "epoch": 0.15786666666666666, "grad_norm": 0.3573284447193146, "learning_rate": 1.4722222222222225e-06, "loss": -0.134, "num_tokens": 42687637.0, "reward": 1.3620367050170898, "reward_std": 0.4463733434677124, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.7680534720420837, "rewards/format_reward_step": 0.921875, "rewards/stepwise_brier_reward": 0.7620397806167603, "step": 148 }, { "calib/answer_extract_rate": 0.85546875, "calib/avg_num_step_conf": 9.08203125, "calib/ece": 0.14388127853881297, "calib/final_conf_rate": 0.85546875, "calib/format_rate": 0.85546875, "calib/frac_conf_gt_0.9": 0.6301369863013698, "calib/gap": 0.29610000000000014, "calib/mean_conf": 0.7777625570776258, "calib/mu_c": 0.8791666666666669, "calib/mu_w": 0.5830666666666667, "calib/nonempty_final_conf_rate": 0.85546875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1320547945205481, "calib/std_conf": 0.2957337724521902, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.14453125, "completions/max_length": 3041.0, "completions/max_terminated_length": 3041.0, "completions/mean_length": 1135.609375, "completions/mean_terminated_length": 1327.47021484375, "completions/min_length": 0.0, "completions/min_terminated_length": 682.0, "epoch": 0.15893333333333334, "grad_norm": 0.3924039900302887, "learning_rate": 1.4444444444444445e-06, "loss": -0.1958, "num_tokens": 43105849.0, "reward": 1.1683149337768555, "reward_std": 0.46608710289001465, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.6897277235984802, "rewards/format_reward_step": 0.85546875, "rewards/stepwise_brier_reward": 0.7016169428825378, "step": 149 }, { "calib/answer_extract_rate": 0.91015625, "calib/avg_num_step_conf": 9.7421875, "calib/ece": 0.16900862068965544, "calib/final_conf_rate": 0.90625, "calib/format_rate": 0.90625, "calib/frac_conf_gt_0.9": 0.7586206896551724, "calib/gap": 0.22897058823529437, "calib/mean_conf": 0.8653879310344827, "calib/mu_c": 0.9325000000000002, "calib/mu_w": 0.7035294117647058, "calib/nonempty_final_conf_rate": 0.90625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16375000000000028, "calib/std_conf": 0.2334354105289573, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2899.0, "completions/max_terminated_length": 2899.0, "completions/mean_length": 1121.7265625, "completions/mean_terminated_length": 1232.4549560546875, "completions/min_length": 0.0, "completions/min_terminated_length": 745.0, "epoch": 0.16, "grad_norm": 0.4510607123374939, "learning_rate": 1.4166666666666667e-06, "loss": -0.1138, "num_tokens": 43521011.0, "reward": 1.2876085042953491, "reward_std": 0.33042508363723755, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.7323199510574341, "rewards/format_reward_step": 0.90625, "rewards/stepwise_brier_reward": 0.7451692819595337, "step": 150 }, { "calib/answer_extract_rate": 0.9140625, "calib/avg_num_step_conf": 9.40625, "calib/ece": 0.179871794871795, "calib/final_conf_rate": 0.9140625, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.5555555555555556, "calib/gap": 0.21956062461726877, "calib/mean_conf": 0.7550854700854701, "calib/mu_c": 0.8414084507042254, "calib/mu_w": 0.6218478260869567, "calib/nonempty_final_conf_rate": 0.9140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1640598290598292, "calib/std_conf": 0.28199266743966284, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2663.0, "completions/max_terminated_length": 2663.0, "completions/mean_length": 1147.74609375, "completions/mean_terminated_length": 1255.6539306640625, "completions/min_length": 0.0, "completions/min_terminated_length": 702.0, "epoch": 0.16106666666666666, "grad_norm": 0.3453322947025299, "learning_rate": 1.3888888888888892e-06, "loss": -0.1646, "num_tokens": 43944898.0, "reward": 1.1735072135925293, "reward_std": 0.3468888998031616, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.6989699006080627, "rewards/format_reward_step": 0.9140625, "rewards/stepwise_brier_reward": 0.7117139101028442, "step": 151 }, { "calib/answer_extract_rate": 0.85546875, "calib/avg_num_step_conf": 8.62890625, "calib/ece": 0.17242009132420114, "calib/final_conf_rate": 0.85546875, "calib/format_rate": 0.85546875, "calib/frac_conf_gt_0.9": 0.5342465753424658, "calib/gap": 0.27221547536433044, "calib/mean_conf": 0.7330593607305937, "calib/mu_c": 0.8424427480916031, "calib/mu_w": 0.5702272727272727, "calib/nonempty_final_conf_rate": 0.85546875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1536529680365299, "calib/std_conf": 0.2954947111712576, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 2931.0, "completions/max_terminated_length": 2931.0, "completions/mean_length": 1103.75, "completions/mean_terminated_length": 1284.3636474609375, "completions/min_length": 0.0, "completions/min_terminated_length": 759.0, "epoch": 0.16213333333333332, "grad_norm": 0.3742715120315552, "learning_rate": 1.3611111111111112e-06, "loss": -0.1866, "num_tokens": 44355890.0, "reward": 1.101632833480835, "reward_std": 0.4346971809864044, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6715320348739624, "rewards/format_reward_step": 0.85546875, "rewards/stepwise_brier_reward": 0.6744046807289124, "step": 152 }, { "calib/answer_extract_rate": 0.91015625, "calib/avg_num_step_conf": 9.0625, "calib/ece": 0.20343347639485, "calib/final_conf_rate": 0.91015625, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.630901287553648, "calib/gap": 0.16771658721025817, "calib/mean_conf": 0.7779399141630904, "calib/mu_c": 0.834805194805195, "calib/mu_w": 0.6670886075949368, "calib/nonempty_final_conf_rate": 0.91015625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16021459227467832, "calib/std_conf": 0.2906379778314124, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2964.0, "completions/max_terminated_length": 2964.0, "completions/mean_length": 1091.0546875, "completions/mean_terminated_length": 1193.632568359375, "completions/min_length": 0.0, "completions/min_terminated_length": 533.0, "epoch": 0.1632, "grad_norm": 0.3894638419151306, "learning_rate": 1.3333333333333334e-06, "loss": -0.1133, "num_tokens": 44765560.0, "reward": 1.2110968828201294, "reward_std": 0.3976651132106781, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.6852694749832153, "rewards/format_reward_step": 0.91015625, "rewards/stepwise_brier_reward": 0.703535795211792, "step": 153 }, { "calib/answer_extract_rate": 0.92578125, "calib/avg_num_step_conf": 8.95703125, "calib/ece": 0.24582278481012673, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.6455696202531646, "calib/gap": 0.1993506493506494, "calib/mean_conf": 0.798649789029536, "calib/mu_c": 0.8869696969696971, "calib/mu_w": 0.6876190476190477, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24375527426160354, "calib/std_conf": 0.2748146128667527, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 2835.0, "completions/max_terminated_length": 2835.0, "completions/mean_length": 1094.3203125, "completions/mean_terminated_length": 1182.050537109375, "completions/min_length": 0.0, "completions/min_terminated_length": 706.0, "epoch": 0.16426666666666667, "grad_norm": 0.3308521509170532, "learning_rate": 1.3055555555555556e-06, "loss": -0.0203, "num_tokens": 45173186.0, "reward": 1.1115479469299316, "reward_std": 0.33493223786354065, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.6644241809844971, "rewards/format_reward_step": 0.92578125, "rewards/stepwise_brier_reward": 0.6845309734344482, "step": 154 }, { "calib/answer_extract_rate": 0.9375, "calib/avg_num_step_conf": 8.796875, "calib/ece": 0.22666666666666685, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.625, "calib/gap": 0.14535439982444576, "calib/mean_conf": 0.7974166666666669, "calib/mu_c": 0.8537414965986394, "calib/mu_w": 0.7083870967741936, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20579166666666684, "calib/std_conf": 0.26533625155430396, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2286.0, "completions/max_terminated_length": 2286.0, "completions/mean_length": 1043.34375, "completions/mean_terminated_length": 1112.9000244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 592.0, "epoch": 0.16533333333333333, "grad_norm": 0.3368653953075409, "learning_rate": 1.2777777777777779e-06, "loss": -0.0878, "num_tokens": 45570538.0, "reward": 1.1853913068771362, "reward_std": 0.4266975522041321, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6816155910491943, "rewards/format_reward_step": 0.9375, "rewards/stepwise_brier_reward": 0.7064590454101562, "step": 155 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 8.80859375, "calib/ece": 0.254857142857143, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.6693877551020408, "calib/gap": 0.13213520749665342, "calib/mean_conf": 0.7913469387755104, "calib/mu_c": 0.8361111111111112, "calib/mu_w": 0.7039759036144578, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19248979591836748, "calib/std_conf": 0.3032884766054413, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2529.0, "completions/max_terminated_length": 2529.0, "completions/mean_length": 1077.88671875, "completions/mean_terminated_length": 1126.2816162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 621.0, "epoch": 0.1664, "grad_norm": 0.32788556814193726, "learning_rate": 1.25e-06, "loss": -0.0551, "num_tokens": 45974277.0, "reward": 1.264580249786377, "reward_std": 0.3429688513278961, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.6950687170028687, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.7541208863258362, "step": 156 }, { "calib/answer_extract_rate": 0.91796875, "calib/avg_num_step_conf": 8.62890625, "calib/ece": 0.13868085106383005, "calib/final_conf_rate": 0.91796875, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 0.7404255319148936, "calib/gap": 0.23696136408436852, "calib/mean_conf": 0.8524680851063832, "calib/mu_c": 0.9099438202247194, "calib/mu_w": 0.6729824561403509, "calib/nonempty_final_conf_rate": 0.91796875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11685106382978751, "calib/std_conf": 0.24720366752287212, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2307.0, "completions/max_terminated_length": 2307.0, "completions/mean_length": 979.1015625, "completions/mean_terminated_length": 1066.595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 497.0, "epoch": 0.16746666666666668, "grad_norm": 0.3304344415664673, "learning_rate": 1.2222222222222223e-06, "loss": -0.0712, "num_tokens": 46351695.0, "reward": 1.3637332916259766, "reward_std": 0.33922278881073, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.7648605108261108, "rewards/format_reward_step": 0.91796875, "rewards/stepwise_brier_reward": 0.7767749428749084, "step": 157 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 8.87109375, "calib/ece": 0.21689516129032282, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.8024193548387096, "calib/gap": 0.11029681762545929, "calib/mean_conf": 0.897943548387097, "calib/mu_c": 0.9317441860465118, "calib/mu_w": 0.8214473684210525, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21064516129032285, "calib/std_conf": 0.18728375928156385, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2339.0, "completions/max_terminated_length": 2339.0, "completions/mean_length": 987.1328125, "completions/mean_terminated_length": 1018.9757690429688, "completions/min_length": 0.0, "completions/min_terminated_length": 651.0, "epoch": 0.16853333333333334, "grad_norm": 0.600447952747345, "learning_rate": 1.1944444444444446e-06, "loss": -0.0271, "num_tokens": 46732681.0, "reward": 1.3191943168640137, "reward_std": 0.39712393283843994, "rewards/accuracy_reward_step": 0.671875, "rewards/final_brier_reward_step": 0.7299152612686157, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.7435094118118286, "step": 158 }, { "calib/answer_extract_rate": 0.93359375, "calib/avg_num_step_conf": 8.609375, "calib/ece": 0.26418410041841023, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.7907949790794979, "calib/gap": 0.12414196686426482, "calib/mean_conf": 0.8959832635983266, "calib/mu_c": 0.9406535947712419, "calib/mu_w": 0.8165116279069771, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2600000000000002, "calib/std_conf": 0.18460796336643473, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 2154.0, "completions/max_terminated_length": 2154.0, "completions/mean_length": 936.7421875, "completions/mean_terminated_length": 1003.3723754882812, "completions/min_length": 0.0, "completions/min_terminated_length": 632.0, "epoch": 0.1696, "grad_norm": 0.3773444592952728, "learning_rate": 1.1666666666666668e-06, "loss": -0.0523, "num_tokens": 47100311.0, "reward": 1.2171008586883545, "reward_std": 0.3303322494029999, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.6760132908821106, "rewards/format_reward_step": 0.9296875, "rewards/stepwise_brier_reward": 0.7538768649101257, "step": 159 }, { "calib/answer_extract_rate": 0.9453125, "calib/avg_num_step_conf": 8.66796875, "calib/ece": 0.2105371900826447, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.8016528925619835, "calib/gap": 0.08989621942179415, "calib/mean_conf": 0.9030991735537193, "calib/mu_c": 0.9294736842105265, "calib/mu_w": 0.8395774647887323, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20351239669421495, "calib/std_conf": 0.17958141895077795, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 2108.0, "completions/max_terminated_length": 2108.0, "completions/mean_length": 963.9296875, "completions/mean_terminated_length": 1019.6941528320312, "completions/min_length": 0.0, "completions/min_terminated_length": 599.0, "epoch": 0.17066666666666666, "grad_norm": 0.3580320477485657, "learning_rate": 1.138888888888889e-06, "loss": -0.0707, "num_tokens": 47474957.0, "reward": 1.3058538436889648, "reward_std": 0.4367513656616211, "rewards/accuracy_reward_step": 0.66796875, "rewards/final_brier_reward_step": 0.7175910472869873, "rewards/format_reward_step": 0.9453125, "rewards/stepwise_brier_reward": 0.7382331490516663, "step": 160 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 8.90625, "calib/ece": 0.15040485829959527, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.8461538461538461, "calib/gap": 0.12649431818181822, "calib/mean_conf": 0.9221457489878544, "calib/mu_c": 0.9503125, "calib/mu_w": 0.8238181818181818, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1476113360323888, "calib/std_conf": 0.1610663196451516, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2594.0, "completions/max_terminated_length": 2594.0, "completions/mean_length": 976.9375, "completions/mean_terminated_length": 1012.534423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 644.0, "epoch": 0.17173333333333332, "grad_norm": 0.34767386317253113, "learning_rate": 1.111111111111111e-06, "loss": -0.0098, "num_tokens": 47852013.0, "reward": 1.4458568096160889, "reward_std": 0.3386097550392151, "rewards/accuracy_reward_step": 0.75, "rewards/final_brier_reward_step": 0.7948245406150818, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.8094027638435364, "step": 161 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 9.2734375, "calib/ece": 0.21533864541832684, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.8844621513944223, "calib/gap": 0.11473164516642786, "calib/mean_conf": 0.9404382470119522, "calib/mu_c": 0.9719780219780222, "calib/mu_w": 0.8572463768115943, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21533864541832684, "calib/std_conf": 0.1442255288036068, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1632.0, "completions/max_terminated_length": 1632.0, "completions/mean_length": 918.28125, "completions/mean_terminated_length": 936.57373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 411.0, "epoch": 0.1728, "grad_norm": 0.3539327383041382, "learning_rate": 1.0833333333333335e-06, "loss": -0.0135, "num_tokens": 48214277.0, "reward": 1.3857910633087158, "reward_std": 0.2928190231323242, "rewards/accuracy_reward_step": 0.7109375, "rewards/final_brier_reward_step": 0.7601113319396973, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.788566529750824, "step": 162 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 8.9921875, "calib/ece": 0.2649795918367348, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.7387755102040816, "calib/gap": 0.13937535370684762, "calib/mean_conf": 0.879265306122449, "calib/mu_c": 0.932171052631579, "calib/mu_w": 0.7927956989247313, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2619183673469389, "calib/std_conf": 0.1996043817266999, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2537.0, "completions/max_terminated_length": 2537.0, "completions/mean_length": 955.48046875, "completions/mean_terminated_length": 998.3795776367188, "completions/min_length": 0.0, "completions/min_terminated_length": 572.0, "epoch": 0.17386666666666667, "grad_norm": 0.38675457239151, "learning_rate": 1.0555555555555557e-06, "loss": -0.0548, "num_tokens": 48586752.0, "reward": 1.2166765928268433, "reward_std": 0.3924652338027954, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.6922163963317871, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.7244610786437988, "step": 163 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 9.140625, "calib/ece": 0.25266129032258083, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.7983870967741935, "calib/gap": 0.10638242894056837, "calib/mean_conf": 0.9058870967741937, "calib/mu_c": 0.9427777777777779, "calib/mu_w": 0.8363953488372096, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.25266129032258083, "calib/std_conf": 0.17819214030300223, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2852.0, "completions/max_terminated_length": 2852.0, "completions/mean_length": 979.5625, "completions/mean_terminated_length": 1011.1612548828125, "completions/min_length": 0.0, "completions/min_terminated_length": 532.0, "epoch": 0.17493333333333333, "grad_norm": 0.36825188994407654, "learning_rate": 1.0277777777777777e-06, "loss": -0.0458, "num_tokens": 48966696.0, "reward": 1.2688332796096802, "reward_std": 0.43453603982925415, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.7033937573432922, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.7497953176498413, "step": 164 }, { "calib/answer_extract_rate": 0.9609375, "calib/avg_num_step_conf": 9.37890625, "calib/ece": 0.32975609756097557, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.8252032520325203, "calib/gap": 0.1507761437908497, "calib/mean_conf": 0.9151219512195123, "calib/mu_c": 0.9776388888888891, "calib/mu_w": 0.8268627450980394, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.32975609756097557, "calib/std_conf": 0.17408700681242176, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2404.0, "completions/max_terminated_length": 2404.0, "completions/mean_length": 932.90625, "completions/mean_terminated_length": 970.8292236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 613.0, "epoch": 0.176, "grad_norm": 0.2954544723033905, "learning_rate": 1.0000000000000002e-06, "loss": -0.0724, "num_tokens": 49334136.0, "reward": 1.1690062284469604, "reward_std": 0.2975730895996094, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.6644234657287598, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.7128032445907593, "step": 165 }, { "calib/answer_extract_rate": 0.953125, "calib/avg_num_step_conf": 9.69921875, "calib/ece": 0.14987704918032807, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.819672131147541, "calib/gap": 0.25266253869969046, "calib/mean_conf": 0.9162704918032789, "calib/mu_c": 0.975294117647059, "calib/mu_w": 0.7226315789473685, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14987704918032807, "calib/std_conf": 0.16811766648691037, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2859.0, "completions/max_terminated_length": 2859.0, "completions/mean_length": 912.1796875, "completions/mean_terminated_length": 953.1346435546875, "completions/min_length": 0.0, "completions/min_terminated_length": 573.0, "epoch": 0.17706666666666668, "grad_norm": 0.3518250286579132, "learning_rate": 9.722222222222224e-07, "loss": -0.0472, "num_tokens": 49696878.0, "reward": 1.4419491291046143, "reward_std": 0.298769474029541, "rewards/accuracy_reward_step": 0.73046875, "rewards/final_brier_reward_step": 0.8203636407852173, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.8239436745643616, "step": 166 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 10.29296875, "calib/ece": 0.2028740157480316, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.8858267716535433, "calib/gap": 0.09488400488400495, "calib/mean_conf": 0.9444488188976379, "calib/mu_c": 0.9687301587301589, "calib/mu_w": 0.8738461538461539, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20161417322834657, "calib/std_conf": 0.14152255370528902, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2602.0, "completions/max_terminated_length": 2602.0, "completions/mean_length": 893.87109375, "completions/mean_terminated_length": 904.4703979492188, "completions/min_length": 0.0, "completions/min_terminated_length": 535.0, "epoch": 0.17813333333333334, "grad_norm": 0.33679449558258057, "learning_rate": 9.444444444444445e-07, "loss": 0.0066, "num_tokens": 50054357.0, "reward": 1.4315710067749023, "reward_std": 0.32214871048927307, "rewards/accuracy_reward_step": 0.73828125, "rewards/final_brier_reward_step": 0.7794097661972046, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8174647092819214, "step": 167 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 10.1015625, "calib/ece": 0.23404000000000003, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.908, "calib/gap": 0.1151349206349207, "calib/mean_conf": 0.95404, "calib/mu_c": 0.9862777777777778, "calib/mu_w": 0.8711428571428571, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23404000000000003, "calib/std_conf": 0.12890336845870243, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2232.0, "completions/max_terminated_length": 2232.0, "completions/mean_length": 904.546875, "completions/mean_terminated_length": 926.2560424804688, "completions/min_length": 0.0, "completions/min_terminated_length": 557.0, "epoch": 0.1792, "grad_norm": 0.4689064919948578, "learning_rate": 9.166666666666666e-07, "loss": -0.0086, "num_tokens": 50413633.0, "reward": 1.373067855834961, "reward_std": 0.4074147939682007, "rewards/accuracy_reward_step": 0.703125, "rewards/final_brier_reward_step": 0.7553042769432068, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.7785377502441406, "step": 168 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 10.72265625, "calib/ece": 0.29684000000000005, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.84, "calib/gap": 0.1715272427077601, "calib/mean_conf": 0.92884, "calib/mu_c": 0.9919620253164558, "calib/mu_w": 0.8204347826086957, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.29684000000000005, "calib/std_conf": 0.17167834575158278, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 1703.0, "completions/max_terminated_length": 1703.0, "completions/mean_length": 860.4453125, "completions/mean_terminated_length": 881.0960693359375, "completions/min_length": 0.0, "completions/min_terminated_length": 484.0, "epoch": 0.18026666666666666, "grad_norm": 0.3696523606777191, "learning_rate": 8.88888888888889e-07, "loss": -0.0199, "num_tokens": 50761131.0, "reward": 1.2635154724121094, "reward_std": 0.29985445737838745, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.7125222682952881, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.7696424722671509, "step": 169 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 10.53515625, "calib/ece": 0.24538152610441774, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.8755020080321285, "calib/gap": 0.07810114942528712, "calib/mean_conf": 0.9441767068273094, "calib/mu_c": 0.9677011494252873, "calib/mu_w": 0.8896000000000002, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24538152610441774, "calib/std_conf": 0.14681431357425442, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 1884.0, "completions/max_terminated_length": 1884.0, "completions/mean_length": 890.81640625, "completions/mean_terminated_length": 915.859375, "completions/min_length": 0.0, "completions/min_terminated_length": 608.0, "epoch": 0.18133333333333335, "grad_norm": 0.35689327120780945, "learning_rate": 8.611111111111112e-07, "loss": -0.0341, "num_tokens": 51116372.0, "reward": 1.332000970840454, "reward_std": 0.4051094353199005, "rewards/accuracy_reward_step": 0.6796875, "rewards/final_brier_reward_step": 0.7203788757324219, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7794337868690491, "step": 170 }, { "calib/answer_extract_rate": 0.9609375, "calib/avg_num_step_conf": 10.61328125, "calib/ece": 0.39426829268292696, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.8699186991869918, "calib/gap": 0.08041441441441433, "calib/mean_conf": 0.9430487804878049, "calib/mu_c": 0.9793333333333334, "calib/mu_w": 0.8989189189189191, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.39426829268292696, "calib/std_conf": 0.15150751161493137, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2502.0, "completions/max_terminated_length": 2502.0, "completions/mean_length": 878.06640625, "completions/mean_terminated_length": 913.7601318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 585.0, "epoch": 0.1824, "grad_norm": 0.4127947688102722, "learning_rate": 8.333333333333333e-07, "loss": -0.0604, "num_tokens": 51471093.0, "reward": 1.082554817199707, "reward_std": 0.3952634930610657, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5898253917694092, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.6568180322647095, "step": 171 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 11.1875, "calib/ece": 0.24062992125984253, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9173228346456693, "calib/gap": 0.014017182130584338, "calib/mean_conf": 0.9650393700787402, "calib/mu_c": 0.9683505154639176, "calib/mu_w": 0.9543333333333333, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2209448818897638, "calib/std_conf": 0.12837183943006472, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2567.0, "completions/max_terminated_length": 2567.0, "completions/mean_length": 898.984375, "completions/mean_terminated_length": 909.644287109375, "completions/min_length": 0.0, "completions/min_terminated_length": 596.0, "epoch": 0.18346666666666667, "grad_norm": 0.4310997724533081, "learning_rate": 8.055555555555557e-07, "loss": -0.0105, "num_tokens": 51827625.0, "reward": 1.4327988624572754, "reward_std": 0.3014458417892456, "rewards/accuracy_reward_step": 0.7578125, "rewards/final_brier_reward_step": 0.7577491998672485, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7891346216201782, "step": 172 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 11.22265625, "calib/ece": 0.3241224489795919, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.9510204081632653, "calib/gap": 0.024891304347825827, "calib/mean_conf": 0.9802857142857143, "calib/mu_c": 0.9888198757763974, "calib/mu_w": 0.9639285714285716, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3236326530612246, "calib/std_conf": 0.08761884057946553, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2031.0, "completions/max_terminated_length": 2031.0, "completions/mean_length": 836.6796875, "completions/mean_terminated_length": 874.244873046875, "completions/min_length": 0.0, "completions/min_terminated_length": 466.0, "epoch": 0.18453333333333333, "grad_norm": 0.36348190903663635, "learning_rate": 7.777777777777779e-07, "loss": -0.0406, "num_tokens": 52168015.0, "reward": 1.2189308404922485, "reward_std": 0.24862341582775116, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.6448589563369751, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.6875680685043335, "step": 173 }, { "calib/answer_extract_rate": 0.94921875, "calib/avg_num_step_conf": 10.60546875, "calib/ece": 0.3365432098765434, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.8559670781893004, "calib/gap": 0.09681122448979584, "calib/mean_conf": 0.9414814814814816, "calib/mu_c": 0.9797278911564624, "calib/mu_w": 0.8829166666666666, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3365432098765434, "calib/std_conf": 0.15306292133471497, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 2782.0, "completions/max_terminated_length": 2782.0, "completions/mean_length": 898.26171875, "completions/mean_terminated_length": 946.3168334960938, "completions/min_length": 0.0, "completions/min_terminated_length": 523.0, "epoch": 0.1856, "grad_norm": 0.4376189410686493, "learning_rate": 7.5e-07, "loss": -0.0946, "num_tokens": 52525242.0, "reward": 1.1533571481704712, "reward_std": 0.48044002056121826, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6326359510421753, "rewards/format_reward_step": 0.94140625, "rewards/stepwise_brier_reward": 0.6747190356254578, "step": 174 }, { "calib/answer_extract_rate": 0.94140625, "calib/avg_num_step_conf": 10.56640625, "calib/ece": 0.37688796680497927, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.8796680497925311, "calib/gap": 0.02313971742543175, "calib/mean_conf": 0.9559751037344397, "calib/mu_c": 0.9653846153846155, "calib/mu_w": 0.9422448979591838, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3697510373443984, "calib/std_conf": 0.1329773301526723, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 2255.0, "completions/max_terminated_length": 2255.0, "completions/mean_length": 861.69140625, "completions/mean_terminated_length": 911.541259765625, "completions/min_length": 0.0, "completions/min_terminated_length": 557.0, "epoch": 0.18666666666666668, "grad_norm": 0.4053885340690613, "learning_rate": 7.222222222222222e-07, "loss": -0.0515, "num_tokens": 52874699.0, "reward": 1.1094977855682373, "reward_std": 0.5093092918395996, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.5843409895896912, "rewards/format_reward_step": 0.94140625, "rewards/stepwise_brier_reward": 0.6583711504936218, "step": 175 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 10.9921875, "calib/ece": 0.30984313725490203, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.8823529411764706, "calib/gap": 0.07997932782075223, "calib/mean_conf": 0.9490588235294117, "calib/mu_c": 0.9779141104294479, "calib/mu_w": 0.8979347826086956, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.30984313725490203, "calib/std_conf": 0.1494115794348087, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1650.0, "completions/max_terminated_length": 1650.0, "completions/mean_length": 903.86328125, "completions/mean_terminated_length": 910.9802856445312, "completions/min_length": 0.0, "completions/min_terminated_length": 517.0, "epoch": 0.18773333333333334, "grad_norm": 0.3664768636226654, "learning_rate": 6.944444444444446e-07, "loss": 0.0227, "num_tokens": 53233192.0, "reward": 1.2727751731872559, "reward_std": 0.27945634722709656, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.6852566599845886, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7752746343612671, "step": 176 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 10.94140625, "calib/ece": 0.2897200000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.852, "calib/gap": 0.07533333333333314, "calib/mean_conf": 0.94372, "calib/mu_c": 0.9693333333333333, "calib/mu_w": 0.8940000000000001, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2867200000000001, "calib/std_conf": 0.1444581655705208, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 1574.0, "completions/max_terminated_length": 1574.0, "completions/mean_length": 862.55078125, "completions/mean_terminated_length": 883.2520141601562, "completions/min_length": 0.0, "completions/min_terminated_length": 450.0, "epoch": 0.1888, "grad_norm": 0.38255923986434937, "learning_rate": 6.666666666666667e-07, "loss": -0.0378, "num_tokens": 53580877.0, "reward": 1.2706055641174316, "reward_std": 0.31008511781692505, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.6875433921813965, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7401479482650757, "step": 177 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 10.94140625, "calib/ece": 0.2507569721115539, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.8924302788844621, "calib/gap": 0.057092685906245166, "calib/mean_conf": 0.9559362549800797, "calib/mu_c": 0.9727683615819209, "calib/mu_w": 0.9156756756756758, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2507569721115539, "calib/std_conf": 0.13001463264384003, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1506.0, "completions/max_terminated_length": 1506.0, "completions/mean_length": 841.1328125, "completions/mean_terminated_length": 854.4841918945312, "completions/min_length": 0.0, "completions/min_terminated_length": 531.0, "epoch": 0.18986666666666666, "grad_norm": 0.37112635374069214, "learning_rate": 6.388888888888889e-07, "loss": -0.017, "num_tokens": 53925319.0, "reward": 1.3369669914245605, "reward_std": 0.4431039094924927, "rewards/accuracy_reward_step": 0.69140625, "rewards/final_brier_reward_step": 0.7177726626396179, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.7560725808143616, "step": 178 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 10.47265625, "calib/ece": 0.27591836734693886, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.8857142857142857, "calib/gap": 0.1015212749733112, "calib/mean_conf": 0.953469387755102, "calib/mu_c": 0.9862048192771085, "calib/mu_w": 0.8846835443037973, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.27591836734693886, "calib/std_conf": 0.1372700454921651, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2137.0, "completions/max_terminated_length": 2137.0, "completions/mean_length": 858.78125, "completions/mean_terminated_length": 897.3387451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 439.0, "epoch": 0.19093333333333334, "grad_norm": 0.36745166778564453, "learning_rate": 6.111111111111112e-07, "loss": -0.0585, "num_tokens": 54274471.0, "reward": 1.281066656112671, "reward_std": 0.4145931005477905, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.6955976486206055, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.7580711841583252, "step": 179 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 10.26953125, "calib/ece": 0.2721825396825397, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.8174603174603174, "calib/gap": 0.10623385939741747, "calib/mean_conf": 0.9333730158730158, "calib/mu_c": 0.9679411764705883, "calib/mu_w": 0.8617073170731708, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2654761904761905, "calib/std_conf": 0.15649475036212718, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1696.0, "completions/max_terminated_length": 1696.0, "completions/mean_length": 926.98046875, "completions/mean_terminated_length": 941.6945190429688, "completions/min_length": 0.0, "completions/min_terminated_length": 533.0, "epoch": 0.192, "grad_norm": 0.44686973094940186, "learning_rate": 5.833333333333334e-07, "loss": -0.0096, "num_tokens": 54638674.0, "reward": 1.3219671249389648, "reward_std": 0.322432279586792, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.7163659930229187, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.8098236918449402, "step": 180 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 10.296875, "calib/ece": 0.3192460317460317, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.8293650793650794, "calib/gap": 0.1577126704356503, "calib/mean_conf": 0.9343253968253967, "calib/mu_c": 0.9950322580645162, "calib/mu_w": 0.8373195876288659, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3192460317460317, "calib/std_conf": 0.1545857921439711, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2251.0, "completions/max_terminated_length": 2251.0, "completions/mean_length": 891.0234375, "completions/mean_terminated_length": 905.166748046875, "completions/min_length": 0.0, "completions/min_terminated_length": 590.0, "epoch": 0.19306666666666666, "grad_norm": 0.3694749176502228, "learning_rate": 5.555555555555555e-07, "loss": -0.0261, "num_tokens": 54996080.0, "reward": 1.23591947555542, "reward_std": 0.4027971625328064, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6970745921134949, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7354660034179688, "step": 181 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 10.453125, "calib/ece": 0.27925490196078434, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.8549019607843137, "calib/gap": 0.1389244663382594, "calib/mean_conf": 0.938078431372549, "calib/mu_c": 0.9854761904761905, "calib/mu_w": 0.8465517241379311, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.27925490196078434, "calib/std_conf": 0.16721898793782713, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1656.0, "completions/max_terminated_length": 1656.0, "completions/mean_length": 879.390625, "completions/mean_terminated_length": 886.31494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 531.0, "epoch": 0.19413333333333332, "grad_norm": 0.41442859172821045, "learning_rate": 5.277777777777779e-07, "loss": -0.0078, "num_tokens": 55350404.0, "reward": 1.3059650659561157, "reward_std": 0.33462876081466675, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.7249683737754822, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7520487308502197, "step": 182 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 9.91796875, "calib/ece": 0.21244979919678725, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.8152610441767069, "calib/gap": 0.08924490809736707, "calib/mean_conf": 0.9210441767068273, "calib/mu_c": 0.9446994535519125, "calib/mu_w": 0.8554545454545455, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19927710843373503, "calib/std_conf": 0.1821660008445034, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2061.0, "completions/max_terminated_length": 2061.0, "completions/mean_length": 850.04296875, "completions/mean_terminated_length": 870.4440307617188, "completions/min_length": 0.0, "completions/min_terminated_length": 424.0, "epoch": 0.1952, "grad_norm": 0.36135947704315186, "learning_rate": 5.000000000000001e-07, "loss": -0.0379, "num_tokens": 55697735.0, "reward": 1.369386911392212, "reward_std": 0.38945871591567993, "rewards/accuracy_reward_step": 0.71484375, "rewards/final_brier_reward_step": 0.7393156290054321, "rewards/format_reward_step": 0.9453125, "rewards/stepwise_brier_reward": 0.7614158391952515, "step": 183 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 9.98828125, "calib/ece": 0.20786290322580644, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.8790322580645161, "calib/gap": 0.12473392181588894, "calib/mean_conf": 0.941733870967742, "calib/mu_c": 0.9744262295081967, "calib/mu_w": 0.8496923076923077, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20584677419354838, "calib/std_conf": 0.17850809184111946, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2501.0, "completions/max_terminated_length": 2501.0, "completions/mean_length": 848.4609375, "completions/mean_terminated_length": 875.8306274414062, "completions/min_length": 0.0, "completions/min_terminated_length": 582.0, "epoch": 0.19626666666666667, "grad_norm": 0.4687948226928711, "learning_rate": 4.7222222222222226e-07, "loss": -0.0518, "num_tokens": 56043261.0, "reward": 1.3840022087097168, "reward_std": 0.442663311958313, "rewards/accuracy_reward_step": 0.71484375, "rewards/final_brier_reward_step": 0.7531074285507202, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.7876062393188477, "step": 184 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 10.0390625, "calib/ece": 0.22297619047619047, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.7817460317460317, "calib/gap": 0.2493959827833574, "calib/mean_conf": 0.8970238095238096, "calib/mu_c": 0.9781764705882354, "calib/mu_w": 0.728780487804878, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2226984126984127, "calib/std_conf": 0.214737453734278, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1970.0, "completions/max_terminated_length": 1970.0, "completions/mean_length": 857.51171875, "completions/mean_terminated_length": 874.5936279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 501.0, "epoch": 0.19733333333333333, "grad_norm": 0.48540031909942627, "learning_rate": 4.444444444444445e-07, "loss": 0.002, "num_tokens": 56392744.0, "reward": 1.350165843963623, "reward_std": 0.3727245628833771, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.778076171875, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7960731983184814, "step": 185 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 9.8203125, "calib/ece": 0.21489959839357436, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.7389558232931727, "calib/gap": 0.1803615520282188, "calib/mean_conf": 0.8815662650602409, "calib/mu_c": 0.9402380952380953, "calib/mu_w": 0.7598765432098765, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21088353413654626, "calib/std_conf": 0.2124960586133561, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 1933.0, "completions/max_terminated_length": 1933.0, "completions/mean_length": 888.76171875, "completions/mean_terminated_length": 913.7469482421875, "completions/min_length": 0.0, "completions/min_terminated_length": 560.0, "epoch": 0.1984, "grad_norm": 0.3782590329647064, "learning_rate": 4.1666666666666667e-07, "loss": -0.0279, "num_tokens": 56748347.0, "reward": 1.3173680305480957, "reward_std": 0.3679579496383667, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.7408746480941772, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.7799100875854492, "step": 186 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 9.6484375, "calib/ece": 0.24423387096774204, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.7056451612903226, "calib/gap": 0.09330047462577595, "calib/mean_conf": 0.8766532258064514, "calib/mu_c": 0.907878787878788, "calib/mu_w": 0.814578313253012, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22778225806451624, "calib/std_conf": 0.20995508522580444, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2672.0, "completions/max_terminated_length": 2672.0, "completions/mean_length": 940.7578125, "completions/mean_terminated_length": 967.2047729492188, "completions/min_length": 0.0, "completions/min_terminated_length": 506.0, "epoch": 0.19946666666666665, "grad_norm": 0.4038413166999817, "learning_rate": 3.8888888888888895e-07, "loss": -0.0163, "num_tokens": 57113765.0, "reward": 1.2750946283340454, "reward_std": 0.4495476484298706, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.6956921815872192, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.7496190071105957, "step": 187 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 10.046875, "calib/ece": 0.1967450980392157, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.803921568627451, "calib/gap": 0.20235303123086357, "calib/mean_conf": 0.9109411764705884, "calib/mu_c": 0.9672826086956523, "calib/mu_w": 0.7649295774647887, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19305882352941175, "calib/std_conf": 0.19630882150972606, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1942.0, "completions/max_terminated_length": 1942.0, "completions/mean_length": 895.890625, "completions/mean_terminated_length": 902.9448852539062, "completions/min_length": 0.0, "completions/min_terminated_length": 542.0, "epoch": 0.20053333333333334, "grad_norm": 0.37633347511291504, "learning_rate": 3.611111111111111e-07, "loss": 0.0081, "num_tokens": 57470225.0, "reward": 1.4211552143096924, "reward_std": 0.27990084886550903, "rewards/accuracy_reward_step": 0.71875, "rewards/final_brier_reward_step": 0.795285165309906, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8237380981445312, "step": 188 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 10.16796875, "calib/ece": 0.21062745098039215, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.7176470588235294, "calib/gap": 0.15619883040935656, "calib/mean_conf": 0.878078431372549, "calib/mu_c": 0.92953216374269, "calib/mu_w": 0.7733333333333334, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20905882352941177, "calib/std_conf": 0.2113370918640157, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1918.0, "completions/max_terminated_length": 1918.0, "completions/mean_length": 867.20703125, "completions/mean_terminated_length": 874.035400390625, "completions/min_length": 0.0, "completions/min_terminated_length": 546.0, "epoch": 0.2016, "grad_norm": 0.3926154375076294, "learning_rate": 3.3333333333333335e-07, "loss": 0.0016, "num_tokens": 57823038.0, "reward": 1.3388947248458862, "reward_std": 0.3091922998428345, "rewards/accuracy_reward_step": 0.66796875, "rewards/final_brier_reward_step": 0.7508214712142944, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7883110046386719, "step": 189 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 9.83203125, "calib/ece": 0.20427419354838708, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.75, "calib/gap": 0.26648640551269565, "calib/mean_conf": 0.8783064516129032, "calib/mu_c": 0.9631952662721893, "calib/mu_w": 0.6967088607594937, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20056451612903223, "calib/std_conf": 0.2390664699516661, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2343.0, "completions/max_terminated_length": 2343.0, "completions/mean_length": 892.70703125, "completions/mean_terminated_length": 917.8031616210938, "completions/min_length": 0.0, "completions/min_terminated_length": 546.0, "epoch": 0.20266666666666666, "grad_norm": 0.3378767669200897, "learning_rate": 3.055555555555556e-07, "loss": -0.0152, "num_tokens": 58180219.0, "reward": 1.346990704536438, "reward_std": 0.2692444920539856, "rewards/accuracy_reward_step": 0.66015625, "rewards/final_brier_reward_step": 0.7776304483413696, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.8045772314071655, "step": 190 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 10.078125, "calib/ece": 0.27876, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.752, "calib/gap": 0.17710599016238815, "calib/mean_conf": 0.89076, "calib/mu_c": 0.9594771241830066, "calib/mu_w": 0.7823711340206184, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.27876, "calib/std_conf": 0.20569254337481466, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2427.0, "completions/max_terminated_length": 2427.0, "completions/mean_length": 877.078125, "completions/mean_terminated_length": 898.1280517578125, "completions/min_length": 0.0, "completions/min_terminated_length": 505.0, "epoch": 0.20373333333333332, "grad_norm": 0.4926356077194214, "learning_rate": 2.7777777777777776e-07, "loss": -0.0411, "num_tokens": 58531959.0, "reward": 1.2263085842132568, "reward_std": 0.286679208278656, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.7017941474914551, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.7235209345817566, "step": 191 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 9.90234375, "calib/ece": 0.18468, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.784, "calib/gap": 0.17026425250795207, "calib/mean_conf": 0.89732, "calib/mu_c": 0.9429508196721311, "calib/mu_w": 0.772686567164179, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.17500000000000002, "calib/std_conf": 0.20747148623365091, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 1855.0, "completions/max_terminated_length": 1855.0, "completions/mean_length": 889.34375, "completions/mean_terminated_length": 910.6880493164062, "completions/min_length": 0.0, "completions/min_terminated_length": 404.0, "epoch": 0.2048, "grad_norm": 0.39645713567733765, "learning_rate": 2.5000000000000004e-07, "loss": -0.0324, "num_tokens": 58887647.0, "reward": 1.3959075212478638, "reward_std": 0.282784640789032, "rewards/accuracy_reward_step": 0.71484375, "rewards/final_brier_reward_step": 0.7707542777061462, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.7968087196350098, "step": 192 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 9.890625, "calib/ece": 0.25155102040816335, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.7918367346938775, "calib/gap": 0.16522256728778462, "calib/mean_conf": 0.9086938775510204, "calib/mu_c": 0.9653416149068323, "calib/mu_w": 0.8001190476190477, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.25155102040816335, "calib/std_conf": 0.1963377144763097, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 1972.0, "completions/max_terminated_length": 1972.0, "completions/mean_length": 879.12109375, "completions/mean_terminated_length": 918.591796875, "completions/min_length": 0.0, "completions/min_terminated_length": 573.0, "epoch": 0.20586666666666667, "grad_norm": 0.4955199956893921, "learning_rate": 2.2222222222222224e-07, "loss": -0.0345, "num_tokens": 59241454.0, "reward": 1.2673017978668213, "reward_std": 0.43317940831184387, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.7152073979377747, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.7403548955917358, "step": 193 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 10.1875, "calib/ece": 0.21822134387351774, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.7193675889328063, "calib/gap": 0.23310701562071812, "calib/mean_conf": 0.8596442687747036, "calib/mu_c": 0.9416463414634146, "calib/mu_w": 0.7085393258426965, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21482213438735176, "calib/std_conf": 0.247046330200576, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1501.0, "completions/max_terminated_length": 1501.0, "completions/mean_length": 906.625, "completions/mean_terminated_length": 921.0159301757812, "completions/min_length": 0.0, "completions/min_terminated_length": 590.0, "epoch": 0.20693333333333333, "grad_norm": 0.4592355787754059, "learning_rate": 1.9444444444444447e-07, "loss": -0.0213, "num_tokens": 59602534.0, "reward": 1.322446584701538, "reward_std": 0.2598746418952942, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.763495683670044, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8049821853637695, "step": 194 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 10.02734375, "calib/ece": 0.28528225806451624, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.7862903225806451, "calib/gap": 0.17409643547941434, "calib/mean_conf": 0.8984274193548387, "calib/mu_c": 0.9644155844155845, "calib/mu_w": 0.7903191489361702, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2813709677419356, "calib/std_conf": 0.21392724394006452, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 1850.0, "completions/max_terminated_length": 1850.0, "completions/mean_length": 873.3046875, "completions/mean_terminated_length": 901.4757690429688, "completions/min_length": 0.0, "completions/min_terminated_length": 484.0, "epoch": 0.208, "grad_norm": 0.4438525140285492, "learning_rate": 1.6666666666666668e-07, "loss": -0.0591, "num_tokens": 59955124.0, "reward": 1.2250550985336304, "reward_std": 0.3435608744621277, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.6959050893783569, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.7177851796150208, "step": 195 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 10.67578125, "calib/ece": 0.237992125984252, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.8622047244094488, "calib/gap": 0.1403933933933933, "calib/mean_conf": 0.9466535433070866, "calib/mu_c": 0.9875555555555555, "calib/mu_w": 0.8471621621621622, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.237992125984252, "calib/std_conf": 0.14324220323165088, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1633.0, "completions/max_terminated_length": 1633.0, "completions/mean_length": 852.4296875, "completions/mean_terminated_length": 859.1417236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 584.0, "epoch": 0.20906666666666668, "grad_norm": 0.370975524187088, "learning_rate": 1.3888888888888888e-07, "loss": -0.0231, "num_tokens": 60298930.0, "reward": 1.385732650756836, "reward_std": 0.22724959254264832, "rewards/accuracy_reward_step": 0.703125, "rewards/final_brier_reward_step": 0.7683027386665344, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7969499826431274, "step": 196 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 10.14453125, "calib/ece": 0.3686111111111112, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.8055555555555556, "calib/gap": 0.07068238451217168, "calib/mean_conf": 0.9190079365079364, "calib/mu_c": 0.9501418439716312, "calib/mu_w": 0.8794594594594595, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3640476190476192, "calib/std_conf": 0.17895920418466515, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1786.0, "completions/max_terminated_length": 1786.0, "completions/mean_length": 915.42578125, "completions/mean_terminated_length": 933.661376953125, "completions/min_length": 0.0, "completions/min_terminated_length": 549.0, "epoch": 0.21013333333333334, "grad_norm": 0.7663440704345703, "learning_rate": 1.1111111111111112e-07, "loss": -0.0453, "num_tokens": 60661375.0, "reward": 1.1283903121948242, "reward_std": 0.38009026646614075, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.6173292398452759, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.6820275187492371, "step": 197 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 10.44140625, "calib/ece": 0.19290196078431368, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.796078431372549, "calib/gap": 0.16206284153005468, "calib/mean_conf": 0.9104705882352941, "calib/mu_c": 0.9562295081967214, "calib/mu_w": 0.7941666666666667, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1928627450980392, "calib/std_conf": 0.19217989457809093, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2108.0, "completions/max_terminated_length": 2108.0, "completions/mean_length": 894.81640625, "completions/mean_terminated_length": 901.8621826171875, "completions/min_length": 0.0, "completions/min_terminated_length": 508.0, "epoch": 0.2112, "grad_norm": 0.9934461712837219, "learning_rate": 8.333333333333334e-08, "loss": -0.0034, "num_tokens": 61018872.0, "reward": 1.4082475900650024, "reward_std": 0.3323134183883667, "rewards/accuracy_reward_step": 0.71484375, "rewards/final_brier_reward_step": 0.7858519554138184, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.803473711013794, "step": 198 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 10.34375, "calib/ece": 0.22410358565737049, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.8087649402390438, "calib/gap": 0.1680443609022556, "calib/mean_conf": 0.9133466135458167, "calib/mu_c": 0.9642285714285714, "calib/mu_w": 0.7961842105263158, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22011952191235057, "calib/std_conf": 0.1921858832578285, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2233.0, "completions/max_terminated_length": 2233.0, "completions/mean_length": 915.91015625, "completions/mean_terminated_length": 934.1553955078125, "completions/min_length": 0.0, "completions/min_terminated_length": 577.0, "epoch": 0.21226666666666666, "grad_norm": 0.48377180099487305, "learning_rate": 5.555555555555556e-08, "loss": -0.0161, "num_tokens": 61380585.0, "reward": 1.3653557300567627, "reward_std": 0.35379722714424133, "rewards/accuracy_reward_step": 0.68359375, "rewards/final_brier_reward_step": 0.7610331773757935, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8127938508987427, "step": 199 }, { "calib/answer_extract_rate": 0.953125, "calib/avg_num_step_conf": 10.03515625, "calib/ece": 0.2200819672131148, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.7909836065573771, "calib/gap": 0.17872466554514133, "calib/mean_conf": 0.9123770491803279, "calib/mu_c": 0.9658479532163743, "calib/mu_w": 0.787123287671233, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21581967213114756, "calib/std_conf": 0.1854460872505499, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2085.0, "completions/max_terminated_length": 2085.0, "completions/mean_length": 858.55078125, "completions/mean_terminated_length": 900.7745361328125, "completions/min_length": 0.0, "completions/min_terminated_length": 605.0, "epoch": 0.21333333333333335, "grad_norm": 0.3373366892337799, "learning_rate": 2.777777777777778e-08, "loss": -0.0863, "num_tokens": 61731462.0, "reward": 1.3362358808517456, "reward_std": 0.3129503130912781, "rewards/accuracy_reward_step": 0.66796875, "rewards/final_brier_reward_step": 0.7492789030075073, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.7932606935501099, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": -0.07741938081453555, "train_runtime": 10541.1044, "train_samples_per_second": 4.857, "train_steps_per_second": 0.019 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 61731462, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }