{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_brier/lambda": 0.019999999999999997, "aux_brier/loss": -9.102586197069666e-06, "aux_brier/mean_r": 0.4665906016643231, "aux_brier/n_active_tok": 24.615384615384617, "aux_brier/n_step_records": 6.153846153846154, "aux_brier/std_r": 0.17818317848902482, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.11138106137514114, "learning_rate": 2.5000000000000004e-07, "loss": 0.0318, "num_tokens": 264685.0, "reward": 0.04124843701720238, "reward_std": 0.0838509351015091, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step": 0.04296875, "step": 1 }, { "aux_brier/lambda": 0.020000000000000004, "aux_brier/loss": 1.2052168732018822e-07, "aux_brier/mean_r": 0.43024390700616333, "aux_brier/n_active_tok": 28.42105263157895, "aux_brier/n_step_records": 7.105263157894737, "aux_brier/std_r": 0.16724381615456782, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.029361596331000328, "learning_rate": 5.000000000000001e-07, "loss": 0.0643, "num_tokens": 533467.0, "reward": 0.08358447253704071, "reward_std": 0.15892045199871063, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step": 0.08984375, "step": 2 }, { "aux_brier/lambda": 0.019999999999999997, "aux_brier/loss": 2.013177297881847e-08, "aux_brier/mean_r": 0.4436229850564684, "aux_brier/n_active_tok": 22.0, "aux_brier/n_step_records": 5.5, "aux_brier/std_r": 0.15689051124666417, "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.6363636363636364, "calib/avg_num_step_conf": 0.30859375, "calib/ece": 0.7314285714285714, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7857142857142857, "calib/gap": 0.030909090909090997, "calib/mean_conf": 0.9457142857142856, "calib/mu_c": 0.9700000000000001, "calib/mu_w": 0.9390909090909091, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.078125, "calib/nonempty_step_conf_rate": 0.0625, "calib/pce": 0.7314285714285714, "calib/std_conf": 0.04670795064077662, "calib/step_conf_rate": 0.0625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2842.0, "completions/max_terminated_length": 2842.0, "completions/mean_length": 649.76171875, "completions/mean_terminated_length": 707.8255004882812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.09515827894210815, "learning_rate": 7.5e-07, "loss": 0.0017, "num_tokens": 805062.0, "reward": 0.03631914034485817, "reward_std": 0.07378481328487396, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.012464063242077827, "rewards/format_reward_step": 0.04296875, "step": 3 }, { "aux_brier/lambda": 0.019999999999999997, "aux_brier/loss": -8.53978453539485e-07, "aux_brier/mean_r": 0.5349999176604407, "aux_brier/n_active_tok": 18.571428571428573, "aux_brier/n_step_records": 4.642857142857143, "aux_brier/std_r": 0.17579160689326404, "calib/answer_extract_rate": 0.0859375, "calib/auroc": 0.6111111111111112, "calib/avg_num_step_conf": 0.26171875, "calib/ece": 0.7313333333333334, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 0.8, "calib/gap": 0.039999999999999813, "calib/mean_conf": 0.9313333333333333, "calib/mu_c": 0.9633333333333333, "calib/mu_w": 0.9233333333333335, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.09375, "calib/nonempty_step_conf_rate": 0.0625, "calib/pce": 0.7313333333333334, "calib/std_conf": 0.07428473747831531, "calib/step_conf_rate": 0.0625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3003.0, "completions/max_terminated_length": 3003.0, "completions/mean_length": 675.36328125, "completions/mean_terminated_length": 738.8590087890625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.0383746363222599, "learning_rate": 1.0000000000000002e-06, "loss": 0.0326, "num_tokens": 1084123.0, "reward": 0.038529880344867706, "reward_std": 0.08679422736167908, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.013494530692696571, "rewards/format_reward_step": 0.046875, "step": 4 }, { "aux_brier/lambda": 0.019999999999999997, "aux_brier/loss": -1.0257448446182593e-08, "aux_brier/mean_r": 0.5654468197714199, "aux_brier/n_active_tok": 22.90909090909091, "aux_brier/n_step_records": 5.7272727272727275, "aux_brier/std_r": 0.1872368726710027, "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.45238095238095233, "calib/avg_num_step_conf": 0.26171875, "calib/ece": 0.607, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.01953125, "calib/frac_conf_gt_0.9": 0.6, "calib/gap": -0.13000000000000012, "calib/mean_conf": 0.651, "calib/mu_c": 0.5599999999999999, "calib/mu_w": 0.6900000000000001, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.078125, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.479, "calib/std_conf": 0.42893938965779305, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3049.0, "completions/max_terminated_length": 3049.0, "completions/mean_length": 716.87109375, "completions/mean_terminated_length": 771.0882568359375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.17645806074142456, "learning_rate": 1.25e-06, "loss": 0.0011, "num_tokens": 1374330.0, "reward": 0.02357129007577896, "reward_std": 0.05362445116043091, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.008347656577825546, "rewards/format_reward_step": 0.01953125, "step": 5 }, { "aux_brier/lambda": 0.02, "aux_brier/loss": -2.138062927770079e-05, "aux_brier/mean_r": 0.43400503508746624, "aux_brier/n_active_tok": 20.25, "aux_brier/n_step_records": 5.0625, "aux_brier/std_r": 0.14133647322887555, "calib/answer_extract_rate": 0.09375, "calib/auroc": 0.7, "calib/avg_num_step_conf": 0.33203125, "calib/ece": 0.6841666666666668, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": 0.11899999999999988, "calib/mean_conf": 0.8508333333333334, "calib/mu_c": 0.95, "calib/mu_w": 0.8310000000000001, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.078125, "calib/pce": 0.6841666666666668, "calib/std_conf": 0.2421589812958054, "calib/step_conf_rate": 0.078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3053.0, "completions/max_terminated_length": 3053.0, "completions/mean_length": 552.08203125, "completions/mean_terminated_length": 601.4169921875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.0882822722196579, "learning_rate": 1.5e-06, "loss": 0.0196, "num_tokens": 1621615.0, "reward": 0.035184863954782486, "reward_std": 0.0783327966928482, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.015739452093839645, "rewards/format_reward_step": 0.0390625, "step": 6 }, { "aux_brier/lambda": 0.02, "aux_brier/loss": -3.1514367644073597e-06, "aux_brier/mean_r": 0.3562329987684886, "aux_brier/n_active_tok": 19.733333333333334, "aux_brier/n_step_records": 4.933333333333334, "aux_brier/std_r": 0.14275573218862217, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.5808080808080809, "calib/avg_num_step_conf": 0.2890625, "calib/ece": 0.4835, "calib/final_conf_rate": 0.078125, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 0.9, "calib/gap": 0.07646464646464657, "calib/mean_conf": 0.9334999999999999, "calib/mu_c": 0.9755555555555557, "calib/mu_w": 0.8990909090909092, "calib/nonempty_final_conf_rate": 0.078125, "calib/nonempty_reasoning_rate": 0.09375, "calib/nonempty_step_conf_rate": 0.0625, "calib/pce": 0.4835, "calib/std_conf": 0.15906838152191025, "calib/step_conf_rate": 0.0625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 3039.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 684.50390625, "completions/mean_terminated_length": 758.5844116210938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.004410548135638237, "learning_rate": 1.75e-06, "loss": 0.0098, "num_tokens": 1904272.0, "reward": 0.06951679289340973, "reward_std": 0.1377854347229004, "rewards/accuracy_reward_step": 0.03515625, "rewards/final_brier_reward_step": 0.03587968647480011, "rewards/format_reward_step": 0.05078125, "step": 7 }, { "aux_brier/lambda": 0.019999999999999997, "aux_brier/loss": -2.4912921218422923e-05, "aux_brier/mean_r": 0.6748109608888626, "aux_brier/n_active_tok": 23.666666666666668, "aux_brier/n_step_records": 5.916666666666667, "aux_brier/std_r": 0.11392478086054325, "calib/answer_extract_rate": 0.1171875, "calib/auroc": 0.44642857142857145, "calib/avg_num_step_conf": 0.31640625, "calib/ece": 0.5731649999999999, "calib/final_conf_rate": 0.078125, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 0.8, "calib/gap": 0.09071666666666656, "calib/mean_conf": 0.8731649999999999, "calib/mu_c": 0.9366666666666666, "calib/mu_w": 0.8459500000000001, "calib/nonempty_final_conf_rate": 0.078125, "calib/nonempty_reasoning_rate": 0.1328125, "calib/nonempty_step_conf_rate": 0.07421875, "calib/pce": 0.5731649999999999, "calib/std_conf": 0.23609897770850258, "calib/step_conf_rate": 0.07421875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3045.0, "completions/max_terminated_length": 3045.0, "completions/mean_length": 604.12109375, "completions/mean_terminated_length": 655.3178100585938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.0436667837202549, "learning_rate": 2.0000000000000003e-06, "loss": 0.059, "num_tokens": 2165439.0, "reward": 0.054405272006988525, "reward_std": 0.12038490921258926, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.02230859361588955, "rewards/format_reward_step": 0.05078125, "step": 8 }, { "aux_brier/lambda": 0.019999999999999997, "aux_brier/loss": 8.70976960287706e-06, "aux_brier/mean_r": 0.4381483110288779, "aux_brier/n_active_tok": 19.666666666666668, "aux_brier/n_step_records": 4.916666666666667, "aux_brier/std_r": 0.14367802006502947, "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.7272727272727273, "calib/avg_num_step_conf": 0.23828125, "calib/ece": 0.8883333333333333, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.020000000000000018, "calib/mean_conf": 0.9716666666666667, "calib/mu_c": 0.99, "calib/mu_w": 0.97, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.8883333333333333, "calib/std_conf": 0.024776781245530826, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 691.390625, "completions/mean_terminated_length": 783.1681518554688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.09020941704511642, "learning_rate": 2.25e-06, "loss": -0.0037, "num_tokens": 2449971.0, "reward": 0.02004404366016388, "reward_std": 0.04632704704999924, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0020511718466877937, "rewards/format_reward_step": 0.03125, "step": 9 }, { "aux_brier/lambda": 0.02, "aux_brier/loss": 1.704656992651543e-05, "aux_brier/mean_r": 0.4264591783285141, "aux_brier/n_active_tok": 23.5, "aux_brier/n_step_records": 5.875, "aux_brier/std_r": 0.1849947760347277, "calib/answer_extract_rate": 0.1015625, "calib/auroc": 0.65, "calib/avg_num_step_conf": 0.3828125, "calib/ece": 0.877142857142857, "calib/final_conf_rate": 0.08203125, "calib/format_rate": 0.05859375, "calib/frac_conf_gt_0.9": 0.9047619047619048, "calib/gap": 0.04749999999999999, "calib/mean_conf": 0.9247619047619047, "calib/mu_c": 0.97, "calib/mu_w": 0.9225, "calib/nonempty_final_conf_rate": 0.08203125, "calib/nonempty_reasoning_rate": 0.125, "calib/nonempty_step_conf_rate": 0.0859375, "calib/pce": 0.877142857142857, "calib/std_conf": 0.19010320624582666, "calib/step_conf_rate": 0.0859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2985.0, "completions/max_terminated_length": 2985.0, "completions/mean_length": 634.58984375, "completions/mean_terminated_length": 688.36865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.0061858585104346275, "learning_rate": 2.5e-06, "loss": 0.0211, "num_tokens": 2719226.0, "reward": 0.03498310595750809, "reward_std": 0.08540120720863342, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.007119921967387199, "rewards/format_reward_step": 0.05859375, "step": 10 }, { "aux_brier/lambda": 0.020000000000000004, "aux_brier/loss": -1.0184447127414842e-05, "aux_brier/mean_r": 0.49724808775565843, "aux_brier/n_active_tok": 20.0, "aux_brier/n_step_records": 5.0, "aux_brier/std_r": 0.13798914714293045, "calib/answer_extract_rate": 0.12109375, "calib/auroc": 0.49122807017543857, "calib/avg_num_step_conf": 0.46484375, "calib/ece": 0.8022727272727272, "calib/final_conf_rate": 0.0859375, "calib/format_rate": 0.06640625, "calib/frac_conf_gt_0.9": 0.7272727272727273, "calib/gap": -0.1829824561403509, "calib/mean_conf": 0.8613636363636363, "calib/mu_c": 0.7033333333333333, "calib/mu_w": 0.8863157894736842, "calib/nonempty_final_conf_rate": 0.0859375, "calib/nonempty_reasoning_rate": 0.15625, "calib/nonempty_step_conf_rate": 0.1171875, "calib/pce": 0.7636363636363636, "calib/std_conf": 0.23559008033109802, "calib/step_conf_rate": 0.1171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2938.0, "completions/max_terminated_length": 2938.0, "completions/mean_length": 658.7265625, "completions/mean_terminated_length": 733.1912841796875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.005495699122548103, "learning_rate": 2.7500000000000004e-06, "loss": 0.0349, "num_tokens": 2992340.0, "reward": 0.054179199039936066, "reward_std": 0.10967986285686493, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.021404296159744263, "rewards/format_reward_step": 0.06640625, "step": 11 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -9.431231842257312e-07, "aux_brier/mean_r": 0.4989075498736423, "aux_brier/n_active_tok": 29.91304347826087, "aux_brier/n_step_records": 7.478260869565218, "aux_brier/std_r": 0.19141821327376302, "calib/answer_extract_rate": 0.15234375, "calib/auroc": 0.24074074074074076, "calib/avg_num_step_conf": 0.68359375, "calib/ece": 0.6233333333333333, "calib/final_conf_rate": 0.09375, "calib/format_rate": 0.0703125, "calib/frac_conf_gt_0.9": 0.875, "calib/gap": -0.10844444444444434, "calib/mean_conf": 0.9066666666666666, "calib/mu_c": 0.838888888888889, "calib/mu_w": 0.9473333333333334, "calib/nonempty_final_conf_rate": 0.09375, "calib/nonempty_reasoning_rate": 0.1796875, "calib/nonempty_step_conf_rate": 0.1171875, "calib/pce": 0.5775, "calib/std_conf": 0.18160549428790845, "calib/step_conf_rate": 0.1171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 3032.0, "completions/max_terminated_length": 3032.0, "completions/mean_length": 667.94140625, "completions/mean_terminated_length": 721.4894409179688, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0128, "grad_norm": 0.05467556044459343, "learning_rate": 3e-06, "loss": 0.0349, "num_tokens": 3267509.0, "reward": 0.0774708017706871, "reward_std": 0.1405467838048935, "rewards/accuracy_reward_step": 0.03515625, "rewards/final_brier_reward_step": 0.028633201494812965, "rewards/format_reward_step": 0.0703125, "step": 12 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.8873083033342795e-06, "aux_brier/mean_r": 0.468845642482241, "aux_brier/n_active_tok": 25.5, "aux_brier/n_step_records": 6.375, "aux_brier/std_r": 0.17154717879990736, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.2864583333333333, "calib/avg_num_step_conf": 0.6015625, "calib/ece": 0.8039285714285712, "calib/final_conf_rate": 0.109375, "calib/format_rate": 0.078125, "calib/frac_conf_gt_0.9": 0.8571428571428571, "calib/gap": -0.02041666666666675, "calib/mean_conf": 0.9325, "calib/mu_c": 0.9149999999999999, "calib/mu_w": 0.9354166666666667, "calib/nonempty_final_conf_rate": 0.109375, "calib/nonempty_reasoning_rate": 0.1796875, "calib/nonempty_step_conf_rate": 0.125, "calib/pce": 0.7967857142857141, "calib/std_conf": 0.15810541239493173, "calib/step_conf_rate": 0.125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3010.0, "completions/max_terminated_length": 3010.0, "completions/mean_length": 582.0703125, "completions/mean_terminated_length": 634.0850830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.11166919767856598, "learning_rate": 3.2500000000000002e-06, "loss": 0.0642, "num_tokens": 3521111.0, "reward": 0.057707615196704865, "reward_std": 0.14456789195537567, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01208046916872263, "rewards/format_reward_step": 0.078125, "step": 13 }, { "aux_brier/lambda": 0.020000000000000004, "aux_brier/loss": -5.629314495791959e-07, "aux_brier/mean_r": 0.4207657706737518, "aux_brier/n_active_tok": 31.36, "aux_brier/n_step_records": 7.84, "aux_brier/std_r": 0.18341759517788886, "calib/answer_extract_rate": 0.19921875, "calib/auroc": 0.4802955665024631, "calib/avg_num_step_conf": 0.79296875, "calib/ece": 0.5972093023255814, "calib/final_conf_rate": 0.16796875, "calib/format_rate": 0.14453125, "calib/frac_conf_gt_0.9": 0.813953488372093, "calib/gap": 0.02763546798029559, "calib/mean_conf": 0.9227906976744186, "calib/mu_c": 0.9414285714285715, "calib/mu_w": 0.9137931034482759, "calib/nonempty_final_conf_rate": 0.16796875, "calib/nonempty_reasoning_rate": 0.23046875, "calib/nonempty_step_conf_rate": 0.1875, "calib/pce": 0.5972093023255814, "calib/std_conf": 0.13640441889785712, "calib/step_conf_rate": 0.1875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 2882.0, "completions/max_terminated_length": 2882.0, "completions/mean_length": 658.42578125, "completions/mean_terminated_length": 699.4066772460938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.014933333333333333, "grad_norm": 0.01957801915705204, "learning_rate": 3.5e-06, "loss": 0.0855, "num_tokens": 3795068.0, "reward": 0.14601826667785645, "reward_std": 0.23803023993968964, "rewards/accuracy_reward_step": 0.05859375, "rewards/final_brier_reward_step": 0.06063554435968399, "rewards/format_reward_step": 0.14453125, "step": 14 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.7064985956097114e-08, "aux_brier/mean_r": 0.4783921589454015, "aux_brier/n_active_tok": 39.666666666666664, "aux_brier/n_step_records": 9.916666666666666, "aux_brier/std_r": 0.20275885363419852, "calib/answer_extract_rate": 0.16796875, "calib/auroc": 0.4920634920634921, "calib/avg_num_step_conf": 0.94921875, "calib/ece": 0.7070588235294117, "calib/final_conf_rate": 0.1328125, "calib/format_rate": 0.1171875, "calib/frac_conf_gt_0.9": 0.7352941176470589, "calib/gap": 0.008888888888888835, "calib/mean_conf": 0.9129411764705883, "calib/mu_c": 0.92, "calib/mu_w": 0.9111111111111112, "calib/nonempty_final_conf_rate": 0.1328125, "calib/nonempty_reasoning_rate": 0.2421875, "calib/nonempty_step_conf_rate": 0.2109375, "calib/pce": 0.7070588235294117, "calib/std_conf": 0.11168573061114533, "calib/step_conf_rate": 0.2109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 2663.0, "completions/max_terminated_length": 2663.0, "completions/mean_length": 612.93359375, "completions/mean_terminated_length": 648.3925170898438, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.016, "grad_norm": 0.17819392681121826, "learning_rate": 3.7500000000000005e-06, "loss": 0.0837, "num_tokens": 4059859.0, "reward": 0.09566473960876465, "reward_std": 0.17815300822257996, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.03890898451209068, "rewards/format_reward_step": 0.1171875, "step": 15 }, { "aux_brier/lambda": 0.020000000000000004, "aux_brier/loss": 6.2164642237800335e-06, "aux_brier/mean_r": 0.5178940232705187, "aux_brier/n_active_tok": 44.592592592592595, "aux_brier/n_step_records": 11.148148148148149, "aux_brier/std_r": 0.21482459635094361, "calib/answer_extract_rate": 0.23046875, "calib/auroc": 0.5712945590994372, "calib/avg_num_step_conf": 1.2421875, "calib/ece": 0.6866296296296299, "calib/final_conf_rate": 0.2109375, "calib/format_rate": 0.1796875, "calib/frac_conf_gt_0.9": 0.8333333333333334, "calib/gap": 0.05401876172607878, "calib/mean_conf": 0.9273703703703705, "calib/mu_c": 0.9683846153846153, "calib/mu_w": 0.9143658536585365, "calib/nonempty_final_conf_rate": 0.2109375, "calib/nonempty_reasoning_rate": 0.28125, "calib/nonempty_step_conf_rate": 0.23828125, "calib/pce": 0.6866296296296299, "calib/std_conf": 0.14662553669458106, "calib/step_conf_rate": 0.23828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3065.0, "completions/max_terminated_length": 3065.0, "completions/mean_length": 652.91796875, "completions/mean_terminated_length": 708.25, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.017066666666666667, "grad_norm": 0.06547030806541443, "learning_rate": 4.000000000000001e-06, "loss": 0.0998, "num_tokens": 4335854.0, "reward": 0.15457949042320251, "reward_std": 0.27846628427505493, "rewards/accuracy_reward_step": 0.05078125, "rewards/final_brier_reward_step": 0.05581796169281006, "rewards/format_reward_step": 0.1796875, "step": 16 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.576091839845418e-08, "aux_brier/mean_r": 0.4590467623164577, "aux_brier/n_active_tok": 55.354838709677416, "aux_brier/n_step_records": 13.838709677419354, "aux_brier/std_r": 0.2482275741715585, "calib/answer_extract_rate": 0.3828125, "calib/auroc": 0.530448717948718, "calib/avg_num_step_conf": 1.6796875, "calib/ece": 0.7132941176470589, "calib/final_conf_rate": 0.33203125, "calib/format_rate": 0.29296875, "calib/frac_conf_gt_0.9": 0.6823529411764706, "calib/gap": 0.034476495726495804, "calib/mean_conf": 0.8554117647058824, "calib/mu_c": 0.8846153846153848, "calib/mu_w": 0.850138888888889, "calib/nonempty_final_conf_rate": 0.33203125, "calib/nonempty_reasoning_rate": 0.4453125, "calib/nonempty_step_conf_rate": 0.3671875, "calib/pce": 0.7078823529411764, "calib/std_conf": 0.2424703884464171, "calib/step_conf_rate": 0.3671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3031.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 481.6171875, "completions/mean_terminated_length": 509.47930908203125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.018133333333333335, "grad_norm": 0.15698353946208954, "learning_rate": 4.25e-06, "loss": 0.0976, "num_tokens": 4562676.0, "reward": 0.2217053771018982, "reward_std": 0.3235616087913513, "rewards/accuracy_reward_step": 0.05078125, "rewards/final_brier_reward_step": 0.09775897860527039, "rewards/format_reward_step": 0.29296875, "step": 17 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.126978218945263e-06, "aux_brier/mean_r": 0.457625154277374, "aux_brier/n_active_tok": 46.06896551724138, "aux_brier/n_step_records": 11.517241379310345, "aux_brier/std_r": 0.18960562271290812, "calib/answer_extract_rate": 0.33203125, "calib/auroc": 0.6062271062271063, "calib/avg_num_step_conf": 1.33203125, "calib/ece": 0.7436710526315787, "calib/final_conf_rate": 0.296875, "calib/format_rate": 0.2265625, "calib/frac_conf_gt_0.9": 0.7368421052631579, "calib/gap": 0.05925885225885241, "calib/mean_conf": 0.9147236842105262, "calib/mu_c": 0.9638461538461538, "calib/mu_w": 0.9045873015873014, "calib/nonempty_final_conf_rate": 0.296875, "calib/nonempty_reasoning_rate": 0.37109375, "calib/nonempty_step_conf_rate": 0.29296875, "calib/pce": 0.7436710526315787, "calib/std_conf": 0.1460139321654841, "calib/step_conf_rate": 0.29296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 2916.0, "completions/max_terminated_length": 2916.0, "completions/mean_length": 552.4765625, "completions/mean_terminated_length": 591.7740478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0192, "grad_norm": 0.22656942903995514, "learning_rate": 4.5e-06, "loss": 0.1234, "num_tokens": 4814830.0, "reward": 0.1823437511920929, "reward_std": 0.3027515411376953, "rewards/accuracy_reward_step": 0.05078125, "rewards/final_brier_reward_step": 0.07312499731779099, "rewards/format_reward_step": 0.2265625, "step": 18 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -9.532799466471786e-09, "aux_brier/mean_r": 0.5357045959681273, "aux_brier/n_active_tok": 82.625, "aux_brier/n_step_records": 20.65625, "aux_brier/std_r": 0.2631514077074826, "calib/answer_extract_rate": 0.5859375, "calib/auroc": 0.5745745745745746, "calib/avg_num_step_conf": 2.71875, "calib/ece": 0.6729347826086958, "calib/final_conf_rate": 0.5390625, "calib/format_rate": 0.4765625, "calib/frac_conf_gt_0.9": 0.6884057971014492, "calib/gap": 0.07174674674674697, "calib/mean_conf": 0.868586956521739, "calib/mu_c": 0.9262962962962964, "calib/mu_w": 0.8545495495495494, "calib/nonempty_final_conf_rate": 0.5390625, "calib/nonempty_reasoning_rate": 0.6796875, "calib/nonempty_step_conf_rate": 0.6015625, "calib/pce": 0.6729347826086958, "calib/std_conf": 0.20455943214416447, "calib/step_conf_rate": 0.6015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2688.0, "completions/max_terminated_length": 2688.0, "completions/mean_length": 424.578125, "completions/mean_terminated_length": 436.5140380859375, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.020266666666666665, "grad_norm": 0.1766931265592575, "learning_rate": 4.75e-06, "loss": 0.1175, "num_tokens": 5028282.0, "reward": 0.39152294397354126, "reward_std": 0.47060954570770264, "rewards/accuracy_reward_step": 0.109375, "rewards/final_brier_reward_step": 0.17546679079532623, "rewards/format_reward_step": 0.4765625, "step": 19 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.381132152562415e-09, "aux_brier/mean_r": 0.49071262404322624, "aux_brier/n_active_tok": 106.875, "aux_brier/n_step_records": 26.71875, "aux_brier/std_r": 0.26794795552268624, "calib/answer_extract_rate": 0.7578125, "calib/auroc": 0.5599127006477049, "calib/avg_num_step_conf": 3.38671875, "calib/ece": 0.6079679144385026, "calib/final_conf_rate": 0.73046875, "calib/format_rate": 0.63671875, "calib/frac_conf_gt_0.9": 0.7593582887700535, "calib/gap": 0.07156857223317359, "calib/mean_conf": 0.8855080213903743, "calib/mu_c": 0.9367924528301885, "calib/mu_w": 0.8652238805970149, "calib/nonempty_final_conf_rate": 0.73046875, "calib/nonempty_reasoning_rate": 0.84765625, "calib/nonempty_step_conf_rate": 0.7578125, "calib/pce": 0.6050267379679144, "calib/std_conf": 0.20533039422178165, "calib/step_conf_rate": 0.7578125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3042.0, "completions/max_terminated_length": 3042.0, "completions/mean_length": 331.04296875, "completions/mean_terminated_length": 333.64959716796875, "completions/min_length": 0.0, "completions/min_terminated_length": 25.0, "epoch": 0.021333333333333333, "grad_norm": 0.20575417578220367, "learning_rate": 5e-06, "loss": 0.0832, "num_tokens": 5217901.0, "reward": 0.5955972075462341, "reward_std": 0.5120225548744202, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.2808261513710022, "rewards/format_reward_step": 0.63671875, "step": 20 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.3007841803691633e-08, "aux_brier/mean_r": 0.49773185327649117, "aux_brier/n_active_tok": 115.75, "aux_brier/n_step_records": 28.9375, "aux_brier/std_r": 0.2863457161001861, "calib/answer_extract_rate": 0.80859375, "calib/auroc": 0.5008394383394383, "calib/avg_num_step_conf": 3.76171875, "calib/ece": 0.6976001683501682, "calib/final_conf_rate": 0.7734375, "calib/format_rate": 0.69921875, "calib/frac_conf_gt_0.9": 0.7121212121212122, "calib/gap": -0.01446230158730144, "calib/mean_conf": 0.8709183501683503, "calib/mu_c": 0.8595238095238097, "calib/mu_w": 0.8739861111111111, "calib/nonempty_final_conf_rate": 0.7734375, "calib/nonempty_reasoning_rate": 0.87109375, "calib/nonempty_step_conf_rate": 0.796875, "calib/pce": 0.6781986531986531, "calib/std_conf": 0.22372157992792882, "calib/step_conf_rate": 0.796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2654.0, "completions/max_terminated_length": 2654.0, "completions/mean_length": 332.203125, "completions/mean_terminated_length": 336.1423034667969, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.0224, "grad_norm": 0.16781505942344666, "learning_rate": 4.9722222222222224e-06, "loss": 0.0871, "num_tokens": 5405905.0, "reward": 0.5818346738815308, "reward_std": 0.4870641827583313, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.24140146374702454, "rewards/format_reward_step": 0.69921875, "step": 21 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.6415178223649907e-08, "aux_brier/mean_r": 0.5266828071326017, "aux_brier/n_active_tok": 128.375, "aux_brier/n_step_records": 32.09375, "aux_brier/std_r": 0.25951013108715415, "calib/answer_extract_rate": 0.890625, "calib/auroc": 0.5321059431524547, "calib/avg_num_step_conf": 4.1328125, "calib/ece": 0.7029192825112108, "calib/final_conf_rate": 0.87109375, "calib/format_rate": 0.80859375, "calib/frac_conf_gt_0.9": 0.7085201793721974, "calib/gap": 0.003939793281653947, "calib/mean_conf": 0.8812152466367713, "calib/mu_c": 0.8843953488372094, "calib/mu_w": 0.8804555555555554, "calib/nonempty_final_conf_rate": 0.87109375, "calib/nonempty_reasoning_rate": 0.9453125, "calib/nonempty_step_conf_rate": 0.8984375, "calib/pce": 0.6956547085201794, "calib/std_conf": 0.19189626855187153, "calib/step_conf_rate": 0.8984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2645.0, "completions/max_terminated_length": 2645.0, "completions/mean_length": 304.78125, "completions/mean_terminated_length": 307.18109130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 39.0, "epoch": 0.023466666666666667, "grad_norm": 0.04580605775117874, "learning_rate": 4.944444444444445e-06, "loss": 0.0556, "num_tokens": 5585745.0, "reward": 0.642554759979248, "reward_std": 0.4749007225036621, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.2811563313007355, "rewards/format_reward_step": 0.80859375, "step": 22 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.1233109512009953e-08, "aux_brier/mean_r": 0.4905240759253502, "aux_brier/n_active_tok": 130.5, "aux_brier/n_step_records": 32.625, "aux_brier/std_r": 0.26758163096383214, "calib/answer_extract_rate": 0.90234375, "calib/auroc": 0.504391672088484, "calib/avg_num_step_conf": 4.12109375, "calib/ece": 0.6707635829662261, "calib/final_conf_rate": 0.88671875, "calib/format_rate": 0.828125, "calib/frac_conf_gt_0.9": 0.73568281938326, "calib/gap": 0.01826537988867194, "calib/mean_conf": 0.8954331864904551, "calib/mu_c": 0.9094339622641509, "calib/mu_w": 0.8911685823754789, "calib/nonempty_final_conf_rate": 0.88671875, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.6663582966226137, "calib/std_conf": 0.16834353443153843, "calib/step_conf_rate": 0.94140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2684.0, "completions/max_terminated_length": 2684.0, "completions/mean_length": 295.0, "completions/mean_terminated_length": 295.0, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.024533333333333334, "grad_norm": 0.020392941311001778, "learning_rate": 4.9166666666666665e-06, "loss": 0.0604, "num_tokens": 5765201.0, "reward": 0.7049959897994995, "reward_std": 0.4462015628814697, "rewards/accuracy_reward_step": 0.21484375, "rewards/final_brier_reward_step": 0.30435892939567566, "rewards/format_reward_step": 0.828125, "step": 23 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.9477952162570986e-08, "aux_brier/mean_r": 0.5268711112439632, "aux_brier/n_active_tok": 149.25, "aux_brier/n_step_records": 37.3125, "aux_brier/std_r": 0.25730579486116767, "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.5123832038725655, "calib/avg_num_step_conf": 4.71484375, "calib/ece": 0.7209322033898307, "calib/final_conf_rate": 0.921875, "calib/format_rate": 0.859375, "calib/frac_conf_gt_0.9": 0.788135593220339, "calib/gap": -0.009399977485084032, "calib/mean_conf": 0.9009322033898305, "calib/mu_c": 0.8934042553191488, "calib/mu_w": 0.9028042328042328, "calib/nonempty_final_conf_rate": 0.921875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.94921875, "calib/pce": 0.71135593220339, "calib/std_conf": 0.18308554677395472, "calib/step_conf_rate": 0.94921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2267.0, "completions/max_terminated_length": 2267.0, "completions/mean_length": 277.44921875, "completions/mean_terminated_length": 278.5372619628906, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.0256, "grad_norm": 1.5058033466339111, "learning_rate": 4.888888888888889e-06, "loss": 0.0459, "num_tokens": 5940740.0, "reward": 0.6933838725090027, "reward_std": 0.4145248532295227, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.2735355496406555, "rewards/format_reward_step": 0.859375, "step": 24 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 8.266170539728002e-09, "aux_brier/mean_r": 0.4919162206351757, "aux_brier/n_active_tok": 133.0, "aux_brier/n_step_records": 33.25, "aux_brier/std_r": 0.24906104244291782, "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.5260896255371392, "calib/avg_num_step_conf": 4.171875, "calib/ece": 0.6376229508196721, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.7336065573770492, "calib/gap": 0.026436902569499376, "calib/mean_conf": 0.8897540983606556, "calib/mu_c": 0.9093650793650795, "calib/mu_w": 0.8829281767955801, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.6345901639344262, "calib/std_conf": 0.1810305794055931, "calib/step_conf_rate": 0.9609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 806.0, "completions/max_terminated_length": 806.0, "completions/mean_length": 236.88671875, "completions/mean_terminated_length": 237.81570434570312, "completions/min_length": 0.0, "completions/min_terminated_length": 11.0, "epoch": 0.02666666666666667, "grad_norm": 0.9424216151237488, "learning_rate": 4.861111111111111e-06, "loss": 0.02, "num_tokens": 6104607.0, "reward": 0.7890994548797607, "reward_std": 0.4592776894569397, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.35171017050743103, "rewards/format_reward_step": 0.91015625, "step": 25 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.1941884476017748e-08, "aux_brier/mean_r": 0.53303661942482, "aux_brier/n_active_tok": 145.0, "aux_brier/n_step_records": 36.25, "aux_brier/std_r": 0.25267244642600417, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.4302702702702702, "calib/avg_num_step_conf": 4.58984375, "calib/ece": 0.6846666666666668, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.7541666666666667, "calib/gap": -0.013503685503685192, "calib/mean_conf": 0.9055, "calib/mu_c": 0.8950909090909093, "calib/mu_w": 0.9085945945945945, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.6805, "calib/std_conf": 0.15920557988546338, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2469.0, "completions/max_terminated_length": 2469.0, "completions/mean_length": 259.3046875, "completions/mean_terminated_length": 260.32159423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.027733333333333332, "grad_norm": 1.2116645574569702, "learning_rate": 4.833333333333333e-06, "loss": 0.0213, "num_tokens": 6276229.0, "reward": 0.7475730180740356, "reward_std": 0.41038182377815247, "rewards/accuracy_reward_step": 0.21484375, "rewards/final_brier_reward_step": 0.31060469150543213, "rewards/format_reward_step": 0.91015625, "step": 26 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.219952121937599e-08, "aux_brier/mean_r": 0.49529169127345085, "aux_brier/n_active_tok": 141.75, "aux_brier/n_step_records": 35.4375, "aux_brier/std_r": 0.25249000173062086, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5004420866489832, "calib/avg_num_step_conf": 4.47265625, "calib/ece": 0.7320785123966943, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.890625, "calib/frac_conf_gt_0.9": 0.7396694214876033, "calib/gap": 0.0075762283693315835, "calib/mean_conf": 0.8895165289256199, "calib/mu_c": 0.8958717948717947, "calib/mu_w": 0.8882955665024631, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.730219008264463, "calib/std_conf": 0.17947513506600266, "calib/step_conf_rate": 0.96484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2084.0, "completions/max_terminated_length": 2084.0, "completions/mean_length": 250.83203125, "completions/mean_terminated_length": 250.83203125, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "epoch": 0.0288, "grad_norm": 0.5905550122261047, "learning_rate": 4.805555555555556e-06, "loss": 0.0237, "num_tokens": 6445658.0, "reward": 0.6743470430374146, "reward_std": 0.4219324290752411, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.2755131423473358, "rewards/format_reward_step": 0.890625, "step": 27 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.6383348108717222e-08, "aux_brier/mean_r": 0.516123553737998, "aux_brier/n_active_tok": 150.75, "aux_brier/n_step_records": 37.6875, "aux_brier/std_r": 0.26661207247525454, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5077495107632094, "calib/avg_num_step_conf": 4.7890625, "calib/ece": 0.613115322580645, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.7419354838709677, "calib/gap": 0.02940432093933476, "calib/mean_conf": 0.9074701612903225, "calib/mu_c": 0.9282191780821919, "calib/mu_w": 0.8988148571428571, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.613115322580645, "calib/std_conf": 0.16147058654544122, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 929.0, "completions/max_terminated_length": 929.0, "completions/mean_length": 246.01953125, "completions/mean_terminated_length": 246.9843292236328, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.029866666666666666, "grad_norm": 0.8447901606559753, "learning_rate": 4.777777777777778e-06, "loss": 0.0126, "num_tokens": 6615583.0, "reward": 0.8513097167015076, "reward_std": 0.4552958607673645, "rewards/accuracy_reward_step": 0.28515625, "rewards/final_brier_reward_step": 0.3818013072013855, "rewards/format_reward_step": 0.94140625, "step": 28 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 7.4380359649239836e-09, "aux_brier/mean_r": 0.5108501221984625, "aux_brier/n_active_tok": 160.875, "aux_brier/n_step_records": 40.21875, "aux_brier/std_r": 0.25156099582090974, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.4784796945505033, "calib/avg_num_step_conf": 5.08203125, "calib/ece": 0.71597868852459, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.7336065573770492, "calib/gap": 0.003972185583709376, "calib/mean_conf": 0.888355737704918, "calib/mu_c": 0.8916279069767442, "calib/mu_w": 0.8876557213930348, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.7140524590163934, "calib/std_conf": 0.18195316639773187, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2226.0, "completions/max_terminated_length": 2226.0, "completions/mean_length": 282.546875, "completions/mean_terminated_length": 282.546875, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.030933333333333334, "grad_norm": 0.1679871529340744, "learning_rate": 4.75e-06, "loss": 0.057, "num_tokens": 6795043.0, "reward": 0.7051953077316284, "reward_std": 0.35713332891464233, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.2973436713218689, "rewards/format_reward_step": 0.92578125, "step": 29 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.6617992489031952e-08, "aux_brier/mean_r": 0.5188779952004552, "aux_brier/n_active_tok": 158.25, "aux_brier/n_step_records": 39.5625, "aux_brier/std_r": 0.270553941372782, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5260690962713293, "calib/avg_num_step_conf": 4.9765625, "calib/ece": 0.6969759036144578, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.6987951807228916, "calib/gap": 0.04295070570886883, "calib/mean_conf": 0.8857309236947792, "calib/mu_c": 0.9205744680851065, "calib/mu_w": 0.8776237623762376, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.6969759036144578, "calib/std_conf": 0.1834667383037193, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2800.0, "completions/max_terminated_length": 2800.0, "completions/mean_length": 282.234375, "completions/mean_terminated_length": 282.234375, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.032, "grad_norm": 0.09541727602481842, "learning_rate": 4.722222222222222e-06, "loss": 0.0657, "num_tokens": 6974279.0, "reward": 0.7456433773040771, "reward_std": 0.3405766189098358, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.32632362842559814, "rewards/format_reward_step": 0.953125, "step": 30 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.0712281860170147e-09, "aux_brier/mean_r": 0.5162139581516385, "aux_brier/n_active_tok": 154.875, "aux_brier/n_step_records": 38.71875, "aux_brier/std_r": 0.2686573010869324, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5465606748864373, "calib/avg_num_step_conf": 4.8984375, "calib/ece": 0.7002838056680164, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.7125506072874493, "calib/gap": 0.05569385680294181, "calib/mean_conf": 0.8814174089068826, "calib/mu_c": 0.9267391304347825, "calib/mu_w": 0.8710452736318407, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.697733198380567, "calib/std_conf": 0.19941239363030375, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2089.0, "completions/max_terminated_length": 2089.0, "completions/mean_length": 269.60546875, "completions/mean_terminated_length": 269.60546875, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.03306666666666667, "grad_norm": 0.014529847539961338, "learning_rate": 4.694444444444445e-06, "loss": 0.062, "num_tokens": 7149210.0, "reward": 0.736213207244873, "reward_std": 0.40180909633636475, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.327665239572525, "rewards/format_reward_step": 0.94921875, "step": 31 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -7.965743564763983e-09, "aux_brier/mean_r": 0.5892385160550475, "aux_brier/n_active_tok": 149.75, "aux_brier/n_step_records": 37.4375, "aux_brier/std_r": 0.2620336702093482, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5289386006663493, "calib/avg_num_step_conf": 4.71484375, "calib/ece": 0.6351626016260163, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.6097560975609756, "calib/gap": 0.028318895763921992, "calib/mean_conf": 0.858739837398374, "calib/mu_c": 0.8807272727272728, "calib/mu_w": 0.8524083769633508, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.6351626016260163, "calib/std_conf": 0.18556559716716373, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 710.0, "completions/max_terminated_length": 710.0, "completions/mean_length": 235.26953125, "completions/mean_terminated_length": 235.26953125, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "epoch": 0.034133333333333335, "grad_norm": 0.031410906463861465, "learning_rate": 4.666666666666667e-06, "loss": -0.0016, "num_tokens": 7316143.0, "reward": 0.7939093708992004, "reward_std": 0.3049905002117157, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.37876248359680176, "rewards/format_reward_step": 0.953125, "step": 32 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.9565520864860417e-08, "aux_brier/mean_r": 0.5720551488921046, "aux_brier/n_active_tok": 155.5, "aux_brier/n_step_records": 38.875, "aux_brier/std_r": 0.2680721143260598, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.596787651230705, "calib/avg_num_step_conf": 4.890625, "calib/ece": 0.6566985391766267, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.5737051792828686, "calib/gap": 0.06221916284244189, "calib/mean_conf": 0.8415590969455512, "calib/mu_c": 0.8921276595744679, "calib/mu_w": 0.829908496732026, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.6555033200531208, "calib/std_conf": 0.2043614580001861, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 954.0, "completions/max_terminated_length": 954.0, "completions/mean_length": 259.1171875, "completions/mean_terminated_length": 259.1171875, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "epoch": 0.0352, "grad_norm": 0.0941232442855835, "learning_rate": 4.638888888888889e-06, "loss": -0.0201, "num_tokens": 7489349.0, "reward": 0.759291410446167, "reward_std": 0.3515019416809082, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.3809155821800232, "rewards/format_reward_step": 0.9609375, "step": 33 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.896031539578736e-10, "aux_brier/mean_r": 0.6080580223351717, "aux_brier/n_active_tok": 154.125, "aux_brier/n_step_records": 38.53125, "aux_brier/std_r": 0.26558592077344656, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5050932977913176, "calib/avg_num_step_conf": 4.8359375, "calib/ece": 0.6429917322834646, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.531496062992126, "calib/gap": -0.01656700304645853, "calib/mean_conf": 0.8243311023622047, "calib/mu_c": 0.8111557692307692, "calib/mu_w": 0.8277227722772277, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.6312992125984254, "calib/std_conf": 0.21329873845833844, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1837.0, "completions/max_terminated_length": 1837.0, "completions/mean_length": 240.45703125, "completions/mean_terminated_length": 240.45703125, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.03626666666666667, "grad_norm": 0.17440563440322876, "learning_rate": 4.611111111111112e-06, "loss": 0.0586, "num_tokens": 7656018.0, "reward": 0.7905939221382141, "reward_std": 0.3576458692550659, "rewards/accuracy_reward_step": 0.203125, "rewards/final_brier_reward_step": 0.3967507779598236, "rewards/format_reward_step": 0.9765625, "step": 34 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.499155964119648e-08, "aux_brier/mean_r": 0.686547027900815, "aux_brier/n_active_tok": 171.375, "aux_brier/n_step_records": 42.84375, "aux_brier/std_r": 0.2598836631514132, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.43599257884972165, "calib/avg_num_step_conf": 5.41796875, "calib/ece": 0.5786454183266933, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.4342629482071713, "calib/gap": -0.042326530612244895, "calib/mean_conf": 0.7770517928286852, "calib/mu_c": 0.744, "calib/mu_w": 0.7863265306122449, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5682868525896415, "calib/std_conf": 0.2509806926328697, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 911.0, "completions/max_terminated_length": 911.0, "completions/mean_length": 274.07421875, "completions/mean_terminated_length": 275.1490478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.037333333333333336, "grad_norm": 0.39294958114624023, "learning_rate": 4.583333333333333e-06, "loss": -0.0014, "num_tokens": 7835437.0, "reward": 0.8168681859970093, "reward_std": 0.3421388268470764, "rewards/accuracy_reward_step": 0.21875, "rewards/final_brier_reward_step": 0.43153518438339233, "rewards/format_reward_step": 0.98046875, "step": 35 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.459315790239504e-08, "aux_brier/mean_r": 0.6825767364352942, "aux_brier/n_active_tok": 166.0, "aux_brier/n_step_records": 41.5, "aux_brier/std_r": 0.27160742226988077, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.46835398969502856, "calib/avg_num_step_conf": 5.2109375, "calib/ece": 0.4311185770750988, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.40711462450592883, "calib/gap": -0.006703175045258414, "calib/mean_conf": 0.7431106719367588, "calib/mu_c": 0.7386860465116278, "calib/mu_w": 0.7453892215568863, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.41715415019762847, "calib/std_conf": 0.2554059072829808, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 795.0, "completions/max_terminated_length": 795.0, "completions/mean_length": 256.26953125, "completions/mean_terminated_length": 257.2745361328125, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.0384, "grad_norm": 0.04972701519727707, "learning_rate": 4.555555555555556e-06, "loss": 0.026, "num_tokens": 8003754.0, "reward": 0.9667409658432007, "reward_std": 0.4514142572879791, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.5310263633728027, "rewards/format_reward_step": 0.98046875, "step": 36 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.860965407444851e-09, "aux_brier/mean_r": 0.7446421310305595, "aux_brier/n_active_tok": 162.875, "aux_brier/n_step_records": 40.71875, "aux_brier/std_r": 0.2629914628341794, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5113897308716818, "calib/avg_num_step_conf": 5.3203125, "calib/ece": 0.43411895161290326, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.28225806451612906, "calib/gap": 0.011622807017543924, "calib/mean_conf": 0.6301713709677419, "calib/mu_c": 0.6391228070175439, "calib/mu_w": 0.6275, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4172258064516129, "calib/std_conf": 0.30517811408663514, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2265.0, "completions/max_terminated_length": 2265.0, "completions/mean_length": 263.08203125, "completions/mean_terminated_length": 265.1535339355469, "completions/min_length": 0.0, "completions/min_terminated_length": 56.0, "epoch": 0.039466666666666664, "grad_norm": 0.05207841098308563, "learning_rate": 4.527777777777778e-06, "loss": -0.0061, "num_tokens": 8178199.0, "reward": 0.8408390283584595, "reward_std": 0.37679728865623474, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.5508561134338379, "rewards/format_reward_step": 0.9609375, "step": 37 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.513858686301276e-08, "aux_brier/mean_r": 0.7988166250288486, "aux_brier/n_active_tok": 165.5, "aux_brier/n_step_records": 41.375, "aux_brier/std_r": 0.22988440841436386, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.503588920790407, "calib/avg_num_step_conf": 5.25, "calib/ece": 0.4076841897233202, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.21739130434782608, "calib/gap": 0.010231734504306589, "calib/mean_conf": 0.6263079051383399, "calib/mu_c": 0.634032258064516, "calib/mu_w": 0.6238005235602094, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3944664031620554, "calib/std_conf": 0.2844012504224562, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1877.0, "completions/max_terminated_length": 1877.0, "completions/mean_length": 253.7734375, "completions/mean_terminated_length": 253.7734375, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.04053333333333333, "grad_norm": 0.05358852073550224, "learning_rate": 4.5e-06, "loss": 0.0593, "num_tokens": 8350053.0, "reward": 0.8800146579742432, "reward_std": 0.3463789224624634, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.582558810710907, "rewards/format_reward_step": 0.984375, "step": 38 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.265544696260612e-07, "aux_brier/mean_r": 0.832659974694252, "aux_brier/n_active_tok": 155.75, "aux_brier/n_step_records": 38.9375, "aux_brier/std_r": 0.1947343872161582, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5742611298166853, "calib/avg_num_step_conf": 4.9609375, "calib/ece": 0.3528174603174603, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.1349206349206349, "calib/gap": 0.07870370370370372, "calib/mean_conf": 0.553531746031746, "calib/mu_c": 0.6153703703703703, "calib/mu_w": 0.5366666666666666, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.346031746031746, "calib/std_conf": 0.28829105625942264, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 727.0, "completions/max_terminated_length": 727.0, "completions/mean_length": 242.484375, "completions/mean_terminated_length": 243.435302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.0416, "grad_norm": 0.05699974298477173, "learning_rate": 4.472222222222223e-06, "loss": 0.0147, "num_tokens": 8518217.0, "reward": 0.8628454208374023, "reward_std": 0.3036377727985382, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.6466941237449646, "rewards/format_reward_step": 0.98046875, "step": 39 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 8.666917991617673e-08, "aux_brier/mean_r": 0.875287689268589, "aux_brier/n_active_tok": 164.375, "aux_brier/n_step_records": 41.09375, "aux_brier/std_r": 0.1688983729109168, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.44660098522167485, "calib/avg_num_step_conf": 5.1875, "calib/ece": 0.3629640316205534, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.13438735177865613, "calib/gap": -0.04999556650246301, "calib/mean_conf": 0.49711501976284583, "calib/mu_c": 0.457, "calib/mu_w": 0.506995566502463, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3312252964426878, "calib/std_conf": 0.29582267056908956, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2495.0, "completions/max_terminated_length": 2495.0, "completions/mean_length": 274.89453125, "completions/mean_terminated_length": 274.89453125, "completions/min_length": 52.0, "completions/min_terminated_length": 52.0, "epoch": 0.042666666666666665, "grad_norm": 0.07838453352451324, "learning_rate": 4.444444444444444e-06, "loss": 0.0898, "num_tokens": 8695350.0, "reward": 0.8447773456573486, "reward_std": 0.2978127896785736, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.6369218230247498, "rewards/format_reward_step": 0.98046875, "step": 40 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.1935416361660316e-08, "aux_brier/mean_r": 0.8809958901256323, "aux_brier/n_active_tok": 166.875, "aux_brier/n_step_records": 41.71875, "aux_brier/std_r": 0.1643846808001399, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.45552631578947367, "calib/avg_num_step_conf": 5.3046875, "calib/ece": 0.3032662745098039, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.09803921568627451, "calib/gap": -0.047965427631579016, "calib/mean_conf": 0.4576749019607843, "calib/mu_c": 0.427578947368421, "calib/mu_w": 0.475544375, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.19419607843137254, "calib/std_conf": 0.29450521390485984, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1018.0, "completions/max_terminated_length": 1018.0, "completions/mean_length": 252.859375, "completions/mean_terminated_length": 253.8509979248047, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.04373333333333333, "grad_norm": 0.09568986296653748, "learning_rate": 4.416666666666667e-06, "loss": -0.0414, "num_tokens": 8867330.0, "reward": 1.015214443206787, "reward_std": 0.34204167127609253, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.6311702728271484, "rewards/format_reward_step": 0.97265625, "step": 41 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.2447231224373994e-07, "aux_brier/mean_r": 0.909093976020813, "aux_brier/n_active_tok": 160.75, "aux_brier/n_step_records": 40.1875, "aux_brier/std_r": 0.15306683687958866, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5635227272727272, "calib/avg_num_step_conf": 5.1796875, "calib/ece": 0.20831075697211154, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.06772908366533864, "calib/gap": 0.04773825757575756, "calib/mean_conf": 0.34379282868525896, "calib/mu_c": 0.37726666666666664, "calib/mu_w": 0.3295284090909091, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12664940239043823, "calib/std_conf": 0.28590112816848845, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 688.0, "completions/max_terminated_length": 688.0, "completions/mean_length": 235.4140625, "completions/mean_terminated_length": 236.33726501464844, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.0448, "grad_norm": 0.07172514498233795, "learning_rate": 4.388888888888889e-06, "loss": 0.0137, "num_tokens": 9031964.0, "reward": 0.961334764957428, "reward_std": 0.3281788229942322, "rewards/accuracy_reward_step": 0.29296875, "rewards/final_brier_reward_step": 0.7125266790390015, "rewards/format_reward_step": 0.98046875, "step": 42 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.4526344632725952e-07, "aux_brier/mean_r": 0.9261396899819374, "aux_brier/n_active_tok": 165.75, "aux_brier/n_step_records": 41.4375, "aux_brier/std_r": 0.12585226859664544, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5406093270454375, "calib/avg_num_step_conf": 5.24609375, "calib/ece": 0.19644140624999998, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.05078125, "calib/gap": 0.028727374803503236, "calib/mean_conf": 0.32768359375, "calib/mu_c": 0.34821917808219177, "calib/mu_w": 0.31949180327868854, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11948437499999998, "calib/std_conf": 0.2702253932707564, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 812.0, "completions/max_terminated_length": 812.0, "completions/mean_length": 260.51953125, "completions/mean_terminated_length": 261.54119873046875, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.04586666666666667, "grad_norm": 0.01999620907008648, "learning_rate": 4.361111111111112e-06, "loss": 0.0194, "num_tokens": 9203881.0, "reward": 0.9659649133682251, "reward_std": 0.27437886595726013, "rewards/accuracy_reward_step": 0.28515625, "rewards/final_brier_reward_step": 0.7310470342636108, "rewards/format_reward_step": 0.99609375, "step": 43 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.250573412163284e-08, "aux_brier/mean_r": 0.9150092117488384, "aux_brier/n_active_tok": 175.125, "aux_brier/n_step_records": 43.78125, "aux_brier/std_r": 0.14740028593223542, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.45497076023391814, "calib/avg_num_step_conf": 5.5625, "calib/ece": 0.21309365079365075, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.047619047619047616, "calib/gap": -0.03666385964912283, "calib/mean_conf": 0.30082698412698416, "calib/mu_c": 0.2724561403508772, "calib/mu_w": 0.30912, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14386507936507933, "calib/std_conf": 0.25857803323534057, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 754.0, "completions/max_terminated_length": 754.0, "completions/mean_length": 270.55859375, "completions/mean_terminated_length": 271.61962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.046933333333333334, "grad_norm": 0.14104244112968445, "learning_rate": 4.333333333333334e-06, "loss": -0.0144, "num_tokens": 9379464.0, "reward": 0.8947416543960571, "reward_std": 0.22405636310577393, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.7274039387702942, "rewards/format_reward_step": 0.98046875, "step": 44 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.4842205718623713e-07, "aux_brier/mean_r": 0.9442765265703201, "aux_brier/n_active_tok": 176.625, "aux_brier/n_step_records": 44.15625, "aux_brier/std_r": 0.10871278544072993, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5421146953405018, "calib/avg_num_step_conf": 5.65234375, "calib/ece": 0.19870677290836655, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.01195219123505976, "calib/gap": 0.013008038914490483, "calib/mean_conf": 0.22671155378486058, "calib/mu_c": 0.2365064516129032, "calib/mu_w": 0.22349841269841272, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08920318725099602, "calib/std_conf": 0.23369570500082532, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2213.0, "completions/max_terminated_length": 2213.0, "completions/mean_length": 279.98828125, "completions/mean_terminated_length": 279.98828125, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "epoch": 0.048, "grad_norm": 0.1291605681180954, "learning_rate": 4.305555555555556e-06, "loss": 0.0529, "num_tokens": 9556189.0, "reward": 0.9145276546478271, "reward_std": 0.32447636127471924, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.7440481781959534, "rewards/format_reward_step": 0.97265625, "step": 45 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 7.736220054843645e-07, "aux_brier/mean_r": 0.9372042287141085, "aux_brier/n_active_tok": 192.25, "aux_brier/n_step_records": 48.0625, "aux_brier/std_r": 0.10789169149938971, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.47533540575916233, "calib/avg_num_step_conf": 6.09765625, "calib/ece": 0.1786235294117647, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.01568627450980392, "calib/gap": -0.01834702225130888, "calib/mean_conf": 0.2170235294117647, "calib/mu_c": 0.20328125000000002, "calib/mu_w": 0.2216282722513089, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.07233333333333333, "calib/std_conf": 0.22179601880144442, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1476.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 290.24609375, "completions/mean_terminated_length": 291.38433837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 57.0, "epoch": 0.04906666666666667, "grad_norm": 0.02557116560637951, "learning_rate": 4.277777777777778e-06, "loss": 0.0226, "num_tokens": 9735260.0, "reward": 0.9308724999427795, "reward_std": 0.23405343294143677, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.7469276189804077, "rewards/format_reward_step": 0.98828125, "step": 46 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.3324391426604585e-07, "aux_brier/mean_r": 0.9523821920156479, "aux_brier/n_active_tok": 209.875, "aux_brier/n_step_records": 52.46875, "aux_brier/std_r": 0.09776612001587637, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.43944636678200694, "calib/avg_num_step_conf": 6.87109375, "calib/ece": 0.2561639215686274, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": -0.044754117647058805, "calib/mean_conf": 0.18407137254901965, "calib/mu_c": 0.15423529411764708, "calib/mu_w": 0.1989894117647059, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05345098039215687, "calib/std_conf": 0.1923053162828972, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1239.0, "completions/max_terminated_length": 1239.0, "completions/mean_length": 311.1328125, "completions/mean_terminated_length": 312.35296630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.050133333333333335, "grad_norm": 0.12944844365119934, "learning_rate": 4.25e-06, "loss": 0.0482, "num_tokens": 9920886.0, "reward": 1.0040524005889893, "reward_std": 0.25592830777168274, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.6958975791931152, "rewards/format_reward_step": 0.99609375, "step": 47 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.0295764564238752e-06, "aux_brier/mean_r": 0.9551076292991638, "aux_brier/n_active_tok": 189.625, "aux_brier/n_step_records": 47.40625, "aux_brier/std_r": 0.0902596762025496, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.47574041478533713, "calib/avg_num_step_conf": 5.98046875, "calib/ece": 0.19847544047619048, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.015873015873015872, "calib/gap": 0.009981954771561968, "calib/mean_conf": 0.1426356706349206, "calib/mu_c": 0.1497260273972603, "calib/mu_w": 0.13974407262569832, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02571428571428571, "calib/std_conf": 0.18141526620057544, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1951.0, "completions/max_terminated_length": 1951.0, "completions/mean_length": 282.65234375, "completions/mean_terminated_length": 283.76080322265625, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.0512, "grad_norm": 0.06616071611642838, "learning_rate": 4.222222222222223e-06, "loss": -0.0198, "num_tokens": 10096933.0, "reward": 0.9574721455574036, "reward_std": 0.23987793922424316, "rewards/accuracy_reward_step": 0.28515625, "rewards/final_brier_reward_step": 0.7283260822296143, "rewards/format_reward_step": 0.98046875, "step": 48 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.204241914664621e-07, "aux_brier/mean_r": 0.960246542468667, "aux_brier/n_active_tok": 194.375, "aux_brier/n_step_records": 48.59375, "aux_brier/std_r": 0.08728372800396755, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.445681581685744, "calib/avg_num_step_conf": 6.15625, "calib/ece": 0.29314516129032253, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.008064516129032258, "calib/gap": -0.018623655913978465, "calib/mean_conf": 0.12508064516129033, "calib/mu_c": 0.11344086021505378, "calib/mu_w": 0.13206451612903225, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.021612903225806453, "calib/std_conf": 0.15610426606423905, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2554.0, "completions/max_terminated_length": 2554.0, "completions/mean_length": 328.65625, "completions/mean_terminated_length": 329.94512939453125, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.05226666666666667, "grad_norm": 0.08786769956350327, "learning_rate": 4.194444444444445e-06, "loss": 0.0674, "num_tokens": 10285605.0, "reward": 1.0075724124908447, "reward_std": 0.27972546219825745, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.6474773287773132, "rewards/format_reward_step": 0.96484375, "step": 49 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.409779658682723e-07, "aux_brier/mean_r": 0.9656286519020796, "aux_brier/n_active_tok": 208.75, "aux_brier/n_step_records": 52.1875, "aux_brier/std_r": 0.07430552037840243, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.503625134264232, "calib/avg_num_step_conf": 6.7421875, "calib/ece": 0.33008160000000003, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": -0.01101643394199786, "calib/mean_conf": 0.10271840000000002, "calib/mu_c": 0.0960204081632653, "calib/mu_w": 0.10703684210526317, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0204, "calib/std_conf": 0.1276265166077959, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2141.0, "completions/max_terminated_length": 2141.0, "completions/mean_length": 356.34765625, "completions/mean_terminated_length": 357.7451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.05333333333333334, "grad_norm": 0.03491717949509621, "learning_rate": 4.166666666666667e-06, "loss": 0.0364, "num_tokens": 10482190.0, "reward": 1.0313575267791748, "reward_std": 0.3168865442276001, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.6410551071166992, "rewards/format_reward_step": 0.9765625, "step": 50 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.7866887092043982e-07, "aux_brier/mean_r": 0.9701175056397915, "aux_brier/n_active_tok": 215.0, "aux_brier/n_step_records": 53.75, "aux_brier/std_r": 0.07333021928207017, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.45217276099629045, "calib/avg_num_step_conf": 6.76953125, "calib/ece": 0.36771599999999993, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.008, "calib/gap": -0.03115673025967143, "calib/mean_conf": 0.08148400000000001, "calib/mu_c": 0.0630392156862745, "calib/mu_w": 0.09419594594594594, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.020600000000000004, "calib/std_conf": 0.12250848845692285, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2816.0, "completions/max_terminated_length": 2816.0, "completions/mean_length": 364.91796875, "completions/mean_terminated_length": 364.91796875, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.0544, "grad_norm": 0.03101470321416855, "learning_rate": 4.138888888888889e-06, "loss": 0.1277, "num_tokens": 10684905.0, "reward": 1.0355937480926514, "reward_std": 0.2546389698982239, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.6033125519752502, "rewards/format_reward_step": 0.97265625, "step": 51 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.90967254898311e-07, "aux_brier/mean_r": 0.951084066182375, "aux_brier/n_active_tok": 182.625, "aux_brier/n_step_records": 45.65625, "aux_brier/std_r": 0.107162126172625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5492837221095335, "calib/avg_num_step_conf": 5.75390625, "calib/ece": 0.4105543650793651, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.007936507936507936, "calib/gap": -0.0025681541582149997, "calib/mean_conf": 0.06920753968253969, "calib/mu_c": 0.06782155172413794, "calib/mu_w": 0.07038970588235294, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00972222222222222, "calib/std_conf": 0.11668227507281927, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2119.0, "completions/max_terminated_length": 2119.0, "completions/mean_length": 326.8515625, "completions/mean_terminated_length": 326.8515625, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.055466666666666664, "grad_norm": 0.008304055780172348, "learning_rate": 4.111111111111111e-06, "loss": 0.0562, "num_tokens": 10876531.0, "reward": 1.0889616012573242, "reward_std": 0.335472047328949, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.5745964050292969, "rewards/format_reward_step": 0.984375, "step": 52 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.2483734669085393e-07, "aux_brier/mean_r": 0.9476209785789251, "aux_brier/n_active_tok": 214.75, "aux_brier/n_step_records": 53.6875, "aux_brier/std_r": 0.11281460447935387, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.47262636397842717, "calib/avg_num_step_conf": 6.72265625, "calib/ece": 0.40118577075098816, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": -0.00032515991471217254, "calib/mean_conf": 0.076600790513834, "calib/mu_c": 0.07642857142857142, "calib/mu_w": 0.07675373134328359, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0037154150197628447, "calib/std_conf": 0.10464558442330969, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2417.0, "completions/max_terminated_length": 2417.0, "completions/mean_length": 362.67578125, "completions/mean_terminated_length": 362.67578125, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.05653333333333333, "grad_norm": 0.07739207148551941, "learning_rate": 4.083333333333334e-06, "loss": 0.0519, "num_tokens": 11075200.0, "reward": 1.1005239486694336, "reward_std": 0.27294468879699707, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5739709138870239, "rewards/format_reward_step": 0.984375, "step": 53 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.3505694329563056e-07, "aux_brier/mean_r": 0.9560749251395464, "aux_brier/n_active_tok": 185.625, "aux_brier/n_step_records": 46.40625, "aux_brier/std_r": 0.09214459665599861, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.44224615384615384, "calib/avg_num_step_conf": 5.80859375, "calib/ece": 0.44117647058823534, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.01744000000000001, "calib/mean_conf": 0.059450980392156856, "calib/mu_c": 0.050559999999999994, "calib/mu_w": 0.068, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0052156862745098035, "calib/std_conf": 0.08524757801757396, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2118.0, "completions/max_terminated_length": 2118.0, "completions/mean_length": 310.1875, "completions/mean_terminated_length": 310.1875, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 0.0576, "grad_norm": 0.20029306411743164, "learning_rate": 4.055555555555556e-06, "loss": 0.0276, "num_tokens": 11260840.0, "reward": 1.122935175895691, "reward_std": 0.2616897523403168, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5464281439781189, "rewards/format_reward_step": 0.99609375, "step": 54 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.8029212708325915e-06, "aux_brier/mean_r": 0.9608405828475952, "aux_brier/n_active_tok": 194.125, "aux_brier/n_step_records": 48.53125, "aux_brier/std_r": 0.0891820899778395, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4409937888198758, "calib/avg_num_step_conf": 6.06640625, "calib/ece": 0.3182745098039216, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006961807849874449, "calib/mean_conf": 0.050352941176470586, "calib/mu_c": 0.04595744680851064, "calib/mu_w": 0.05291925465838509, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0658479602106132, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1478.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 321.52734375, "completions/mean_terminated_length": 321.52734375, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 0.058666666666666666, "grad_norm": 0.05579262599349022, "learning_rate": 4.027777777777779e-06, "loss": 0.0273, "num_tokens": 11450975.0, "reward": 1.0291873216629028, "reward_std": 0.25585275888442993, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.6558117270469666, "rewards/format_reward_step": 0.99609375, "step": 55 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.645098424180036e-06, "aux_brier/mean_r": 0.9579719696193933, "aux_brier/n_active_tok": 200.5, "aux_brier/n_step_records": 50.125, "aux_brier/std_r": 0.09651838191894058, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5518922758977008, "calib/avg_num_step_conf": 6.36328125, "calib/ece": 0.34689062499999995, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": 7.388271764402488e-05, "calib/mean_conf": 0.04951562500000001, "calib/mu_c": 0.04956122448979592, "calib/mu_w": 0.049487341772151895, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.006796875, "calib/std_conf": 0.0827249758589229, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1192.0, "completions/max_terminated_length": 1192.0, "completions/mean_length": 365.02734375, "completions/mean_terminated_length": 366.4588317871094, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.05973333333333333, "grad_norm": 0.00834787730127573, "learning_rate": 4.000000000000001e-06, "loss": 0.0686, "num_tokens": 11651262.0, "reward": 1.0422992706298828, "reward_std": 0.24308203160762787, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.6457598805427551, "rewards/format_reward_step": 0.99609375, "step": 56 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.901270504315992e-06, "aux_brier/mean_r": 0.9758158624172211, "aux_brier/n_active_tok": 199.5, "aux_brier/n_step_records": 49.875, "aux_brier/std_r": 0.06433247484164895, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.42701765447667084, "calib/avg_num_step_conf": 6.40234375, "calib/ece": 0.45488095238095233, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.016853720050441362, "calib/mean_conf": 0.04107142857142857, "calib/mu_c": 0.03237704918032787, "calib/mu_w": 0.04923076923076923, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005912698412698412, "calib/std_conf": 0.05065486682323643, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2103.0, "completions/max_terminated_length": 2103.0, "completions/mean_length": 351.8203125, "completions/mean_terminated_length": 353.20001220703125, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.0608, "grad_norm": 0.05881123244762421, "learning_rate": 3.972222222222223e-06, "loss": 0.039, "num_tokens": 11848120.0, "reward": 1.1062777042388916, "reward_std": 0.27622196078300476, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5344855785369873, "rewards/format_reward_step": 0.984375, "step": 57 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.054105633586545e-06, "aux_brier/mean_r": 0.982156939804554, "aux_brier/n_active_tok": 227.625, "aux_brier/n_step_records": 56.90625, "aux_brier/std_r": 0.05182582238558098, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4927622055533623, "calib/avg_num_step_conf": 7.1796875, "calib/ece": 0.3585549800796813, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0018974667719436622, "calib/mean_conf": 0.0478195219123506, "calib/mu_c": 0.04669313725490198, "calib/mu_w": 0.04859060402684564, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.055812938651239014, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2003.0, "completions/max_terminated_length": 2003.0, "completions/mean_length": 427.80859375, "completions/mean_terminated_length": 427.80859375, "completions/min_length": 80.0, "completions/min_terminated_length": 80.0, "epoch": 0.06186666666666667, "grad_norm": 0.06052467226982117, "learning_rate": 3.944444444444445e-06, "loss": 0.0563, "num_tokens": 12063959.0, "reward": 1.0392279624938965, "reward_std": 0.31383630633354187, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.6100373268127441, "rewards/format_reward_step": 0.9765625, "step": 58 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.138005722616577e-06, "aux_brier/mean_r": 0.9684605058282614, "aux_brier/n_active_tok": 206.0, "aux_brier/n_step_records": 51.5, "aux_brier/std_r": 0.05938303344191809, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.48520786335804056, "calib/avg_num_step_conf": 6.46875, "calib/ece": 0.38785714285714296, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": -0.008137931034482765, "calib/mean_conf": 0.044682539682539685, "calib/mu_c": 0.039999999999999994, "calib/mu_w": 0.04813793103448276, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003968253968253968, "calib/std_conf": 0.08048648223224471, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2734.0, "completions/max_terminated_length": 2734.0, "completions/mean_length": 394.046875, "completions/mean_terminated_length": 394.046875, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.06293333333333333, "grad_norm": 0.24501879513263702, "learning_rate": 3.916666666666667e-06, "loss": 0.0293, "num_tokens": 12271083.0, "reward": 1.058031678199768, "reward_std": 0.2685532867908478, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.5915015935897827, "rewards/format_reward_step": 0.984375, "step": 59 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.383651549767163e-06, "aux_brier/mean_r": 0.9660649616271257, "aux_brier/n_active_tok": 208.5, "aux_brier/n_step_records": 52.125, "aux_brier/std_r": 0.09106568944844184, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.42832697117283136, "calib/avg_num_step_conf": 6.66015625, "calib/ece": 0.3901004016064257, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.009650849019349747, "calib/mean_conf": 0.043634538152610446, "calib/mu_c": 0.038130841121495326, "calib/mu_w": 0.04778169014084507, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.002008032128514056, "calib/std_conf": 0.04942381841983008, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2910.0, "completions/max_terminated_length": 2910.0, "completions/mean_length": 398.9453125, "completions/mean_terminated_length": 402.08660888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.064, "grad_norm": 0.020204439759254456, "learning_rate": 3.88888888888889e-06, "loss": 0.0592, "num_tokens": 12482069.0, "reward": 1.0440466403961182, "reward_std": 0.3184412121772766, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.5746240615844727, "rewards/format_reward_step": 0.96484375, "step": 60 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.2311193083869298e-06, "aux_brier/mean_r": 0.9667092747986317, "aux_brier/n_active_tok": 208.25, "aux_brier/n_step_records": 52.0625, "aux_brier/std_r": 0.08627032622825936, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4604441360166551, "calib/avg_num_step_conf": 6.515625, "calib/ece": 0.44551587301587303, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005969970348873879, "calib/mean_conf": 0.0378968253968254, "calib/mu_c": 0.034793388429752076, "calib/mu_w": 0.040763358778625955, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001626984126984127, "calib/std_conf": 0.043112317203831664, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2083.0, "completions/max_terminated_length": 2083.0, "completions/mean_length": 357.7578125, "completions/mean_terminated_length": 357.7578125, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.06506666666666666, "grad_norm": 0.14428959786891937, "learning_rate": 3.861111111111112e-06, "loss": 0.1105, "num_tokens": 12677719.0, "reward": 1.1001852750778198, "reward_std": 0.2249516397714615, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5413660407066345, "rewards/format_reward_step": 0.984375, "step": 61 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -8.236777115644411e-06, "aux_brier/mean_r": 0.9654866997152567, "aux_brier/n_active_tok": 232.625, "aux_brier/n_step_records": 58.15625, "aux_brier/std_r": 0.0781208942134981, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4384848484848485, "calib/avg_num_step_conf": 7.3828125, "calib/ece": 0.3656064257028112, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006814545454545455, "calib/mean_conf": 0.03198393574297189, "calib/mu_c": 0.027878787878787878, "calib/mu_w": 0.03469333333333333, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.036922579140322535, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2575.0, "completions/max_terminated_length": 2575.0, "completions/mean_length": 445.8984375, "completions/mean_terminated_length": 445.8984375, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.06613333333333334, "grad_norm": 0.15827442705631256, "learning_rate": 3.833333333333334e-06, "loss": 0.0653, "num_tokens": 12898949.0, "reward": 1.0214182138442993, "reward_std": 0.26547694206237793, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.6012977957725525, "rewards/format_reward_step": 0.96875, "step": 62 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.163154411427559e-06, "aux_brier/mean_r": 0.9615036211907864, "aux_brier/n_active_tok": 213.0, "aux_brier/n_step_records": 53.25, "aux_brier/std_r": 0.08634139206697, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5549057829759584, "calib/avg_num_step_conf": 6.6640625, "calib/ece": 0.42286345381526097, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003840935672514617, "calib/mean_conf": 0.03818072289156627, "calib/mu_c": 0.04026315789473684, "calib/mu_w": 0.036422222222222224, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0016064257028112448, "calib/std_conf": 0.04149344882098567, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2459.0, "completions/max_terminated_length": 2459.0, "completions/mean_length": 442.73046875, "completions/mean_terminated_length": 442.73046875, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.0672, "grad_norm": 0.02104155533015728, "learning_rate": 3.8055555555555556e-06, "loss": 0.1186, "num_tokens": 13120928.0, "reward": 1.071668267250061, "reward_std": 0.31590867042541504, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5601105690002441, "rewards/format_reward_step": 0.97265625, "step": 63 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.880867311067959e-06, "aux_brier/mean_r": 0.970289058983326, "aux_brier/n_active_tok": 222.125, "aux_brier/n_step_records": 55.53125, "aux_brier/std_r": 0.06430960720354051, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4590719499478624, "calib/avg_num_step_conf": 6.98046875, "calib/ece": 0.5203373429718876, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005004445985401465, "calib/mean_conf": 0.031710849799196784, "calib/mu_c": 0.029459854014598538, "calib/mu_w": 0.0344643, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0009236947791164659, "calib/std_conf": 0.03582347838243289, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2913.0, "completions/max_terminated_length": 2913.0, "completions/mean_length": 438.58203125, "completions/mean_terminated_length": 438.58203125, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.06826666666666667, "grad_norm": 0.08066911995410919, "learning_rate": 3.777777777777778e-06, "loss": 0.2057, "num_tokens": 13336981.0, "reward": 1.1381856203079224, "reward_std": 0.34999555349349976, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.4668049216270447, "rewards/format_reward_step": 0.97265625, "step": 64 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.005258731676342e-06, "aux_brier/mean_r": 0.963504534214735, "aux_brier/n_active_tok": 178.75, "aux_brier/n_step_records": 44.6875, "aux_brier/std_r": 0.0832199622909684, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5001241233786383, "calib/avg_num_step_conf": 5.59765625, "calib/ece": 0.485984251968504, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003325265313721841, "calib/mean_conf": 0.029763779527559056, "calib/mu_c": 0.031374045801526716, "calib/mu_w": 0.028048780487804875, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.039950058723541126, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1080.0, "completions/max_terminated_length": 1080.0, "completions/mean_length": 323.36328125, "completions/mean_terminated_length": 323.36328125, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.06933333333333333, "grad_norm": 0.05355348810553551, "learning_rate": 3.7500000000000005e-06, "loss": 0.011, "num_tokens": 13524786.0, "reward": 1.1353414058685303, "reward_std": 0.16901922225952148, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5101156234741211, "rewards/format_reward_step": 0.9921875, "step": 65 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.329625920481474e-06, "aux_brier/mean_r": 0.9741129148751497, "aux_brier/n_active_tok": 228.25, "aux_brier/n_step_records": 57.0625, "aux_brier/std_r": 0.07350183494418161, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4185541841855419, "calib/avg_num_step_conf": 7.22265625, "calib/ece": 0.38208502024291496, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.011385121388851213, "calib/mean_conf": 0.031680161943319844, "calib/mu_c": 0.024950495049504952, "calib/mu_w": 0.036335616438356165, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0024291497975708503, "calib/std_conf": 0.04121316821092346, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3025.0, "completions/max_terminated_length": 3025.0, "completions/mean_length": 458.4375, "completions/mean_terminated_length": 460.2353210449219, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.0704, "grad_norm": 0.20546875894069672, "learning_rate": 3.7222222222222225e-06, "loss": 0.1092, "num_tokens": 13748498.0, "reward": 1.023801326751709, "reward_std": 0.23484700918197632, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.5873928070068359, "rewards/format_reward_step": 0.96484375, "step": 66 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.023915574484117e-06, "aux_brier/mean_r": 0.9594349358230829, "aux_brier/n_active_tok": 200.375, "aux_brier/n_step_records": 50.09375, "aux_brier/std_r": 0.09274652416388562, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4965074930149861, "calib/avg_num_step_conf": 6.2734375, "calib/ece": 0.4674900398406375, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004207518415036825, "calib/mean_conf": 0.03067729083665339, "calib/mu_c": 0.028548387096774194, "calib/mu_w": 0.03275590551181102, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0020717131474103584, "calib/std_conf": 0.03969931109008024, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2792.0, "completions/max_terminated_length": 2792.0, "completions/mean_length": 406.73828125, "completions/mean_terminated_length": 406.73828125, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 0.07146666666666666, "grad_norm": 0.02782798372209072, "learning_rate": 3.694444444444445e-06, "loss": 0.0768, "num_tokens": 13957631.0, "reward": 1.1049299240112305, "reward_std": 0.203637033700943, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5212820172309875, "rewards/format_reward_step": 0.98046875, "step": 67 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.2966287282241176e-06, "aux_brier/mean_r": 0.9748028200119734, "aux_brier/n_active_tok": 197.875, "aux_brier/n_step_records": 49.46875, "aux_brier/std_r": 0.06608737350870797, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5255281690140845, "calib/avg_num_step_conf": 6.20703125, "calib/ece": 0.4135787401574803, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0019320925553319955, "calib/mean_conf": 0.029098425196850394, "calib/mu_c": 0.030178571428571426, "calib/mu_w": 0.02824647887323943, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0008661417322834645, "calib/std_conf": 0.03134357105275167, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2080.0, "completions/max_terminated_length": 2080.0, "completions/mean_length": 372.40625, "completions/mean_terminated_length": 372.40625, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.07253333333333334, "grad_norm": 0.012269485741853714, "learning_rate": 3.6666666666666666e-06, "loss": 0.0617, "num_tokens": 14157055.0, "reward": 1.0725542306900024, "reward_std": 0.22582462430000305, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.5714668035507202, "rewards/format_reward_step": 0.984375, "step": 68 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -9.180892825644271e-06, "aux_brier/mean_r": 0.9656862393021584, "aux_brier/n_active_tok": 206.0, "aux_brier/n_step_records": 51.5, "aux_brier/std_r": 0.0876795893018425, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5125263157894737, "calib/avg_num_step_conf": 6.4609375, "calib/ece": 0.3537428571428572, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002587017543859642, "calib/mean_conf": 0.03588979591836734, "calib/mu_c": 0.03747368421052631, "calib/mu_w": 0.03488666666666667, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0009387755102040812, "calib/std_conf": 0.03719172743708296, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2710.0, "completions/max_terminated_length": 2710.0, "completions/mean_length": 447.09375, "completions/mean_terminated_length": 448.8470764160156, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.0736, "grad_norm": 0.010281720198690891, "learning_rate": 3.638888888888889e-06, "loss": 0.1073, "num_tokens": 14376007.0, "reward": 1.0063139200210571, "reward_std": 0.30199459195137024, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.6111934781074524, "rewards/format_reward_step": 0.95703125, "step": 69 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.537944027187656e-06, "aux_brier/mean_r": 0.9632951878011227, "aux_brier/n_active_tok": 217.5, "aux_brier/n_step_records": 54.375, "aux_brier/std_r": 0.07789734461584885, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.543123543123543, "calib/avg_num_step_conf": 6.83203125, "calib/ece": 0.3863673469387755, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0019175922117098572, "calib/mean_conf": 0.02995918367346939, "calib/mu_c": 0.031078431372549016, "calib/mu_w": 0.02916083916083916, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.029935276564733465, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2679.0, "completions/max_terminated_length": 2679.0, "completions/mean_length": 429.87109375, "completions/mean_terminated_length": 433.2558898925781, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.07466666666666667, "grad_norm": 0.009061937220394611, "learning_rate": 3.6111111111111115e-06, "loss": 0.0984, "num_tokens": 14593046.0, "reward": 1.0223637819290161, "reward_std": 0.2500405013561249, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.581642746925354, "rewards/format_reward_step": 0.95703125, "step": 70 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.187180342355879e-06, "aux_brier/mean_r": 0.962714608758688, "aux_brier/n_active_tok": 241.75, "aux_brier/n_step_records": 60.4375, "aux_brier/std_r": 0.07265134731187572, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.47179310344827585, "calib/avg_num_step_conf": 7.73828125, "calib/ece": 0.3796122448979592, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006168965517241384, "calib/mean_conf": 0.028551020408163267, "calib/mu_c": 0.024900000000000002, "calib/mu_w": 0.031068965517241386, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.031063503898771164, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2541.0, "completions/max_terminated_length": 2541.0, "completions/mean_length": 465.28515625, "completions/mean_terminated_length": 468.9488220214844, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.07573333333333333, "grad_norm": 0.0340365469455719, "learning_rate": 3.5833333333333335e-06, "loss": 0.0314, "num_tokens": 14816567.0, "reward": 1.0268982648849487, "reward_std": 0.2908337116241455, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.5841557383537292, "rewards/format_reward_step": 0.95703125, "step": 71 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.5296814010775677e-07, "aux_brier/mean_r": 0.9747793190181255, "aux_brier/n_active_tok": 226.625, "aux_brier/n_step_records": 56.65625, "aux_brier/std_r": 0.05982219804718625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5234375, "calib/avg_num_step_conf": 7.2265625, "calib/ece": 0.4115079365079365, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0007857142857142924, "calib/mean_conf": 0.03293650793650794, "calib/mu_c": 0.032499999999999994, "calib/mu_w": 0.03328571428571429, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.032208829025138, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2500.0, "completions/max_terminated_length": 2500.0, "completions/mean_length": 405.49609375, "completions/mean_terminated_length": 408.68896484375, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.0768, "grad_norm": 0.03765683248639107, "learning_rate": 3.555555555555556e-06, "loss": 0.0189, "num_tokens": 15024782.0, "reward": 1.072993278503418, "reward_std": 0.2650940716266632, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.5732234120368958, "rewards/format_reward_step": 0.984375, "step": 72 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.2650630163444383e-07, "aux_brier/mean_r": 0.96903938613832, "aux_brier/n_active_tok": 205.125, "aux_brier/n_step_records": 51.28125, "aux_brier/std_r": 0.07217369535737816, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5583427994446548, "calib/avg_num_step_conf": 6.43359375, "calib/ece": 0.5186166007905139, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0047917455509276785, "calib/mean_conf": 0.0307905138339921, "calib/mu_c": 0.03294964028776978, "calib/mu_w": 0.028157894736842104, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.025276486693721338, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2505.0, "completions/max_terminated_length": 2505.0, "completions/mean_length": 403.0703125, "completions/mean_terminated_length": 404.6510009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.07786666666666667, "grad_norm": 0.008194060996174812, "learning_rate": 3.5277777777777784e-06, "loss": 0.0419, "num_tokens": 15235000.0, "reward": 1.1540610790252686, "reward_std": 0.312055766582489, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.4756191372871399, "rewards/format_reward_step": 0.984375, "step": 73 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.800286688422251e-06, "aux_brier/mean_r": 0.9864268880337477, "aux_brier/n_active_tok": 220.25, "aux_brier/n_step_records": 55.0625, "aux_brier/std_r": 0.04252590535179479, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5526883830455259, "calib/avg_num_step_conf": 6.8828125, "calib/ece": 0.3870517928286853, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002327315541601257, "calib/mean_conf": 0.02729083665338645, "calib/mu_c": 0.028653846153846155, "calib/mu_w": 0.026326530612244898, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0284358345919624, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2516.0, "completions/max_terminated_length": 2516.0, "completions/mean_length": 430.62109375, "completions/mean_terminated_length": 430.62109375, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.07893333333333333, "grad_norm": 0.01938088983297348, "learning_rate": 3.5e-06, "loss": 0.1094, "num_tokens": 15449167.0, "reward": 1.0454785823822021, "reward_std": 0.24137258529663086, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.5959769487380981, "rewards/format_reward_step": 0.98046875, "step": 74 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.034184655221651e-07, "aux_brier/mean_r": 0.9466332420706749, "aux_brier/n_active_tok": 205.875, "aux_brier/n_step_records": 51.46875, "aux_brier/std_r": 0.12050289758371946, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5027017783857729, "calib/avg_num_step_conf": 6.4765625, "calib/ece": 0.6355078125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0002503419972640225, "calib/mean_conf": 0.028554687500000002, "calib/mu_c": 0.028470588235294116, "calib/mu_w": 0.028720930232558138, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.026690151681422566, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1183.0, "completions/max_terminated_length": 1183.0, "completions/mean_length": 372.0625, "completions/mean_terminated_length": 373.5215759277344, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.08, "grad_norm": 0.009493130259215832, "learning_rate": 3.4722222222222224e-06, "loss": 0.024, "num_tokens": 15649167.0, "reward": 1.2571179866790771, "reward_std": 0.2399531900882721, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.37222224473953247, "rewards/format_reward_step": 1.0, "step": 75 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.1600205329930233e-06, "aux_brier/mean_r": 0.9614088963717222, "aux_brier/n_active_tok": 195.0, "aux_brier/n_step_records": 48.75, "aux_brier/std_r": 0.08628721506556758, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5208985108246778, "calib/avg_num_step_conf": 6.171875, "calib/ece": 0.4908300395256917, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0017619822300087608, "calib/mean_conf": 0.026956521739130435, "calib/mu_c": 0.026106870229007637, "calib/mu_w": 0.027868852459016397, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.025441284936222247, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2505.0, "completions/max_terminated_length": 2505.0, "completions/mean_length": 396.73046875, "completions/mean_terminated_length": 398.2862854003906, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.08106666666666666, "grad_norm": 0.007620580494403839, "learning_rate": 3.444444444444445e-06, "loss": 0.053, "num_tokens": 15853786.0, "reward": 1.1284105777740479, "reward_std": 0.267156183719635, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.49801722168922424, "rewards/format_reward_step": 0.984375, "step": 76 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 9.43920975648993e-06, "aux_brier/mean_r": 0.9507123623043299, "aux_brier/n_active_tok": 235.625, "aux_brier/n_step_records": 58.90625, "aux_brier/std_r": 0.09280098486487987, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.48168750000000005, "calib/avg_num_step_conf": 7.41796875, "calib/ece": 0.47800395256916994, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0005437499999999956, "calib/mean_conf": 0.027924901185770755, "calib/mu_c": 0.02765625, "calib/mu_w": 0.028199999999999996, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.025091577183262024, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2532.0, "completions/max_terminated_length": 2532.0, "completions/mean_length": 391.96484375, "completions/mean_terminated_length": 393.5019836425781, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.08213333333333334, "grad_norm": 0.00921631883829832, "learning_rate": 3.416666666666667e-06, "loss": 0.0304, "num_tokens": 16058793.0, "reward": 1.122776746749878, "reward_std": 0.25447046756744385, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5145446062088013, "rewards/format_reward_step": 0.98828125, "step": 77 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.817728139789757e-06, "aux_brier/mean_r": 0.9637213163077831, "aux_brier/n_active_tok": 214.625, "aux_brier/n_step_records": 53.65625, "aux_brier/std_r": 0.08493510884818534, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.44793650793650797, "calib/avg_num_step_conf": 6.76171875, "calib/ece": 0.47300796812749, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.007043809523809522, "calib/mean_conf": 0.02898406374501992, "calib/mu_c": 0.02547619047619047, "calib/mu_w": 0.03251999999999999, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02632662871508452, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2710.0, "completions/max_terminated_length": 2710.0, "completions/mean_length": 476.6953125, "completions/mean_terminated_length": 476.6953125, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 0.0832, "grad_norm": 0.008832041174173355, "learning_rate": 3.3888888888888893e-06, "loss": 0.0677, "num_tokens": 16288851.0, "reward": 1.1074577569961548, "reward_std": 0.2579970955848694, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5079561471939087, "rewards/format_reward_step": 0.9765625, "step": 78 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.684306469995072e-07, "aux_brier/mean_r": 0.9586722366511822, "aux_brier/n_active_tok": 218.875, "aux_brier/n_step_records": 54.71875, "aux_brier/std_r": 0.08254650658091123, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.570573708206687, "calib/avg_num_step_conf": 6.87890625, "calib/ece": 0.5259288537549407, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00728850050658561, "calib/mean_conf": 0.03138339920948617, "calib/mu_c": 0.03460992907801418, "calib/mu_w": 0.027321428571428573, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.027905530234962152, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 437.12890625, "completions/mean_terminated_length": 438.8431701660156, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.08426666666666667, "grad_norm": 0.013517139479517937, "learning_rate": 3.3611111111111117e-06, "loss": -0.0253, "num_tokens": 16507132.0, "reward": 1.1672985553741455, "reward_std": 0.21669147908687592, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.4738820195198059, "rewards/format_reward_step": 0.98828125, "step": 79 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.3656758808842113e-06, "aux_brier/mean_r": 0.949337549507618, "aux_brier/n_active_tok": 218.75, "aux_brier/n_step_records": 54.6875, "aux_brier/std_r": 0.10197577065173391, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5894612417568744, "calib/avg_num_step_conf": 6.8671875, "calib/ece": 0.5234196078431372, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007606196341918628, "calib/mean_conf": 0.02952156862745098, "calib/mu_c": 0.032921985815602836, "calib/mu_w": 0.025315789473684208, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02312469449697928, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1988.0, "completions/max_terminated_length": 1988.0, "completions/mean_length": 393.44921875, "completions/mean_terminated_length": 393.44921875, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.08533333333333333, "grad_norm": 0.0070471311919391155, "learning_rate": 3.3333333333333333e-06, "loss": -0.0146, "num_tokens": 16710015.0, "reward": 1.1659431457519531, "reward_std": 0.2769979238510132, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.47627267241477966, "rewards/format_reward_step": 0.9921875, "step": 80 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.170599793837157e-05, "aux_brier/mean_r": 0.9600299783051014, "aux_brier/n_active_tok": 235.25, "aux_brier/n_step_records": 58.8125, "aux_brier/std_r": 0.09283288330425421, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5137362637362638, "calib/avg_num_step_conf": 7.578125, "calib/ece": 0.4889357429718876, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0015265029088558438, "calib/mean_conf": 0.03315261044176707, "calib/mu_c": 0.03242307692307692, "calib/mu_w": 0.033949579831932766, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.026506815127541713, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2486.0, "completions/max_terminated_length": 2486.0, "completions/mean_length": 453.296875, "completions/mean_terminated_length": 458.6719665527344, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.0864, "grad_norm": 0.038333695381879807, "learning_rate": 3.3055555555555558e-06, "loss": 0.0022, "num_tokens": 16932307.0, "reward": 1.1181458234786987, "reward_std": 0.24708852171897888, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.49602100253105164, "rewards/format_reward_step": 0.97265625, "step": 81 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.6000700942007136e-06, "aux_brier/mean_r": 0.9693782553076744, "aux_brier/n_active_tok": 204.875, "aux_brier/n_step_records": 51.21875, "aux_brier/std_r": 0.06665047882688668, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5607108854331958, "calib/avg_num_step_conf": 6.6015625, "calib/ece": 0.5140230158730158, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0024394668359251047, "calib/mean_conf": 0.0314531746031746, "calib/mu_c": 0.032566423357664236, "calib/mu_w": 0.03012695652173913, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0009126984126984126, "calib/std_conf": 0.025955362129148344, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2613.0, "completions/max_terminated_length": 2613.0, "completions/mean_length": 409.9609375, "completions/mean_terminated_length": 411.56866455078125, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.08746666666666666, "grad_norm": 0.037460923194885254, "learning_rate": 3.277777777777778e-06, "loss": 0.0528, "num_tokens": 17142809.0, "reward": 1.1479532718658447, "reward_std": 0.23592022061347961, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.48243796825408936, "rewards/format_reward_step": 0.984375, "step": 82 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.4372123365302691e-06, "aux_brier/mean_r": 0.9516628421843052, "aux_brier/n_active_tok": 242.375, "aux_brier/n_step_records": 60.59375, "aux_brier/std_r": 0.10251851239809184, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.47732272256728775, "calib/avg_num_step_conf": 7.86328125, "calib/ece": 0.5144199999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004950181159420301, "calib/mean_conf": 0.03758, "calib/mu_c": 0.03536231884057971, "calib/mu_w": 0.04031250000000001, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0292548047335818, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2829.0, "completions/max_terminated_length": 2829.0, "completions/mean_length": 476.83203125, "completions/mean_terminated_length": 480.58660888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.08853333333333334, "grad_norm": 0.013157090172171593, "learning_rate": 3.2500000000000002e-06, "loss": 0.0164, "num_tokens": 17372142.0, "reward": 1.1456961631774902, "reward_std": 0.20791073143482208, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.47341006994247437, "rewards/format_reward_step": 0.9765625, "step": 83 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.3670594156273141e-06, "aux_brier/mean_r": 0.9507100284099579, "aux_brier/n_active_tok": 210.0, "aux_brier/n_step_records": 52.5, "aux_brier/std_r": 0.0909985100661288, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5358067299396031, "calib/avg_num_step_conf": 6.62890625, "calib/ece": 0.44058392156862747, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000962393689140903, "calib/mean_conf": 0.03941607843137254, "calib/mu_c": 0.03991803278688526, "calib/mu_w": 0.038955639097744355, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0007843137254901962, "calib/std_conf": 0.02738242345069304, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2047.0, "completions/max_terminated_length": 2047.0, "completions/mean_length": 390.2890625, "completions/mean_terminated_length": 390.2890625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.0896, "grad_norm": 0.007138044573366642, "learning_rate": 3.2222222222222227e-06, "loss": -0.0007, "num_tokens": 17577976.0, "reward": 1.1134302616119385, "reward_std": 0.23144583404064178, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5552836656570435, "rewards/format_reward_step": 0.99609375, "step": 84 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.3662479832476926e-06, "aux_brier/mean_r": 0.9500475041568279, "aux_brier/n_active_tok": 210.875, "aux_brier/n_step_records": 52.71875, "aux_brier/std_r": 0.11137834395231039, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5707538075959129, "calib/avg_num_step_conf": 6.8046875, "calib/ece": 0.42306, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": 0.012885097358781579, "calib/mean_conf": 0.04494000000000001, "calib/mu_c": 0.0517948717948718, "calib/mu_w": 0.03890977443609022, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.06567569108886484, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2925.0, "completions/max_terminated_length": 2925.0, "completions/mean_length": 445.80859375, "completions/mean_terminated_length": 449.31890869140625, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.09066666666666667, "grad_norm": 0.017374947667121887, "learning_rate": 3.1944444444444443e-06, "loss": 0.0396, "num_tokens": 17799927.0, "reward": 1.0805864334106445, "reward_std": 0.25652146339416504, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5567206144332886, "rewards/format_reward_step": 0.96875, "step": 85 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.2155011792388564e-07, "aux_brier/mean_r": 0.9598411656916142, "aux_brier/n_active_tok": 211.125, "aux_brier/n_step_records": 52.78125, "aux_brier/std_r": 0.08565618247666862, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5334625322997416, "calib/avg_num_step_conf": 6.83984375, "calib/ece": 0.4392771084337349, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001961240310077529, "calib/mean_conf": 0.042650602409638555, "calib/mu_c": 0.04366666666666667, "calib/mu_w": 0.041705426356589144, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.02628623736363773, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2535.0, "completions/max_terminated_length": 2535.0, "completions/mean_length": 434.8359375, "completions/mean_terminated_length": 438.25982666015625, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.09173333333333333, "grad_norm": 0.01723681390285492, "learning_rate": 3.1666666666666667e-06, "loss": 0.0246, "num_tokens": 18016757.0, "reward": 1.0925931930541992, "reward_std": 0.2459348440170288, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5422476530075073, "rewards/format_reward_step": 0.96875, "step": 86 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.1829252447879153e-06, "aux_brier/mean_r": 0.9530973471701145, "aux_brier/n_active_tok": 184.375, "aux_brier/n_step_records": 46.09375, "aux_brier/std_r": 0.1026550768638117, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5455542264752791, "calib/avg_num_step_conf": 5.8828125, "calib/ece": 0.5633904382470121, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0032243487506645463, "calib/mean_conf": 0.04378087649402391, "calib/mu_c": 0.04505263157894737, "calib/mu_w": 0.041828282828282826, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0007968127490039841, "calib/std_conf": 0.029788325489807886, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2419.0, "completions/max_terminated_length": 2419.0, "completions/mean_length": 404.4921875, "completions/mean_terminated_length": 404.4921875, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.0928, "grad_norm": 0.0068040755577385426, "learning_rate": 3.138888888888889e-06, "loss": 0.0849, "num_tokens": 18225803.0, "reward": 1.1933517456054688, "reward_std": 0.2901657521724701, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.4374694228172302, "rewards/format_reward_step": 0.98046875, "step": 87 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.245816298511837e-06, "aux_brier/mean_r": 0.9574446994811296, "aux_brier/n_active_tok": 211.375, "aux_brier/n_step_records": 52.84375, "aux_brier/std_r": 0.10464780768597848, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5586356589147287, "calib/avg_num_step_conf": 6.71875, "calib/ece": 0.44190944881889765, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004141705426356591, "calib/mean_conf": 0.05021653543307087, "calib/mu_c": 0.05232, "calib/mu_w": 0.04817829457364341, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.030649985584714927, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1589.0, "completions/max_terminated_length": 1589.0, "completions/mean_length": 426.578125, "completions/mean_terminated_length": 428.2510070800781, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.09386666666666667, "grad_norm": 0.006259854417294264, "learning_rate": 3.1111111111111116e-06, "loss": 0.0088, "num_tokens": 18444855.0, "reward": 1.122266411781311, "reward_std": 0.2067279815673828, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5515658855438232, "rewards/format_reward_step": 0.9921875, "step": 88 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.0160124454132733e-06, "aux_brier/mean_r": 0.9578261040151119, "aux_brier/n_active_tok": 222.125, "aux_brier/n_step_records": 55.53125, "aux_brier/std_r": 0.09223490446311189, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5310858718487393, "calib/avg_num_step_conf": 7.05859375, "calib/ece": 0.43447530364372466, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003557641806722689, "calib/mean_conf": 0.054998380566801626, "calib/mu_c": 0.0568420168067227, "calib/mu_w": 0.05328437500000001, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.003846153846153846, "calib/std_conf": 0.057754933867518225, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2753.0, "completions/max_terminated_length": 2753.0, "completions/mean_length": 488.46875, "completions/mean_terminated_length": 492.3149719238281, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.09493333333333333, "grad_norm": 0.010806899517774582, "learning_rate": 3.0833333333333336e-06, "loss": 0.1249, "num_tokens": 18678791.0, "reward": 1.0781443119049072, "reward_std": 0.21829913556575775, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5391401052474976, "rewards/format_reward_step": 0.95703125, "step": 89 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.9249041996609604e-06, "aux_brier/mean_r": 0.9169828165322542, "aux_brier/n_active_tok": 214.0, "aux_brier/n_step_records": 53.5, "aux_brier/std_r": 0.13160156455342076, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5309161744376021, "calib/avg_num_step_conf": 6.7265625, "calib/ece": 0.5235450980392158, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0006281261782078579, "calib/mean_conf": 0.0512, "calib/mu_c": 0.05093150684931507, "calib/mu_w": 0.05155963302752293, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0010980392156862747, "calib/std_conf": 0.03150093680524189, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2603.0, "completions/max_terminated_length": 2603.0, "completions/mean_length": 430.7890625, "completions/mean_terminated_length": 430.7890625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.096, "grad_norm": 0.006005620583891869, "learning_rate": 3.055555555555556e-06, "loss": 0.0308, "num_tokens": 18892393.0, "reward": 1.1884281635284424, "reward_std": 0.23653236031532288, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.48027539253234863, "rewards/format_reward_step": 0.99609375, "step": 90 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.0485723201124628e-07, "aux_brier/mean_r": 0.9632558040320873, "aux_brier/n_active_tok": 213.25, "aux_brier/n_step_records": 53.3125, "aux_brier/std_r": 0.09411390119566931, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5263927793339558, "calib/avg_num_step_conf": 6.81640625, "calib/ece": 0.4806889763779528, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0013075007780889991, "calib/mean_conf": 0.054350393700787404, "calib/mu_c": 0.054962962962962957, "calib/mu_w": 0.05365546218487396, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0017716535433070864, "calib/std_conf": 0.027829473341390652, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1386.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 432.390625, "completions/mean_terminated_length": 435.7952880859375, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.09706666666666666, "grad_norm": 0.013703332282602787, "learning_rate": 3.0277777777777776e-06, "loss": -0.0139, "num_tokens": 19110797.0, "reward": 1.1502869129180908, "reward_std": 0.2125389277935028, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5152104496955872, "rewards/format_reward_step": 0.98828125, "step": 91 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.2309375828366562e-06, "aux_brier/mean_r": 0.9827230200171471, "aux_brier/n_active_tok": 199.875, "aux_brier/n_step_records": 49.96875, "aux_brier/std_r": 0.049810309556050925, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6128033040784718, "calib/avg_num_step_conf": 6.41015625, "calib/ece": 0.5340691699604744, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013376839184305635, "calib/mean_conf": 0.05486363636363638, "calib/mu_c": 0.06036241610738256, "calib/mu_w": 0.04698557692307693, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03224220011413279, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2588.0, "completions/max_terminated_length": 2588.0, "completions/mean_length": 413.03515625, "completions/mean_terminated_length": 414.6549377441406, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.09813333333333334, "grad_norm": 0.017262710258364677, "learning_rate": 3e-06, "loss": 0.0073, "num_tokens": 19323254.0, "reward": 1.1943002939224243, "reward_std": 0.22977006435394287, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.47251349687576294, "rewards/format_reward_step": 0.98828125, "step": 92 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.407620197184215e-07, "aux_brier/mean_r": 0.9631425198167562, "aux_brier/n_active_tok": 228.625, "aux_brier/n_step_records": 57.15625, "aux_brier/std_r": 0.09391147123096744, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5354609929078015, "calib/avg_num_step_conf": 7.3046875, "calib/ece": 0.48689763779527556, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005275340488294714, "calib/mean_conf": 0.06822047244094488, "calib/mu_c": 0.07056737588652481, "calib/mu_w": 0.0652920353982301, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0378272936886621, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2164.0, "completions/max_terminated_length": 2164.0, "completions/mean_length": 455.73046875, "completions/mean_terminated_length": 457.5176696777344, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.0992, "grad_norm": 0.008477514609694481, "learning_rate": 2.9722222222222225e-06, "loss": 0.0068, "num_tokens": 19545697.0, "reward": 1.175150752067566, "reward_std": 0.2387315183877945, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.5131032466888428, "rewards/format_reward_step": 0.9921875, "step": 93 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.906177939660971e-06, "aux_brier/mean_r": 0.9568438623100519, "aux_brier/n_active_tok": 201.875, "aux_brier/n_step_records": 50.46875, "aux_brier/std_r": 0.09177897716654115, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5424356559949781, "calib/avg_num_step_conf": 6.32421875, "calib/ece": 0.47162845849802365, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004514752040175762, "calib/mean_conf": 0.06196837944664032, "calib/mu_c": 0.06407407407407406, "calib/mu_w": 0.0595593220338983, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02775554974491735, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2378.0, "completions/max_terminated_length": 2378.0, "completions/mean_length": 419.078125, "completions/mean_terminated_length": 419.078125, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.10026666666666667, "grad_norm": 0.0068230461329221725, "learning_rate": 2.944444444444445e-06, "loss": 0.0389, "num_tokens": 19761661.0, "reward": 1.152474045753479, "reward_std": 0.1997266560792923, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5239591598510742, "rewards/format_reward_step": 0.98828125, "step": 94 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.5015060450540574e-06, "aux_brier/mean_r": 0.9502352625131607, "aux_brier/n_active_tok": 225.375, "aux_brier/n_step_records": 56.34375, "aux_brier/std_r": 0.1195903504267335, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6110787360787361, "calib/avg_num_step_conf": 7.078125, "calib/ece": 0.5386117647058823, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.015423853923853925, "calib/mean_conf": 0.0731529411764706, "calib/mu_c": 0.07914102564102564, "calib/mu_w": 0.06371717171717171, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04445729545080831, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2643.0, "completions/max_terminated_length": 2643.0, "completions/mean_length": 454.34765625, "completions/mean_terminated_length": 454.34765625, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.10133333333333333, "grad_norm": 0.00697029335424304, "learning_rate": 2.916666666666667e-06, "loss": 0.0133, "num_tokens": 19984102.0, "reward": 1.22639000415802, "reward_std": 0.21001799404621124, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.4758726954460144, "rewards/format_reward_step": 0.99609375, "step": 95 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.7062705548465118e-07, "aux_brier/mean_r": 0.9452364314347506, "aux_brier/n_active_tok": 212.625, "aux_brier/n_step_records": 53.15625, "aux_brier/std_r": 0.08910403393747401, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6026949241234956, "calib/avg_num_step_conf": 6.71875, "calib/ece": 0.533715748031496, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012022684458398727, "calib/mean_conf": 0.08045748031496062, "calib/mu_c": 0.08509615384615384, "calib/mu_w": 0.07307346938775511, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.03719851410901925, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1948.0, "completions/max_terminated_length": 1948.0, "completions/mean_length": 406.0859375, "completions/mean_terminated_length": 406.0859375, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.1024, "grad_norm": 0.007448885124176741, "learning_rate": 2.888888888888889e-06, "loss": 0.0046, "num_tokens": 20193876.0, "reward": 1.222224473953247, "reward_std": 0.21784768998622894, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.47483548521995544, "rewards/format_reward_step": 0.98828125, "step": 96 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -8.889215185048371e-07, "aux_brier/mean_r": 0.9710249453783035, "aux_brier/n_active_tok": 219.125, "aux_brier/n_step_records": 54.78125, "aux_brier/std_r": 0.07545451291298377, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5865035665123263, "calib/avg_num_step_conf": 7.00390625, "calib/ece": 0.43564624505928856, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01067150544362408, "calib/mean_conf": 0.0821403162055336, "calib/mu_c": 0.08728625954198473, "calib/mu_w": 0.07661475409836065, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.03485147871008432, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2230.0, "completions/max_terminated_length": 2230.0, "completions/mean_length": 415.05078125, "completions/mean_terminated_length": 416.678466796875, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.10346666666666667, "grad_norm": 0.010315366089344025, "learning_rate": 2.861111111111111e-06, "loss": 0.0025, "num_tokens": 20405201.0, "reward": 1.1453659534454346, "reward_std": 0.271666020154953, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5580261945724487, "rewards/format_reward_step": 0.98828125, "step": 97 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.338425999225512e-06, "aux_brier/mean_r": 0.9326566867530346, "aux_brier/n_active_tok": 232.75, "aux_brier/n_step_records": 58.1875, "aux_brier/std_r": 0.12610501226299675, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4484565548780488, "calib/avg_num_step_conf": 7.81640625, "calib/ece": 0.42171314741035854, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006692073170731699, "calib/mean_conf": 0.09390438247011954, "calib/mu_c": 0.09062500000000001, "calib/mu_w": 0.09731707317073171, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002828685258964143, "calib/std_conf": 0.037457742734368214, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2562.0, "completions/max_terminated_length": 2562.0, "completions/mean_length": 441.90234375, "completions/mean_terminated_length": 447.1423034667969, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.10453333333333334, "grad_norm": 0.011622050777077675, "learning_rate": 2.8333333333333335e-06, "loss": 0.0242, "num_tokens": 20624512.0, "reward": 1.130502462387085, "reward_std": 0.2599697709083557, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5610722899436951, "rewards/format_reward_step": 0.98046875, "step": 98 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.222514389309097e-07, "aux_brier/mean_r": 0.9750086478888988, "aux_brier/n_active_tok": 232.875, "aux_brier/n_step_records": 58.21875, "aux_brier/std_r": 0.06022263411432505, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4948188354715918, "calib/avg_num_step_conf": 7.28125, "calib/ece": 0.21792156862745093, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0007359368391371829, "calib/mean_conf": 0.10364705882352941, "calib/mu_c": 0.10414634146341464, "calib/mu_w": 0.10341040462427746, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.041976101290910464, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2266.0, "completions/max_terminated_length": 2266.0, "completions/mean_length": 511.4921875, "completions/mean_terminated_length": 513.498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.1056, "grad_norm": 0.0069901542738080025, "learning_rate": 2.805555555555556e-06, "loss": 0.042, "num_tokens": 20861254.0, "reward": 0.9979431629180908, "reward_std": 0.2512783408164978, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.7261476516723633, "rewards/format_reward_step": 0.9921875, "step": 99 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 7.260137929732124e-08, "aux_brier/mean_r": 0.9512135703116655, "aux_brier/n_active_tok": 237.5, "aux_brier/n_step_records": 59.375, "aux_brier/std_r": 0.0975370660671615, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5722095092804476, "calib/avg_num_step_conf": 7.46875, "calib/ece": 0.34857142857142853, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012265446224256293, "calib/mean_conf": 0.10380952380952381, "calib/mu_c": 0.1105263157894737, "calib/mu_w": 0.0982608695652174, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.04462999814803803, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2536.0, "completions/max_terminated_length": 2536.0, "completions/mean_length": 478.3515625, "completions/mean_terminated_length": 480.22747802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.10666666666666667, "grad_norm": 0.006798333488404751, "learning_rate": 2.7777777777777783e-06, "loss": 0.0498, "num_tokens": 21091120.0, "reward": 1.091367244720459, "reward_std": 0.28487706184387207, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6232812404632568, "rewards/format_reward_step": 0.98046875, "step": 100 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.5825019481229283e-07, "aux_brier/mean_r": 0.9715043678879738, "aux_brier/n_active_tok": 249.25, "aux_brier/n_step_records": 62.3125, "aux_brier/std_r": 0.06585373247071402, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6253488077118214, "calib/avg_num_step_conf": 7.9609375, "calib/ece": 0.3217047244094488, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.018248604769152715, "calib/mean_conf": 0.10349212598425198, "calib/mu_c": 0.11398148148148149, "calib/mu_w": 0.09573287671232877, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.04132507409580167, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2390.0, "completions/max_terminated_length": 2390.0, "completions/mean_length": 492.88671875, "completions/mean_terminated_length": 494.81964111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.10773333333333333, "grad_norm": 0.010612020269036293, "learning_rate": 2.7500000000000004e-06, "loss": 0.0225, "num_tokens": 21324291.0, "reward": 1.0756598711013794, "reward_std": 0.2772831916809082, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.6463895440101624, "rewards/format_reward_step": 0.984375, "step": 101 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.9400852950546277e-07, "aux_brier/mean_r": 0.9528163000941277, "aux_brier/n_active_tok": 222.875, "aux_brier/n_step_records": 55.71875, "aux_brier/std_r": 0.08900663114764029, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.48057563242127005, "calib/avg_num_step_conf": 7.0625, "calib/ece": 0.49513636363636365, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004844572792978855, "calib/mean_conf": 0.09877667984189722, "calib/mu_c": 0.09678523489932885, "calib/mu_w": 0.1016298076923077, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002490118577075099, "calib/std_conf": 0.033584689270610014, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2844.0, "completions/max_terminated_length": 2844.0, "completions/mean_length": 410.69140625, "completions/mean_terminated_length": 410.69140625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.1088, "grad_norm": 0.009063053876161575, "learning_rate": 2.7222222222222224e-06, "loss": 0.051, "num_tokens": 21536124.0, "reward": 1.2032110691070557, "reward_std": 0.20576247572898865, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.5081568360328674, "rewards/format_reward_step": 0.98828125, "step": 102 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.0741288932192283e-06, "aux_brier/mean_r": 0.9643094725906849, "aux_brier/n_active_tok": 228.375, "aux_brier/n_step_records": 57.09375, "aux_brier/std_r": 0.0914556451534736, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.477803291236127, "calib/avg_num_step_conf": 7.421875, "calib/ece": 0.44535059760956175, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00857960199004977, "calib/mean_conf": 0.10875298804780878, "calib/mu_c": 0.10475373134328357, "calib/mu_w": 0.11333333333333334, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0101195219123506, "calib/std_conf": 0.044190762012516475, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2773.0, "completions/max_terminated_length": 2773.0, "completions/mean_length": 493.59375, "completions/mean_terminated_length": 497.4803161621094, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.10986666666666667, "grad_norm": 0.06507208198308945, "learning_rate": 2.6944444444444444e-06, "loss": 0.0255, "num_tokens": 21767036.0, "reward": 1.151968002319336, "reward_std": 0.237682044506073, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.553184449672699, "rewards/format_reward_step": 0.98046875, "step": 103 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 8.589181492435483e-08, "aux_brier/mean_r": 0.9585258103907108, "aux_brier/n_active_tok": 244.25, "aux_brier/n_step_records": 61.0625, "aux_brier/std_r": 0.07711920307338005, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4770601050877868, "calib/avg_num_step_conf": 7.67578125, "calib/ece": 0.2894039215686274, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.005117647058823546, "calib/mean_conf": 0.11389019607843137, "calib/mu_c": 0.1169607843137255, "calib/mu_w": 0.11184313725490196, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0016470588235294116, "calib/std_conf": 0.07039732964772301, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3042.0, "completions/max_terminated_length": 3042.0, "completions/mean_length": 450.671875, "completions/mean_terminated_length": 450.671875, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.11093333333333333, "grad_norm": 0.007625887636095285, "learning_rate": 2.666666666666667e-06, "loss": 0.0233, "num_tokens": 21989088.0, "reward": 1.0647350549697876, "reward_std": 0.2007926106452942, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.6730026602745056, "rewards/format_reward_step": 0.99609375, "step": 104 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.933796837440884e-07, "aux_brier/mean_r": 0.9700458273291588, "aux_brier/n_active_tok": 234.25, "aux_brier/n_step_records": 58.5625, "aux_brier/std_r": 0.06699328882677946, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4370927318295739, "calib/avg_num_step_conf": 7.35546875, "calib/ece": 0.3704545454545455, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.011060776942355868, "calib/mean_conf": 0.11073122529644269, "calib/mu_c": 0.10491666666666669, "calib/mu_w": 0.11597744360902255, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0034387351778656103, "calib/std_conf": 0.042635031863209744, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1899.0, "completions/max_terminated_length": 1899.0, "completions/mean_length": 466.12890625, "completions/mean_terminated_length": 467.9568786621094, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.112, "grad_norm": 0.007053926587104797, "learning_rate": 2.6388888888888893e-06, "loss": 0.0356, "num_tokens": 22214177.0, "reward": 1.113884687423706, "reward_std": 0.30461353063583374, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6039764881134033, "rewards/format_reward_step": 0.98828125, "step": 105 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 9.904037203967153e-07, "aux_brier/mean_r": 0.9507361706346273, "aux_brier/n_active_tok": 209.5, "aux_brier/n_step_records": 52.375, "aux_brier/std_r": 0.0957951597229112, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5116509826515588, "calib/avg_num_step_conf": 6.6875, "calib/ece": 0.40036800000000006, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005174828756161598, "calib/mean_conf": 0.114672, "calib/mu_c": 0.1121259842519685, "calib/mu_w": 0.1173008130081301, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.00352, "calib/std_conf": 0.0396669688279808, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2984.0, "completions/max_terminated_length": 2984.0, "completions/mean_length": 470.02734375, "completions/mean_terminated_length": 473.72833251953125, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.11306666666666666, "grad_norm": 0.00906135980039835, "learning_rate": 2.6111111111111113e-06, "loss": 0.0201, "num_tokens": 22439088.0, "reward": 1.1326165199279785, "reward_std": 0.18609586358070374, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5773407220840454, "rewards/format_reward_step": 0.9765625, "step": 106 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.955192106330774e-07, "aux_brier/mean_r": 0.9620216339826584, "aux_brier/n_active_tok": 221.875, "aux_brier/n_step_records": 55.46875, "aux_brier/std_r": 0.08636283704981906, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5112620298955703, "calib/avg_num_step_conf": 7.01171875, "calib/ece": 0.5195714285714286, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00022169135212614377, "calib/mean_conf": 0.11931746031746034, "calib/mu_c": 0.11939751552795033, "calib/mu_w": 0.11917582417582419, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.05628571540479323, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2697.0, "completions/max_terminated_length": 2697.0, "completions/mean_length": 431.62109375, "completions/mean_terminated_length": 433.3137512207031, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.11413333333333334, "grad_norm": 0.012936992570757866, "learning_rate": 2.5833333333333337e-06, "loss": 0.0289, "num_tokens": 22654199.0, "reward": 1.240332007408142, "reward_std": 0.2317640781402588, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.4847656488418579, "rewards/format_reward_step": 0.98046875, "step": 107 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.5913763619249695e-07, "aux_brier/mean_r": 0.947864418849349, "aux_brier/n_active_tok": 234.25, "aux_brier/n_step_records": 58.5625, "aux_brier/std_r": 0.10858365721651353, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5257129014396458, "calib/avg_num_step_conf": 7.453125, "calib/ece": 0.5413385826771653, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0020023532668881905, "calib/mean_conf": 0.1241732283464567, "calib/mu_c": 0.12485119047619049, "calib/mu_w": 0.1228488372093023, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.002047244094488189, "calib/std_conf": 0.051391134761721934, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2067.0, "completions/max_terminated_length": 2067.0, "completions/mean_length": 466.7578125, "completions/mean_terminated_length": 468.5882568359375, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.1152, "grad_norm": 0.023743530735373497, "learning_rate": 2.5555555555555557e-06, "loss": 0.0231, "num_tokens": 22876921.0, "reward": 1.2706942558288574, "reward_std": 0.2029799222946167, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.4812142550945282, "rewards/format_reward_step": 0.98828125, "step": 108 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.174225951338652e-08, "aux_brier/mean_r": 0.970881124958396, "aux_brier/n_active_tok": 229.5, "aux_brier/n_step_records": 57.375, "aux_brier/std_r": 0.06951277135522105, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5561998215878681, "calib/avg_num_step_conf": 7.421875, "calib/ece": 0.4140035856573705, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003254224544411863, "calib/mean_conf": 0.11587689243027889, "calib/mu_c": 0.11740676691729324, "calib/mu_w": 0.11415254237288137, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.04247941455881125, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2714.0, "completions/max_terminated_length": 2714.0, "completions/mean_length": 472.9609375, "completions/mean_terminated_length": 476.6850280761719, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.11626666666666667, "grad_norm": 0.05211355909705162, "learning_rate": 2.5277777777777778e-06, "loss": -0.0204, "num_tokens": 23102599.0, "reward": 1.1488374471664429, "reward_std": 0.15115833282470703, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5640995502471924, "rewards/format_reward_step": 0.9765625, "step": 109 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.271944652936341e-07, "aux_brier/mean_r": 0.9641071297228336, "aux_brier/n_active_tok": 217.75, "aux_brier/n_step_records": 54.4375, "aux_brier/std_r": 0.06642241131339688, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.43316316884099076, "calib/avg_num_step_conf": 6.82421875, "calib/ece": 0.3868809523809524, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.009405558706749861, "calib/mean_conf": 0.11026190476190477, "calib/mu_c": 0.10544715447154472, "calib/mu_w": 0.11485271317829458, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.004523809523809523, "calib/std_conf": 0.04006436856007984, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2883.0, "completions/max_terminated_length": 2883.0, "completions/mean_length": 467.7421875, "completions/mean_terminated_length": 467.7421875, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.11733333333333333, "grad_norm": 0.007286607287824154, "learning_rate": 2.5e-06, "loss": 0.1205, "num_tokens": 23327261.0, "reward": 1.1156096458435059, "reward_std": 0.28626197576522827, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5874385833740234, "rewards/format_reward_step": 0.9765625, "step": 110 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.032805717948726e-06, "aux_brier/mean_r": 0.9505273532122374, "aux_brier/n_active_tok": 235.125, "aux_brier/n_step_records": 58.78125, "aux_brier/std_r": 0.09087945113424212, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5248902376033059, "calib/avg_num_step_conf": 7.40625, "calib/ece": 0.362281124497992, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004171487603305796, "calib/mean_conf": 0.12727710843373494, "calib/mu_c": 0.12942148760330582, "calib/mu_w": 0.12525000000000003, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0018072289156626504, "calib/std_conf": 0.052667322940273605, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3017.0, "completions/max_terminated_length": 3017.0, "completions/mean_length": 515.25390625, "completions/mean_terminated_length": 517.2745361328125, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.1184, "grad_norm": 0.0968957245349884, "learning_rate": 2.4722222222222226e-06, "loss": 0.1184, "num_tokens": 23566574.0, "reward": 1.1099567413330078, "reward_std": 0.24360547959804535, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.6038892269134521, "rewards/format_reward_step": 0.97265625, "step": 111 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -8.00301592107644e-07, "aux_brier/mean_r": 0.927757628262043, "aux_brier/n_active_tok": 213.5, "aux_brier/n_step_records": 53.375, "aux_brier/std_r": 0.12386916055402253, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6456380208333334, "calib/avg_num_step_conf": 6.8515625, "calib/ece": 0.3512096774193549, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.025682291666666607, "calib/mean_conf": 0.13266129032258067, "calib/mu_c": 0.14591666666666664, "calib/mu_w": 0.12023437500000003, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.0, "calib/std_conf": 0.05318271716250197, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2966.0, "completions/max_terminated_length": 2966.0, "completions/mean_length": 496.76171875, "completions/mean_terminated_length": 502.6521911621094, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.11946666666666667, "grad_norm": 0.007562459446489811, "learning_rate": 2.4444444444444447e-06, "loss": 0.0182, "num_tokens": 23801665.0, "reward": 1.1025023460388184, "reward_std": 0.2597165107727051, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6131343841552734, "rewards/format_reward_step": 0.9609375, "step": 112 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.306155305609181e-07, "aux_brier/mean_r": 0.9560148566961288, "aux_brier/n_active_tok": 221.25, "aux_brier/n_step_records": 55.3125, "aux_brier/std_r": 0.09734270189073868, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4966818477553676, "calib/avg_num_step_conf": 7.1875, "calib/ece": 0.43724302788844627, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001425634352634958, "calib/mean_conf": 0.14044621513944222, "calib/mu_c": 0.14104827586206894, "calib/mu_w": 0.139622641509434, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0572453315448762, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2637.0, "completions/max_terminated_length": 2637.0, "completions/mean_length": 459.30078125, "completions/mean_terminated_length": 462.9173278808594, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.12053333333333334, "grad_norm": 0.03345976024866104, "learning_rate": 2.4166666666666667e-06, "loss": 0.0041, "num_tokens": 24024446.0, "reward": 1.1983696222305298, "reward_std": 0.28644973039627075, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.5512908697128296, "rewards/format_reward_step": 0.98046875, "step": 113 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 7.858275019034799e-07, "aux_brier/mean_r": 0.9533816874027252, "aux_brier/n_active_tok": 226.5, "aux_brier/n_step_records": 56.625, "aux_brier/std_r": 0.08270411010744283, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.46139021796916535, "calib/avg_num_step_conf": 7.2265625, "calib/ece": 0.47403187250996015, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0105544922913344, "calib/mean_conf": 0.13568924302788846, "calib/mu_c": 0.13152631578947369, "calib/mu_w": 0.14208080808080809, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0020717131474103584, "calib/std_conf": 0.05428843437966434, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3010.0, "completions/max_terminated_length": 3010.0, "completions/mean_length": 458.37109375, "completions/mean_terminated_length": 461.9803161621094, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.1216, "grad_norm": 0.011670351028442383, "learning_rate": 2.388888888888889e-06, "loss": 0.0572, "num_tokens": 24246813.0, "reward": 1.2144755125045776, "reward_std": 0.2181263417005539, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.5219646096229553, "rewards/format_reward_step": 0.98046875, "step": 114 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.111080653430466e-07, "aux_brier/mean_r": 0.9602911379188299, "aux_brier/n_active_tok": 203.0, "aux_brier/n_step_records": 50.75, "aux_brier/std_r": 0.07378150732256472, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.582388134114421, "calib/avg_num_step_conf": 6.3515625, "calib/ece": 0.4054117647058823, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.023900660600772783, "calib/mean_conf": 0.15145098039215688, "calib/mu_c": 0.16204225352112675, "calib/mu_w": 0.13814159292035397, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.07288893343084318, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1391.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 418.2265625, "completions/mean_terminated_length": 418.2265625, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.12266666666666666, "grad_norm": 0.008021569810807705, "learning_rate": 2.361111111111111e-06, "loss": 0.0674, "num_tokens": 24459143.0, "reward": 1.1924775838851929, "reward_std": 0.28190475702285767, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.5824105739593506, "rewards/format_reward_step": 0.984375, "step": 115 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.9303265438863626e-07, "aux_brier/mean_r": 0.9693176839500666, "aux_brier/n_active_tok": 200.25, "aux_brier/n_step_records": 50.0625, "aux_brier/std_r": 0.05107466934714466, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5021464646464646, "calib/avg_num_step_conf": 6.2578125, "calib/ece": 0.3946456692913385, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004508838383838365, "calib/mean_conf": 0.17228346456692917, "calib/mu_c": 0.17423611111111112, "calib/mu_w": 0.16972727272727275, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.08138262865172452, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2511.0, "completions/max_terminated_length": 2511.0, "completions/mean_length": 482.71484375, "completions/mean_terminated_length": 484.6078796386719, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.12373333333333333, "grad_norm": 0.007442351896315813, "learning_rate": 2.3333333333333336e-06, "loss": 0.0496, "num_tokens": 24687238.0, "reward": 1.2012077569961548, "reward_std": 0.18627876043319702, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5860809087753296, "rewards/format_reward_step": 0.984375, "step": 116 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 7.129237786229758e-08, "aux_brier/mean_r": 0.9578664638102055, "aux_brier/n_active_tok": 230.625, "aux_brier/n_step_records": 57.65625, "aux_brier/std_r": 0.08411719999276102, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.475886569416499, "calib/avg_num_step_conf": 7.2734375, "calib/ece": 0.29163385826771654, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.012561619718309852, "calib/mean_conf": 0.17639763779527562, "calib/mu_c": 0.16937500000000003, "calib/mu_w": 0.18193661971830988, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.013543307086614173, "calib/std_conf": 0.08694411361361841, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2661.0, "completions/max_terminated_length": 2661.0, "completions/mean_length": 450.6640625, "completions/mean_terminated_length": 452.431396484375, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.1248, "grad_norm": 0.027483254671096802, "learning_rate": 2.305555555555556e-06, "loss": -0.0025, "num_tokens": 24909208.0, "reward": 1.0944743156433105, "reward_std": 0.27873146533966064, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6591474413871765, "rewards/format_reward_step": 0.984375, "step": 117 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.2244894554445125e-07, "aux_brier/mean_r": 0.9603268560022116, "aux_brier/n_active_tok": 234.75, "aux_brier/n_step_records": 58.6875, "aux_brier/std_r": 0.06616785260848701, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.48898533570066416, "calib/avg_num_step_conf": 7.5546875, "calib/ece": 0.38770161290322575, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.011895179851384247, "calib/mean_conf": 0.18649193548387097, "calib/mu_c": 0.18116788321167884, "calib/mu_w": 0.1930630630630631, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.010887096774193552, "calib/std_conf": 0.1076892180160069, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2999.0, "completions/max_terminated_length": 2999.0, "completions/mean_length": 491.203125, "completions/mean_terminated_length": 495.07086181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.12586666666666665, "grad_norm": 0.011950023472309113, "learning_rate": 2.277777777777778e-06, "loss": 0.0027, "num_tokens": 25138964.0, "reward": 1.1651744842529297, "reward_std": 0.17621293663978577, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.5825730562210083, "rewards/format_reward_step": 0.96875, "step": 118 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -9.911992571165484e-08, "aux_brier/mean_r": 0.9554366022348404, "aux_brier/n_active_tok": 224.0, "aux_brier/n_step_records": 56.0, "aux_brier/std_r": 0.08520230033900589, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4869315373208107, "calib/avg_num_step_conf": 7.05859375, "calib/ece": 0.33529411764705885, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0060924369747898666, "calib/mean_conf": 0.1980392156862745, "calib/mu_c": 0.20088235294117646, "calib/mu_w": 0.1947899159663866, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.10495226940989325, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1802.0, "completions/max_terminated_length": 1802.0, "completions/mean_length": 512.59765625, "completions/mean_terminated_length": 514.6078491210938, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.12693333333333334, "grad_norm": 0.008595707826316357, "learning_rate": 2.25e-06, "loss": 0.0037, "num_tokens": 25375253.0, "reward": 1.1784425973892212, "reward_std": 0.26389604806900024, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.62002032995224, "rewards/format_reward_step": 0.984375, "step": 119 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.6256477076168636e-07, "aux_brier/mean_r": 0.9540219716727734, "aux_brier/n_active_tok": 208.375, "aux_brier/n_step_records": 52.09375, "aux_brier/std_r": 0.0882777045480907, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5290421195652174, "calib/avg_num_step_conf": 6.8359375, "calib/ece": 0.44047619047619035, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.016758152173913077, "calib/mean_conf": 0.19444444444444445, "calib/mu_c": 0.20056250000000003, "calib/mu_w": 0.18380434782608696, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.09563949801335697, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1426.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 444.97265625, "completions/mean_terminated_length": 448.47637939453125, "completions/min_length": 0.0, "completions/min_terminated_length": 129.0, "epoch": 0.128, "grad_norm": 0.01172333862632513, "learning_rate": 2.222222222222222e-06, "loss": 0.0095, "num_tokens": 25595854.0, "reward": 1.24784255027771, "reward_std": 0.2769468128681183, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.553869903087616, "rewards/format_reward_step": 0.96875, "step": 120 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.0046439955522253e-07, "aux_brier/mean_r": 0.9500356279313564, "aux_brier/n_active_tok": 236.625, "aux_brier/n_step_records": 59.15625, "aux_brier/std_r": 0.08284603539505042, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4771746031746032, "calib/avg_num_step_conf": 7.703125, "calib/ece": 0.3030278884462152, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.00796812749003984, "calib/gap": -0.02618158730158726, "calib/mean_conf": 0.2342629482071713, "calib/mu_c": 0.22112000000000004, "calib/mu_w": 0.2473015873015873, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.019641434262948204, "calib/std_conf": 0.1336305581720134, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2551.0, "completions/max_terminated_length": 2551.0, "completions/mean_length": 515.96875, "completions/mean_terminated_length": 524.1587524414062, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.12906666666666666, "grad_norm": 0.020066162571310997, "learning_rate": 2.1944444444444445e-06, "loss": -0.011, "num_tokens": 25832998.0, "reward": 1.1299539804458618, "reward_std": 0.33891183137893677, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.6291910409927368, "rewards/format_reward_step": 0.96875, "step": 121 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.5776045442515993e-07, "aux_brier/mean_r": 0.9499933030456305, "aux_brier/n_active_tok": 211.5, "aux_brier/n_step_records": 52.875, "aux_brier/std_r": 0.08888547073001973, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6227531060005287, "calib/avg_num_step_conf": 6.72265625, "calib/ece": 0.3854031620553359, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.04851189532117367, "calib/mean_conf": 0.23159288537549405, "calib/mu_c": 0.2501923076923077, "calib/mu_w": 0.20168041237113404, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.00019762845849802388, "calib/std_conf": 0.1269235098028985, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2626.0, "completions/max_terminated_length": 2626.0, "completions/mean_length": 471.984375, "completions/mean_terminated_length": 475.7007751464844, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.13013333333333332, "grad_norm": 0.0151255177333951, "learning_rate": 2.166666666666667e-06, "loss": -0.0226, "num_tokens": 26061170.0, "reward": 1.2516987323760986, "reward_std": 0.2080809772014618, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.6083576679229736, "rewards/format_reward_step": 0.98046875, "step": 122 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.329810063177561e-07, "aux_brier/mean_r": 0.9279930610209703, "aux_brier/n_active_tok": 232.875, "aux_brier/n_step_records": 58.21875, "aux_brier/std_r": 0.11340552975889295, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5146741886409737, "calib/avg_num_step_conf": 7.34375, "calib/ece": 0.2926587301587302, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.019563894523326547, "calib/mean_conf": 0.2470238095238095, "calib/mu_c": 0.2560294117647059, "calib/mu_w": 0.23646551724137935, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.12829263202969443, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2838.0, "completions/max_terminated_length": 2838.0, "completions/mean_length": 526.9375, "completions/mean_terminated_length": 529.0039672851562, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.1312, "grad_norm": 0.00813714973628521, "learning_rate": 2.138888888888889e-06, "loss": 0.0188, "num_tokens": 26301354.0, "reward": 1.1827906370162964, "reward_std": 0.28444749116897583, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.6452249884605408, "rewards/format_reward_step": 0.98046875, "step": 123 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 7.931757783907756e-08, "aux_brier/mean_r": 0.9353038147091866, "aux_brier/n_active_tok": 225.5, "aux_brier/n_step_records": 56.375, "aux_brier/std_r": 0.09529420151375234, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6057159810126582, "calib/avg_num_step_conf": 7.19921875, "calib/ece": 0.3789566929133858, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.038809994725738345, "calib/mean_conf": 0.2559645669291339, "calib/mu_c": 0.27063291139240503, "calib/mu_w": 0.23182291666666668, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.006437007874015749, "calib/std_conf": 0.13040954087334525, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1407.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 485.30859375, "completions/mean_terminated_length": 487.2117919921875, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.13226666666666667, "grad_norm": 0.03915087506175041, "learning_rate": 2.1111111111111114e-06, "loss": -0.0431, "num_tokens": 26532409.0, "reward": 1.2700769901275635, "reward_std": 0.23944897949695587, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.6271827220916748, "rewards/format_reward_step": 0.9921875, "step": 124 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.0031124897391663e-07, "aux_brier/mean_r": 0.9100860338658094, "aux_brier/n_active_tok": 210.5, "aux_brier/n_step_records": 52.625, "aux_brier/std_r": 0.11714433634188026, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5302374393283484, "calib/avg_num_step_conf": 7.0546875, "calib/ece": 0.24278659919028336, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.012145748987854251, "calib/gap": 0.017246788665879642, "calib/mean_conf": 0.2958368825910931, "calib/mu_c": 0.3042857142857143, "calib/mu_w": 0.2870389256198347, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.014251012145748986, "calib/std_conf": 0.17502843640911678, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2819.0, "completions/max_terminated_length": 2819.0, "completions/mean_length": 523.203125, "completions/mean_terminated_length": 533.62548828125, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.13333333333333333, "grad_norm": 0.020375991240143776, "learning_rate": 2.0833333333333334e-06, "loss": 0.0403, "num_tokens": 26771157.0, "reward": 1.1364705562591553, "reward_std": 0.2968687117099762, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.6552572250366211, "rewards/format_reward_step": 0.9609375, "step": 125 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.848494819989277e-07, "aux_brier/mean_r": 0.9077005349099636, "aux_brier/n_active_tok": 240.0, "aux_brier/n_step_records": 60.0, "aux_brier/std_r": 0.12330675616976805, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5398786717752235, "calib/avg_num_step_conf": 7.625, "calib/ece": 0.32916996047430824, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.015810276679841896, "calib/gap": -0.010234355044699883, "calib/mean_conf": 0.3147826086956522, "calib/mu_c": 0.3104137931034483, "calib/mu_w": 0.3206481481481482, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03541501976284585, "calib/std_conf": 0.17633413578935334, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2760.0, "completions/max_terminated_length": 2760.0, "completions/mean_length": 513.83984375, "completions/mean_terminated_length": 517.8858032226562, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.1344, "grad_norm": 0.01395754236727953, "learning_rate": 2.0555555555555555e-06, "loss": 0.0231, "num_tokens": 27008164.0, "reward": 1.216122031211853, "reward_std": 0.2721666097640991, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.637925386428833, "rewards/format_reward_step": 0.98046875, "step": 126 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.5298079181214064e-07, "aux_brier/mean_r": 0.9193386882543564, "aux_brier/n_active_tok": 224.375, "aux_brier/n_step_records": 56.09375, "aux_brier/std_r": 0.10538892593467608, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5117407844680572, "calib/avg_num_step_conf": 7.1328125, "calib/ece": 0.2211255060728745, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.012145748987854251, "calib/gap": 0.01618358913813467, "calib/mean_conf": 0.3084291497975708, "calib/mu_c": 0.3163571428571429, "calib/mu_w": 0.3001735537190082, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.009716599190283401, "calib/std_conf": 0.17280810670041616, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2838.0, "completions/max_terminated_length": 2838.0, "completions/mean_length": 521.43359375, "completions/mean_terminated_length": 527.6166381835938, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.13546666666666668, "grad_norm": 0.009091866202652454, "learning_rate": 2.027777777777778e-06, "loss": 0.0523, "num_tokens": 27245323.0, "reward": 1.1404777765274048, "reward_std": 0.24780690670013428, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.66347336769104, "rewards/format_reward_step": 0.96484375, "step": 127 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.150997701650837e-08, "aux_brier/mean_r": 0.9189047124236822, "aux_brier/n_active_tok": 208.875, "aux_brier/n_step_records": 52.21875, "aux_brier/std_r": 0.10833306721178815, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5271963824289406, "calib/avg_num_step_conf": 6.94140625, "calib/ece": 0.23056224899598388, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.020080321285140562, "calib/gap": -0.0005775193798449041, "calib/mean_conf": 0.3402008032128514, "calib/mu_c": 0.33992248062015507, "calib/mu_w": 0.34049999999999997, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.026345381526104418, "calib/std_conf": 0.16651989472948964, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2977.0, "completions/max_terminated_length": 2977.0, "completions/mean_length": 510.91796875, "completions/mean_terminated_length": 516.976318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.13653333333333334, "grad_norm": 0.011102133430540562, "learning_rate": 2.0000000000000003e-06, "loss": -0.023, "num_tokens": 27482782.0, "reward": 1.1620869636535645, "reward_std": 0.2814047932624817, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.6717855930328369, "rewards/format_reward_step": 0.97265625, "step": 128 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 8.531554108320849e-08, "aux_brier/mean_r": 0.9088293649256229, "aux_brier/n_active_tok": 231.625, "aux_brier/n_step_records": 57.90625, "aux_brier/std_r": 0.11498634936287999, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5359417344173442, "calib/avg_num_step_conf": 7.24609375, "calib/ece": 0.31440944881889765, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.017518970189701888, "calib/mean_conf": 0.3527559055118111, "calib/mu_c": 0.3589634146341464, "calib/mu_w": 0.3414444444444445, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.010748031496063001, "calib/std_conf": 0.16518744859639412, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2563.0, "completions/max_terminated_length": 2563.0, "completions/mean_length": 468.5625, "completions/mean_terminated_length": 468.5625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.1376, "grad_norm": 0.008855071850121021, "learning_rate": 1.9722222222222224e-06, "loss": 0.0093, "num_tokens": 27705118.0, "reward": 1.301955223083496, "reward_std": 0.23477554321289062, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.6609461307525635, "rewards/format_reward_step": 0.9921875, "step": 129 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.7721546882731865e-07, "aux_brier/mean_r": 0.9020894207060337, "aux_brier/n_active_tok": 224.25, "aux_brier/n_step_records": 56.0625, "aux_brier/std_r": 0.1321611349703744, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5029802685667306, "calib/avg_num_step_conf": 7.2109375, "calib/ece": 0.33723320158102754, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": -0.009704713620169891, "calib/mean_conf": 0.35359683794466407, "calib/mu_c": 0.3501829268292683, "calib/mu_w": 0.3598876404494382, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.02130434782608695, "calib/std_conf": 0.1700642389378069, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2190.0, "completions/max_terminated_length": 2190.0, "completions/mean_length": 454.83203125, "completions/mean_terminated_length": 458.41339111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.13866666666666666, "grad_norm": 0.009399204514920712, "learning_rate": 1.944444444444445e-06, "loss": -0.0488, "num_tokens": 27926843.0, "reward": 1.2932324409484863, "reward_std": 0.1858869194984436, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.6416797637939453, "rewards/format_reward_step": 0.984375, "step": 130 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.090573959778741e-08, "aux_brier/mean_r": 0.8913759496062994, "aux_brier/n_active_tok": 220.125, "aux_brier/n_step_records": 55.03125, "aux_brier/std_r": 0.1489139118930325, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.527970047822804, "calib/avg_num_step_conf": 7.0546875, "calib/ece": 0.16857707509881423, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.019762845849802372, "calib/gap": 0.011159702995217624, "calib/mean_conf": 0.38628458498023716, "calib/mu_c": 0.39232758620689645, "calib/mu_w": 0.3811678832116788, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04818181818181817, "calib/std_conf": 0.17406903711839675, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2761.0, "completions/max_terminated_length": 2761.0, "completions/mean_length": 487.203125, "completions/mean_terminated_length": 487.203125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.13973333333333332, "grad_norm": 0.00836309976875782, "learning_rate": 1.916666666666667e-06, "loss": 0.0405, "num_tokens": 28157775.0, "reward": 1.1294946670532227, "reward_std": 0.2559651732444763, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.7132909893989563, "rewards/format_reward_step": 0.98828125, "step": 131 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -8.895790240548429e-08, "aux_brier/mean_r": 0.8524616025388241, "aux_brier/n_active_tok": 264.75, "aux_brier/n_step_records": 66.1875, "aux_brier/std_r": 0.1806037900969386, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.42480645161290326, "calib/avg_num_step_conf": 8.4375, "calib/ece": 0.30229411764705877, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.047058823529411764, "calib/gap": -0.06503387096774194, "calib/mean_conf": 0.44201960784313726, "calib/mu_c": 0.4165161290322581, "calib/mu_w": 0.48155000000000003, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.06823529411764706, "calib/std_conf": 0.21549746281169432, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2288.0, "completions/max_terminated_length": 2288.0, "completions/mean_length": 531.33203125, "completions/mean_terminated_length": 531.33203125, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.1408, "grad_norm": 0.008158460259437561, "learning_rate": 1.888888888888889e-06, "loss": 0.0289, "num_tokens": 28399388.0, "reward": 1.261675238609314, "reward_std": 0.2958708703517914, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6482632160186768, "rewards/format_reward_step": 0.98828125, "step": 132 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.2898586573828474e-08, "aux_brier/mean_r": 0.8725182209163904, "aux_brier/n_active_tok": 253.125, "aux_brier/n_step_records": 63.28125, "aux_brier/std_r": 0.1537803045939654, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.46758900928792574, "calib/avg_num_step_conf": 8.09765625, "calib/ece": 0.174984, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.036, "calib/gap": -0.021414860681114534, "calib/mean_conf": 0.46317600000000003, "calib/mu_c": 0.45152631578947366, "calib/mu_w": 0.4729411764705882, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.09108000000000001, "calib/std_conf": 0.2017700350002448, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2614.0, "completions/max_terminated_length": 2614.0, "completions/mean_length": 571.69140625, "completions/mean_terminated_length": 573.933349609375, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.14186666666666667, "grad_norm": 0.018184488639235497, "learning_rate": 1.8611111111111113e-06, "loss": 0.0669, "num_tokens": 28652085.0, "reward": 1.0995794534683228, "reward_std": 0.39164456725120544, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6795676350593567, "rewards/format_reward_step": 0.96875, "step": 133 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.516849356177687e-08, "aux_brier/mean_r": 0.8617879282683134, "aux_brier/n_active_tok": 249.125, "aux_brier/n_step_records": 62.28125, "aux_brier/std_r": 0.1777062778128311, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5413636363636363, "calib/avg_num_step_conf": 8.140625, "calib/ece": 0.23357599999999998, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.056, "calib/gap": 0.012542857142857078, "calib/mean_conf": 0.439624, "calib/mu_c": 0.4451428571428571, "calib/mu_w": 0.43260000000000004, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0566, "calib/std_conf": 0.22501760514235325, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1667.0, "completions/max_terminated_length": 1667.0, "completions/mean_length": 542.84375, "completions/mean_terminated_length": 551.4603271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.14293333333333333, "grad_norm": 0.021115439012646675, "learning_rate": 1.8333333333333333e-06, "loss": -0.0145, "num_tokens": 28900005.0, "reward": 1.2022333145141602, "reward_std": 0.32995420694351196, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6761208772659302, "rewards/format_reward_step": 0.97265625, "step": 134 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.6903640487342386e-08, "aux_brier/mean_r": 0.8582585956901312, "aux_brier/n_active_tok": 258.25, "aux_brier/n_step_records": 64.5625, "aux_brier/std_r": 0.17754859966225922, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4545179914070891, "calib/avg_num_step_conf": 8.58984375, "calib/ece": 0.261252, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.084, "calib/gap": -0.03793461331901177, "calib/mean_conf": 0.487548, "calib/mu_c": 0.47267763157894743, "calib/mu_w": 0.5106122448979592, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0704, "calib/std_conf": 0.23584587699597379, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2731.0, "completions/max_terminated_length": 2731.0, "completions/mean_length": 542.625, "completions/mean_terminated_length": 546.8976440429688, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.144, "grad_norm": 0.013981486670672894, "learning_rate": 1.8055555555555557e-06, "loss": 0.0241, "num_tokens": 29144797.0, "reward": 1.2408422231674194, "reward_std": 0.3398025929927826, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.650868833065033, "rewards/format_reward_step": 0.96875, "step": 135 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.972135108667231e-08, "aux_brier/mean_r": 0.8717589396983385, "aux_brier/n_active_tok": 274.75, "aux_brier/n_step_records": 68.6875, "aux_brier/std_r": 0.15413922793231905, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.48408564814814814, "calib/avg_num_step_conf": 8.64453125, "calib/ece": 0.18665079365079365, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.05555555555555555, "calib/gap": -0.026870370370370322, "calib/mean_conf": 0.47053968253968254, "calib/mu_c": 0.4551851851851852, "calib/mu_w": 0.4820555555555555, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.11430952380952383, "calib/std_conf": 0.1984476371618904, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2756.0, "completions/max_terminated_length": 2756.0, "completions/mean_length": 545.09765625, "completions/mean_terminated_length": 549.3897705078125, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.14506666666666668, "grad_norm": 0.012743341736495495, "learning_rate": 1.777777777777778e-06, "loss": -0.0309, "num_tokens": 29392830.0, "reward": 1.0838903188705444, "reward_std": 0.26073917746543884, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.6871236562728882, "rewards/format_reward_step": 0.98046875, "step": 136 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.1670128131100483e-08, "aux_brier/mean_r": 0.8443158883601427, "aux_brier/n_active_tok": 264.5, "aux_brier/n_step_records": 66.125, "aux_brier/std_r": 0.18676162557676435, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4611259222518445, "calib/avg_num_step_conf": 8.45703125, "calib/ece": 0.22016496062992125, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.06692913385826772, "calib/gap": -0.041219685039370235, "calib/mean_conf": 0.4909374015748031, "calib/mu_c": 0.470327559055118, "calib/mu_w": 0.5115472440944883, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10555118110236222, "calib/std_conf": 0.21249853556406037, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2459.0, "completions/max_terminated_length": 2459.0, "completions/mean_length": 506.9921875, "completions/mean_terminated_length": 508.98040771484375, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.14613333333333334, "grad_norm": 0.00968876387923956, "learning_rate": 1.75e-06, "loss": 0.0459, "num_tokens": 29629604.0, "reward": 1.1618894338607788, "reward_std": 0.3098908066749573, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6788074374198914, "rewards/format_reward_step": 0.9921875, "step": 137 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.4581574700467392e-07, "aux_brier/mean_r": 0.8592845667153597, "aux_brier/n_active_tok": 252.625, "aux_brier/n_step_records": 63.15625, "aux_brier/std_r": 0.1593852590303868, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5193092105263157, "calib/avg_num_step_conf": 8.0703125, "calib/ece": 0.17367658730158725, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.031746031746031744, "calib/gap": 0.02093605263157905, "calib/mean_conf": 0.5088630952380953, "calib/mu_c": 0.517171052631579, "calib/mu_w": 0.496235, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03968253968253967, "calib/std_conf": 0.20308809886338322, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1843.0, "completions/max_terminated_length": 1843.0, "completions/mean_length": 508.296875, "completions/mean_terminated_length": 508.296875, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.1472, "grad_norm": 0.008025892078876495, "learning_rate": 1.7222222222222224e-06, "loss": 0.0067, "num_tokens": 29864064.0, "reward": 1.2632548809051514, "reward_std": 0.30502110719680786, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.709269642829895, "rewards/format_reward_step": 0.984375, "step": 138 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.139147943837962e-08, "aux_brier/mean_r": 0.8623798973858356, "aux_brier/n_active_tok": 234.625, "aux_brier/n_step_records": 58.65625, "aux_brier/std_r": 0.17043262324295938, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5323945440767871, "calib/avg_num_step_conf": 7.4765625, "calib/ece": 0.2103529411764705, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.07450980392156863, "calib/gap": 0.01609307906036883, "calib/mean_conf": 0.5058823529411764, "calib/mu_c": 0.5126351351351351, "calib/mu_w": 0.4965420560747663, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.06792156862745095, "calib/std_conf": 0.22263639987954892, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2286.0, "completions/max_terminated_length": 2286.0, "completions/mean_length": 482.04296875, "completions/mean_terminated_length": 483.933349609375, "completions/min_length": 0.0, "completions/min_terminated_length": 92.0, "epoch": 0.14826666666666666, "grad_norm": 0.010458669625222683, "learning_rate": 1.6944444444444446e-06, "loss": 0.0176, "num_tokens": 30090563.0, "reward": 1.2498453855514526, "reward_std": 0.27873075008392334, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.7025066614151001, "rewards/format_reward_step": 0.9921875, "step": 139 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.2785630215672583e-07, "aux_brier/mean_r": 0.8477766457945108, "aux_brier/n_active_tok": 261.25, "aux_brier/n_step_records": 65.3125, "aux_brier/std_r": 0.1824449956184253, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5135658914728682, "calib/avg_num_step_conf": 8.39453125, "calib/ece": 0.2286759842519685, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.07480314960629922, "calib/gap": 0.01140654761904758, "calib/mean_conf": 0.4975444881889764, "calib/mu_c": 0.5014065476190476, "calib/mu_w": 0.49, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03240157480314959, "calib/std_conf": 0.22897756885020218, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2588.0, "completions/max_terminated_length": 2588.0, "completions/mean_length": 504.546875, "completions/mean_terminated_length": 506.5255126953125, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.14933333333333335, "grad_norm": 0.009692667052149773, "learning_rate": 1.6666666666666667e-06, "loss": 0.0229, "num_tokens": 30324743.0, "reward": 1.3236327171325684, "reward_std": 0.28123971819877625, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.6929682493209839, "rewards/format_reward_step": 0.98828125, "step": 140 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.8497130996906215e-08, "aux_brier/mean_r": 0.8235095776617527, "aux_brier/n_active_tok": 249.375, "aux_brier/n_step_records": 62.34375, "aux_brier/std_r": 0.21152607444673777, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5448500282965478, "calib/avg_num_step_conf": 8.66796875, "calib/ece": 0.20662367346938773, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.10612244897959183, "calib/gap": 0.03771396434634988, "calib/mean_conf": 0.5271722448979592, "calib/mu_c": 0.5414881578947369, "calib/mu_w": 0.503774193548387, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0566938775510204, "calib/std_conf": 0.24455517278474556, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2631.0, "completions/max_terminated_length": 2631.0, "completions/mean_length": 536.48828125, "completions/mean_terminated_length": 545.0040283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.1504, "grad_norm": 0.024134991690516472, "learning_rate": 1.638888888888889e-06, "loss": 0.0073, "num_tokens": 30569180.0, "reward": 1.2469449043273926, "reward_std": 0.28342723846435547, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.6830918788909912, "rewards/format_reward_step": 0.95703125, "step": 141 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -8.197845527413428e-09, "aux_brier/mean_r": 0.7782465536147356, "aux_brier/n_active_tok": 265.0, "aux_brier/n_step_records": 66.25, "aux_brier/std_r": 0.24000422982499003, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5153558763931104, "calib/avg_num_step_conf": 9.40234375, "calib/ece": 0.20901857707509885, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.15019762845849802, "calib/gap": 0.011068021783181337, "calib/mean_conf": 0.5738272727272727, "calib/mu_c": 0.5787269503546099, "calib/mu_w": 0.5676589285714285, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11276679841897234, "calib/std_conf": 0.27077213296677377, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2173.0, "completions/max_terminated_length": 2173.0, "completions/mean_length": 563.4140625, "completions/mean_terminated_length": 565.62353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.15146666666666667, "grad_norm": 0.009331640787422657, "learning_rate": 1.6111111111111113e-06, "loss": 0.018, "num_tokens": 30818574.0, "reward": 1.2142034769058228, "reward_std": 0.28972476720809937, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.6771265268325806, "rewards/format_reward_step": 0.98828125, "step": 142 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.4575159946779195e-08, "aux_brier/mean_r": 0.8168259747326374, "aux_brier/n_active_tok": 265.375, "aux_brier/n_step_records": 66.34375, "aux_brier/std_r": 0.21685197483748198, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4673101673101673, "calib/avg_num_step_conf": 9.0625, "calib/ece": 0.3025179282868525, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.10756972111553785, "calib/gap": -0.03427464607464603, "calib/mean_conf": 0.5172430278884462, "calib/mu_c": 0.5020857142857144, "calib/mu_w": 0.5363603603603604, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13099601593625498, "calib/std_conf": 0.27406677346210995, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2853.0, "completions/max_terminated_length": 2853.0, "completions/mean_length": 560.0703125, "completions/mean_terminated_length": 564.4802856445312, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.15253333333333333, "grad_norm": 0.01297173835337162, "learning_rate": 1.5833333333333333e-06, "loss": 0.0296, "num_tokens": 31069288.0, "reward": 1.1988067626953125, "reward_std": 0.2663654386997223, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.64678955078125, "rewards/format_reward_step": 0.98046875, "step": 143 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.866390711233404e-08, "aux_brier/mean_r": 0.7747939079999924, "aux_brier/n_active_tok": 244.0, "aux_brier/n_step_records": 61.0, "aux_brier/std_r": 0.2413838733918965, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5451625853071056, "calib/avg_num_step_conf": 8.40625, "calib/ece": 0.21263833992094855, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.17391304347826086, "calib/gap": 0.03268165395423539, "calib/mean_conf": 0.6060177865612649, "calib/mu_c": 0.6181603773584906, "calib/mu_w": 0.5854787234042552, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.09509881422924898, "calib/std_conf": 0.27574233024254874, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3016.0, "completions/max_terminated_length": 3016.0, "completions/mean_length": 549.296875, "completions/mean_terminated_length": 549.296875, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.1536, "grad_norm": 0.00900542177259922, "learning_rate": 1.5555555555555558e-06, "loss": 0.055, "num_tokens": 31314036.0, "reward": 1.2786402702331543, "reward_std": 0.32019805908203125, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.6848737001419067, "rewards/format_reward_step": 0.97265625, "step": 144 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.618328382222092e-08, "aux_brier/mean_r": 0.7574687898159027, "aux_brier/n_active_tok": 241.25, "aux_brier/n_step_records": 60.3125, "aux_brier/std_r": 0.26402513263747096, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5313693219223173, "calib/avg_num_step_conf": 8.81640625, "calib/ece": 0.23980474308300398, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.1857707509881423, "calib/gap": 0.038888545095457694, "calib/mean_conf": 0.5704719367588934, "calib/mu_c": 0.5855354838709678, "calib/mu_w": 0.5466469387755101, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0988142292490119, "calib/std_conf": 0.29673685332554395, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2858.0, "completions/max_terminated_length": 2858.0, "completions/mean_length": 518.0, "completions/mean_terminated_length": 518.0, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.15466666666666667, "grad_norm": 0.008613722398877144, "learning_rate": 1.527777777777778e-06, "loss": 0.0406, "num_tokens": 31549348.0, "reward": 1.2704129219055176, "reward_std": 0.35175979137420654, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.683214008808136, "rewards/format_reward_step": 0.98828125, "step": 145 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.071300659964528e-08, "aux_brier/mean_r": 0.7805202156305313, "aux_brier/n_active_tok": 262.5, "aux_brier/n_step_records": 65.625, "aux_brier/std_r": 0.2427838179282844, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5422179610099073, "calib/avg_num_step_conf": 9.0390625, "calib/ece": 0.2649086614173229, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.1889763779527559, "calib/gap": 0.046231613934164195, "calib/mean_conf": 0.5848551181102363, "calib/mu_c": 0.611975238095238, "calib/mu_w": 0.5657436241610738, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.21818897637795281, "calib/std_conf": 0.2956251361694777, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1493.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 556.44140625, "completions/mean_terminated_length": 560.8228149414062, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.15573333333333333, "grad_norm": 0.014284295029938221, "learning_rate": 1.5e-06, "loss": 0.0208, "num_tokens": 31799013.0, "reward": 1.0678682327270508, "reward_std": 0.32628577947616577, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6542854309082031, "rewards/format_reward_step": 0.98828125, "step": 146 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.1309542691240182e-08, "aux_brier/mean_r": 0.7675090562552214, "aux_brier/n_active_tok": 249.25, "aux_brier/n_step_records": 62.3125, "aux_brier/std_r": 0.24736192263662815, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5817692692692693, "calib/avg_num_step_conf": 8.62890625, "calib/ece": 0.24453554687500006, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.16015625, "calib/gap": 0.0719113363363364, "calib/mean_conf": 0.605620703125, "calib/mu_c": 0.6471944444444444, "calib/mu_w": 0.575283108108108, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.21414062500000003, "calib/std_conf": 0.2684571535316541, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1559.0, "completions/max_terminated_length": 1559.0, "completions/mean_length": 556.23046875, "completions/mean_terminated_length": 558.4118041992188, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.1568, "grad_norm": 0.008952882140874863, "learning_rate": 1.4722222222222225e-06, "loss": -0.0083, "num_tokens": 32045088.0, "reward": 1.090343713760376, "reward_std": 0.21987280249595642, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.6816875338554382, "rewards/format_reward_step": 0.99609375, "step": 147 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.9160161943143628e-08, "aux_brier/mean_r": 0.7767994459718466, "aux_brier/n_active_tok": 246.0, "aux_brier/n_step_records": 61.5, "aux_brier/std_r": 0.24752691807225347, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.54140625, "calib/avg_num_step_conf": 8.57421875, "calib/ece": 0.2607681451612904, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.1774193548387097, "calib/gap": 0.04936960227272724, "calib/mean_conf": 0.5838286290322581, "calib/mu_c": 0.601346875, "calib/mu_w": 0.5519772727272727, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0997177419354839, "calib/std_conf": 0.2887517488198254, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 1588.0, "completions/max_terminated_length": 1588.0, "completions/mean_length": 521.1015625, "completions/mean_terminated_length": 533.6080322265625, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.15786666666666666, "grad_norm": 0.03386766463518143, "learning_rate": 1.4444444444444445e-06, "loss": -0.0387, "num_tokens": 32283602.0, "reward": 1.284395694732666, "reward_std": 0.31442344188690186, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.6844574809074402, "rewards/format_reward_step": 0.96875, "step": 148 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.8948215741175787e-08, "aux_brier/mean_r": 0.7503697946667671, "aux_brier/n_active_tok": 234.25, "aux_brier/n_step_records": 58.5625, "aux_brier/std_r": 0.2567706173285842, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.43728748315904276, "calib/avg_num_step_conf": 8.390625, "calib/ece": 0.2857857142857143, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.21428571428571427, "calib/gap": -0.05535625842047842, "calib/mean_conf": 0.617468253968254, "calib/mu_c": 0.5935244755244756, "calib/mu_w": 0.648880733944954, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.1678968253968254, "calib/std_conf": 0.28117495997896685, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2464.0, "completions/max_terminated_length": 2464.0, "completions/mean_length": 587.9921875, "completions/mean_terminated_length": 590.298095703125, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.15893333333333334, "grad_norm": 0.007785080000758171, "learning_rate": 1.4166666666666667e-06, "loss": -0.0099, "num_tokens": 32538584.0, "reward": 1.20124089717865, "reward_std": 0.32469823956489563, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.6252763271331787, "rewards/format_reward_step": 0.97265625, "step": 149 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.118356088435917e-08, "aux_brier/mean_r": 0.7470440305769444, "aux_brier/n_active_tok": 248.625, "aux_brier/n_step_records": 62.15625, "aux_brier/std_r": 0.25454069022089243, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5112681031437655, "calib/avg_num_step_conf": 8.93359375, "calib/ece": 0.22534016393442627, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.21721311475409835, "calib/gap": 0.01281419992935351, "calib/mean_conf": 0.6584303278688525, "calib/mu_c": 0.6634194630872484, "calib/mu_w": 0.6506052631578949, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1365573770491804, "calib/std_conf": 0.2641019333730215, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2855.0, "completions/max_terminated_length": 2855.0, "completions/mean_length": 543.6640625, "completions/mean_terminated_length": 550.1107177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.16, "grad_norm": 0.0191176887601614, "learning_rate": 1.3888888888888892e-06, "loss": 0.0568, "num_tokens": 32782722.0, "reward": 1.2219321727752686, "reward_std": 0.31413596868515015, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.6611664295196533, "rewards/format_reward_step": 0.94921875, "step": 150 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.269292778163013e-08, "aux_brier/mean_r": 0.7436981536448002, "aux_brier/n_active_tok": 249.875, "aux_brier/n_step_records": 62.46875, "aux_brier/std_r": 0.26360502000898123, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.45231271660890426, "calib/avg_num_step_conf": 8.765625, "calib/ece": 0.27006653061224495, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.20816326530612245, "calib/gap": -0.041253585710477436, "calib/mean_conf": 0.6210355102040817, "calib/mu_c": 0.6006612903225804, "calib/mu_w": 0.6419148760330579, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.1924897959183674, "calib/std_conf": 0.2798648440642501, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2691.0, "completions/max_terminated_length": 2691.0, "completions/mean_length": 608.20703125, "completions/mean_terminated_length": 617.8611450195312, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.16106666666666666, "grad_norm": 0.008505653589963913, "learning_rate": 1.3611111111111112e-06, "loss": 0.0322, "num_tokens": 33045447.0, "reward": 1.1102604866027832, "reward_std": 0.3097105622291565, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.6051042675971985, "rewards/format_reward_step": 0.94921875, "step": 151 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.9712193960463864e-09, "aux_brier/mean_r": 0.752042293548584, "aux_brier/n_active_tok": 248.625, "aux_brier/n_step_records": 62.15625, "aux_brier/std_r": 0.2555644544772804, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.469896331738437, "calib/avg_num_step_conf": 8.8046875, "calib/ece": 0.24601138211382115, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.1951219512195122, "calib/gap": -0.027342862838915405, "calib/mean_conf": 0.6147203252032519, "calib/mu_c": 0.6020492424242424, "calib/mu_w": 0.6293921052631578, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.16207317073170732, "calib/std_conf": 0.2850369110199531, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2640.0, "completions/max_terminated_length": 2640.0, "completions/mean_length": 563.578125, "completions/mean_terminated_length": 568.0157470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.16213333333333332, "grad_norm": 0.007569004315882921, "learning_rate": 1.3333333333333334e-06, "loss": 0.0162, "num_tokens": 33295115.0, "reward": 1.1358160972595215, "reward_std": 0.38195180892944336, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.6057645678520203, "rewards/format_reward_step": 0.9375, "step": 152 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 6.90705231898292e-09, "aux_brier/mean_r": 0.7538456488400698, "aux_brier/n_active_tok": 250.375, "aux_brier/n_step_records": 62.59375, "aux_brier/std_r": 0.2572556813247502, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4305141074335323, "calib/avg_num_step_conf": 8.96875, "calib/ece": 0.30703694779116464, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.24899598393574296, "calib/gap": -0.03869542864894193, "calib/mean_conf": 0.6355333333333333, "calib/mu_c": 0.6204592105263158, "calib/mu_w": 0.6591546391752577, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.1660642570281125, "calib/std_conf": 0.2905727726372962, "calib/step_conf_rate": 0.96484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2889.0, "completions/max_terminated_length": 2889.0, "completions/mean_length": 584.6875, "completions/mean_terminated_length": 591.62060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.1632, "grad_norm": 0.021739106625318527, "learning_rate": 1.3055555555555556e-06, "loss": -0.0208, "num_tokens": 33552115.0, "reward": 1.2180697917938232, "reward_std": 0.33338308334350586, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.6222794651985168, "rewards/format_reward_step": 0.9375, "step": 153 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.266220491293659e-08, "aux_brier/mean_r": 0.7622414398938417, "aux_brier/n_active_tok": 248.75, "aux_brier/n_step_records": 62.1875, "aux_brier/std_r": 0.24693296663463116, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5147849462365591, "calib/avg_num_step_conf": 8.66015625, "calib/ece": 0.26656837944664036, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.22924901185770752, "calib/gap": 0.0070175168792198095, "calib/mean_conf": 0.638886166007905, "calib/mu_c": 0.6423255813953489, "calib/mu_w": 0.6353080645161291, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.1977865612648222, "calib/std_conf": 0.28921660073487565, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2064.0, "completions/max_terminated_length": 2064.0, "completions/mean_length": 550.92578125, "completions/mean_terminated_length": 555.2637939453125, "completions/min_length": 0.0, "completions/min_terminated_length": 184.0, "epoch": 0.16426666666666667, "grad_norm": 0.011531784199178219, "learning_rate": 1.2777777777777779e-06, "loss": 0.0075, "num_tokens": 33797592.0, "reward": 1.1540908813476562, "reward_std": 0.28227803111076355, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6398011445999146, "rewards/format_reward_step": 0.98046875, "step": 154 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 6.440477923197463e-08, "aux_brier/mean_r": 0.767568077892065, "aux_brier/n_active_tok": 236.875, "aux_brier/n_step_records": 59.21875, "aux_brier/std_r": 0.24058766895905137, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5217307692307692, "calib/avg_num_step_conf": 8.36328125, "calib/ece": 0.30078000000000005, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.144, "calib/gap": -0.013522435897435847, "calib/mean_conf": 0.61154, "calib/mu_c": 0.6045083333333333, "calib/mu_w": 0.6180307692307692, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.21616000000000002, "calib/std_conf": 0.2704487494517215, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3045.0, "completions/max_terminated_length": 3045.0, "completions/mean_length": 544.28125, "completions/mean_terminated_length": 544.28125, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.16533333333333333, "grad_norm": 0.007326643448323011, "learning_rate": 1.25e-06, "loss": 0.0455, "num_tokens": 34044144.0, "reward": 1.1116831302642822, "reward_std": 0.2930290699005127, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.634232759475708, "rewards/format_reward_step": 0.96875, "step": 155 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.2576459784984237e-08, "aux_brier/mean_r": 0.7396857850253582, "aux_brier/n_active_tok": 250.125, "aux_brier/n_step_records": 62.53125, "aux_brier/std_r": 0.2695887787267566, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5621212121212121, "calib/avg_num_step_conf": 8.96875, "calib/ece": 0.21989919028340088, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.20647773279352227, "calib/gap": 0.06673455204216083, "calib/mean_conf": 0.6073072874493928, "calib/mu_c": 0.6383780303030304, "calib/mu_w": 0.5716434782608696, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.1463967611336033, "calib/std_conf": 0.3051993569410943, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3017.0, "completions/max_terminated_length": 3017.0, "completions/mean_length": 599.1875, "completions/mean_terminated_length": 606.2925415039062, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.1664, "grad_norm": 0.015079155564308167, "learning_rate": 1.2222222222222223e-06, "loss": -0.0253, "num_tokens": 34302296.0, "reward": 1.1544930934906006, "reward_std": 0.28155696392059326, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.6492223143577576, "rewards/format_reward_step": 0.9453125, "step": 156 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.6199376418345466e-09, "aux_brier/mean_r": 0.7355370167642832, "aux_brier/n_active_tok": 254.625, "aux_brier/n_step_records": 63.65625, "aux_brier/std_r": 0.2728663869202137, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.45014431907635793, "calib/avg_num_step_conf": 9.37109375, "calib/ece": 0.3109868525896416, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.20717131474103587, "calib/gap": -0.05829619522435048, "calib/mean_conf": 0.5983358565737052, "calib/mu_c": 0.5744135135135136, "calib/mu_w": 0.632709708737864, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.15984063745019933, "calib/std_conf": 0.3116605412580398, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2428.0, "completions/max_terminated_length": 2428.0, "completions/mean_length": 561.3984375, "completions/mean_terminated_length": 568.0553588867188, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.16746666666666668, "grad_norm": 0.021188387647271156, "learning_rate": 1.1944444444444446e-06, "loss": 0.0076, "num_tokens": 34549742.0, "reward": 1.2244011163711548, "reward_std": 0.3301091194152832, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.6163544654846191, "rewards/format_reward_step": 0.9765625, "step": 157 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.3244782560458717e-08, "aux_brier/mean_r": 0.7338093649595976, "aux_brier/n_active_tok": 251.875, "aux_brier/n_step_records": 62.96875, "aux_brier/std_r": 0.2696883250027895, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5417752867570386, "calib/avg_num_step_conf": 8.92578125, "calib/ece": 0.24843052208835345, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.3293172690763052, "calib/gap": 0.05092793274244012, "calib/mean_conf": 0.6890795180722892, "calib/mu_c": 0.7119868613138687, "calib/mu_w": 0.6610589285714286, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.19365461847389562, "calib/std_conf": 0.27888158320688117, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3041.0, "completions/max_terminated_length": 3041.0, "completions/mean_length": 583.1328125, "completions/mean_terminated_length": 590.0474243164062, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.16853333333333334, "grad_norm": 0.028858352452516556, "learning_rate": 1.1666666666666668e-06, "loss": -0.0096, "num_tokens": 34804264.0, "reward": 1.1819684505462646, "reward_std": 0.34807294607162476, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.6575610637664795, "rewards/format_reward_step": 0.96484375, "step": 158 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.159553482734445e-08, "aux_brier/mean_r": 0.7610024102032185, "aux_brier/n_active_tok": 249.5, "aux_brier/n_step_records": 62.375, "aux_brier/std_r": 0.25927096977829933, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4866640258797122, "calib/avg_num_step_conf": 8.640625, "calib/ece": 0.25255357142857143, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.1984126984126984, "calib/gap": -0.018523172905525742, "calib/mean_conf": 0.5994305555555556, "calib/mu_c": 0.5921535947712419, "calib/mu_w": 0.6106767676767676, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12242063492063492, "calib/std_conf": 0.28054569864226303, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2600.0, "completions/max_terminated_length": 2600.0, "completions/mean_length": 540.26171875, "completions/mean_terminated_length": 542.3804321289062, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.1696, "grad_norm": 0.06475929170846939, "learning_rate": 1.138888888888889e-06, "loss": 0.0244, "num_tokens": 35047355.0, "reward": 1.259587049484253, "reward_std": 0.2941041588783264, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.6633485555648804, "rewards/format_reward_step": 0.984375, "step": 159 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.6387400486882484e-08, "aux_brier/mean_r": 0.722304493188858, "aux_brier/n_active_tok": 268.0, "aux_brier/n_step_records": 67.0, "aux_brier/std_r": 0.2759293089620769, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5726462103330711, "calib/avg_num_step_conf": 9.55859375, "calib/ece": 0.25283846153846157, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.27125506072874495, "calib/gap": 0.09221089037503294, "calib/mean_conf": 0.6638417004048583, "calib/mu_c": 0.7097604838709679, "calib/mu_w": 0.617549593495935, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.20732793522267212, "calib/std_conf": 0.3008842438216142, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2821.0, "completions/max_terminated_length": 2821.0, "completions/mean_length": 603.7890625, "completions/mean_terminated_length": 608.5433349609375, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.17066666666666666, "grad_norm": 0.00955340638756752, "learning_rate": 1.111111111111111e-06, "loss": -0.0037, "num_tokens": 35306765.0, "reward": 1.1169214248657227, "reward_std": 0.3309890627861023, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.6395604610443115, "rewards/format_reward_step": 0.9453125, "step": 160 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 7.272013990977655e-09, "aux_brier/mean_r": 0.7873156126588583, "aux_brier/n_active_tok": 220.75, "aux_brier/n_step_records": 55.1875, "aux_brier/std_r": 0.23441861057654023, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5611812659846547, "calib/avg_num_step_conf": 7.76171875, "calib/ece": 0.20896626984126992, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.1746031746031746, "calib/gap": 0.05024472506393851, "calib/mean_conf": 0.6311130952380952, "calib/mu_c": 0.6446711956521739, "calib/mu_w": 0.5944264705882354, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.05496031746031751, "calib/std_conf": 0.2503548673984631, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2495.0, "completions/max_terminated_length": 2495.0, "completions/mean_length": 528.6875, "completions/mean_terminated_length": 530.7608032226562, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.17173333333333332, "grad_norm": 0.011234656907618046, "learning_rate": 1.0833333333333335e-06, "loss": 0.0436, "num_tokens": 35546029.0, "reward": 1.3931267261505127, "reward_std": 0.2290090024471283, "rewards/accuracy_reward_step": 0.71875, "rewards/final_brier_reward_step": 0.7365691661834717, "rewards/format_reward_step": 0.98046875, "step": 161 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.134715598733905e-08, "aux_brier/mean_r": 0.7546740137040615, "aux_brier/n_active_tok": 242.375, "aux_brier/n_step_records": 60.59375, "aux_brier/std_r": 0.2563911615870893, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4144365945907372, "calib/avg_num_step_conf": 8.25390625, "calib/ece": 0.25751968503937006, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.2125984251968504, "calib/gap": -0.05339163633768651, "calib/mean_conf": 0.6695669291338583, "calib/mu_c": 0.6525404624277457, "calib/mu_w": 0.7059320987654322, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.12299212598425198, "calib/std_conf": 0.25726237688371983, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2533.0, "completions/max_terminated_length": 2533.0, "completions/mean_length": 545.04296875, "completions/mean_terminated_length": 545.04296875, "completions/min_length": 190.0, "completions/min_terminated_length": 190.0, "epoch": 0.1728, "grad_norm": 0.007306816056370735, "learning_rate": 1.0555555555555557e-06, "loss": 0.013, "num_tokens": 35789704.0, "reward": 1.338191032409668, "reward_std": 0.26437437534332275, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.6808887720108032, "rewards/format_reward_step": 0.984375, "step": 162 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.3258712988628574e-08, "aux_brier/mean_r": 0.746513593941927, "aux_brier/n_active_tok": 257.0, "aux_brier/n_step_records": 64.25, "aux_brier/std_r": 0.2549468372017145, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.46562541275921276, "calib/avg_num_step_conf": 9.47265625, "calib/ece": 0.2614599190283401, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.18218623481781376, "calib/gap": -0.02300170387003042, "calib/mean_conf": 0.6255036437246962, "calib/mu_c": 0.6149805970149254, "calib/mu_w": 0.6379823008849558, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.17222672064777333, "calib/std_conf": 0.2808097425752618, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2628.0, "completions/max_terminated_length": 2628.0, "completions/mean_length": 648.37890625, "completions/mean_terminated_length": 653.4842529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.17386666666666667, "grad_norm": 0.021517354995012283, "learning_rate": 1.0277777777777777e-06, "loss": 0.0269, "num_tokens": 36060521.0, "reward": 1.150056004524231, "reward_std": 0.3330397605895996, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.6158487796783447, "rewards/format_reward_step": 0.9453125, "step": 163 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.9652870997255434e-08, "aux_brier/mean_r": 0.7571022156625986, "aux_brier/n_active_tok": 255.25, "aux_brier/n_step_records": 63.8125, "aux_brier/std_r": 0.25739692244678736, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5156793213739396, "calib/avg_num_step_conf": 8.92578125, "calib/ece": 0.22299176954732508, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.23868312757201646, "calib/gap": 0.02158260325406758, "calib/mean_conf": 0.6448271604938274, "calib/mu_c": 0.653886524822695, "calib/mu_w": 0.6323039215686275, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.14378600823045268, "calib/std_conf": 0.27255823359905457, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2718.0, "completions/max_terminated_length": 2718.0, "completions/mean_length": 637.48046875, "completions/mean_terminated_length": 642.5, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.17493333333333333, "grad_norm": 0.00691978307440877, "learning_rate": 1.0000000000000002e-06, "loss": 0.026, "num_tokens": 36329852.0, "reward": 1.1841471195220947, "reward_std": 0.3380683660507202, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.6506508588790894, "rewards/format_reward_step": 0.94140625, "step": 164 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.055046999238641e-08, "aux_brier/mean_r": 0.75985062494874, "aux_brier/n_active_tok": 249.125, "aux_brier/n_step_records": 62.28125, "aux_brier/std_r": 0.2503046104684472, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5939559405008473, "calib/avg_num_step_conf": 8.57421875, "calib/ece": 0.263720472440945, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.23228346456692914, "calib/gap": 0.08857660202096262, "calib/mean_conf": 0.6696259842519685, "calib/mu_c": 0.7187964601769912, "calib/mu_w": 0.6302198581560285, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.24423228346456705, "calib/std_conf": 0.2577866875033552, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1656.0, "completions/max_terminated_length": 1656.0, "completions/mean_length": 587.26953125, "completions/mean_terminated_length": 589.5725708007812, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.176, "grad_norm": 0.00832296907901764, "learning_rate": 9.722222222222224e-07, "loss": -0.0329, "num_tokens": 36585769.0, "reward": 1.1033964157104492, "reward_std": 0.25608399510383606, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.6713979244232178, "rewards/format_reward_step": 0.98828125, "step": 165 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.1471011392437678e-08, "aux_brier/mean_r": 0.7423290703445673, "aux_brier/n_active_tok": 273.25, "aux_brier/n_step_records": 68.3125, "aux_brier/std_r": 0.27032877411693335, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.47744701617401003, "calib/avg_num_step_conf": 9.61328125, "calib/ece": 0.2805422310756972, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.2788844621513944, "calib/gap": -0.039877091466815395, "calib/mean_conf": 0.6530832669322709, "calib/mu_c": 0.6391024539877301, "calib/mu_w": 0.6789795454545455, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.14211155378486062, "calib/std_conf": 0.30049801803591053, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2493.0, "completions/max_terminated_length": 2493.0, "completions/mean_length": 644.96875, "completions/mean_terminated_length": 650.0472412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.17706666666666668, "grad_norm": 0.02036803960800171, "learning_rate": 9.444444444444445e-07, "loss": 0.0096, "num_tokens": 36857065.0, "reward": 1.2705461978912354, "reward_std": 0.27468210458755493, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.6368721723556519, "rewards/format_reward_step": 0.94921875, "step": 166 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.708864284608261e-08, "aux_brier/mean_r": 0.7509384918957949, "aux_brier/n_active_tok": 249.625, "aux_brier/n_step_records": 62.40625, "aux_brier/std_r": 0.25937409279868007, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5693521594684385, "calib/avg_num_step_conf": 8.79296875, "calib/ece": 0.18868897637795282, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.2559055118110236, "calib/gap": 0.043108181063122974, "calib/mean_conf": 0.6406811023622048, "calib/mu_c": 0.6552767857142857, "calib/mu_w": 0.6121686046511627, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.08397637795275599, "calib/std_conf": 0.28739722345967106, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1779.0, "completions/max_terminated_length": 1779.0, "completions/mean_length": 537.484375, "completions/mean_terminated_length": 539.5921630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 197.0, "epoch": 0.17813333333333334, "grad_norm": 0.009047247469425201, "learning_rate": 9.166666666666666e-07, "loss": -0.0137, "num_tokens": 37100269.0, "reward": 1.3186213970184326, "reward_std": 0.2996622920036316, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.6963607668876648, "rewards/format_reward_step": 0.9765625, "step": 167 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.60023536782683e-08, "aux_brier/mean_r": 0.7647078614681959, "aux_brier/n_active_tok": 249.0, "aux_brier/n_step_records": 62.25, "aux_brier/std_r": 0.2474137358367443, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4929739313152538, "calib/avg_num_step_conf": 8.515625, "calib/ece": 0.24151181102362218, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.22440944881889763, "calib/gap": -0.0011848447041827859, "calib/mean_conf": 0.6768346456692913, "calib/mu_c": 0.6763821656050956, "calib/mu_w": 0.6775670103092784, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1501181102362206, "calib/std_conf": 0.26428847469156824, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2706.0, "completions/max_terminated_length": 2706.0, "completions/mean_length": 627.359375, "completions/mean_terminated_length": 629.8196411132812, "completions/min_length": 0.0, "completions/min_terminated_length": 129.0, "epoch": 0.1792, "grad_norm": 0.0073885307647287846, "learning_rate": 8.88888888888889e-07, "loss": 0.0038, "num_tokens": 37365545.0, "reward": 1.2751530408859253, "reward_std": 0.34935617446899414, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.6787370443344116, "rewards/format_reward_step": 0.984375, "step": 168 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 6.745325131785052e-10, "aux_brier/mean_r": 0.7467086259275675, "aux_brier/n_active_tok": 252.875, "aux_brier/n_step_records": 63.21875, "aux_brier/std_r": 0.2576060048304498, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6119211822660098, "calib/avg_num_step_conf": 8.76953125, "calib/ece": 0.23184200000000005, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.264, "calib/gap": 0.07556798029556655, "calib/mean_conf": 0.680558, "calib/mu_c": 0.7122965517241381, "calib/mu_w": 0.6367285714285715, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.16620000000000004, "calib/std_conf": 0.27035127452261065, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1654.0, "completions/max_terminated_length": 1654.0, "completions/mean_length": 590.42578125, "completions/mean_terminated_length": 595.0748291015625, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.18026666666666666, "grad_norm": 0.012090523727238178, "learning_rate": 8.611111111111112e-07, "loss": 0.0049, "num_tokens": 37620878.0, "reward": 1.2254528999328613, "reward_std": 0.3342924416065216, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.6908740401268005, "rewards/format_reward_step": 0.97265625, "step": 169 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.9953606140462625e-08, "aux_brier/mean_r": 0.7363214287906885, "aux_brier/n_active_tok": 271.25, "aux_brier/n_step_records": 67.8125, "aux_brier/std_r": 0.2685506329871714, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.49300180598555216, "calib/avg_num_step_conf": 9.3203125, "calib/ece": 0.25611023622047246, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.2440944881889764, "calib/gap": -0.008303083075335449, "calib/mean_conf": 0.6712125984251969, "calib/mu_c": 0.6678782894736842, "calib/mu_w": 0.6761813725490197, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.16444881889763782, "calib/std_conf": 0.28287358824728376, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1843.0, "completions/max_terminated_length": 1843.0, "completions/mean_length": 593.45703125, "completions/mean_terminated_length": 595.7843627929688, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.18133333333333335, "grad_norm": 0.010588682256639004, "learning_rate": 8.333333333333333e-07, "loss": 0.0097, "num_tokens": 37876955.0, "reward": 1.2502719163894653, "reward_std": 0.2966703474521637, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.6573377847671509, "rewards/format_reward_step": 0.984375, "step": 170 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 6.119484327604674e-08, "aux_brier/mean_r": 0.7529121600091457, "aux_brier/n_active_tok": 252.0, "aux_brier/n_step_records": 63.0, "aux_brier/std_r": 0.2586530111730099, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5358080239970002, "calib/avg_num_step_conf": 8.66015625, "calib/ece": 0.2667332015810277, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.22134387351778656, "calib/gap": 0.03840213723284602, "calib/mean_conf": 0.6467055335968378, "calib/mu_c": 0.6658307086614175, "calib/mu_w": 0.6274285714285714, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.20573122529644272, "calib/std_conf": 0.2741543506012061, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1863.0, "completions/max_terminated_length": 1863.0, "completions/mean_length": 550.546875, "completions/mean_terminated_length": 552.7059326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 219.0, "epoch": 0.1824, "grad_norm": 0.021256277337670326, "learning_rate": 8.055555555555557e-07, "loss": 0.0022, "num_tokens": 38124791.0, "reward": 1.154085636138916, "reward_std": 0.38547608256340027, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6632174253463745, "rewards/format_reward_step": 0.984375, "step": 171 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -9.511857551602887e-09, "aux_brier/mean_r": 0.7468444555997849, "aux_brier/n_active_tok": 249.875, "aux_brier/n_step_records": 62.46875, "aux_brier/std_r": 0.2594641740433872, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.45944412932501416, "calib/avg_num_step_conf": 8.4921875, "calib/ece": 0.24733799999999995, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.292, "calib/gap": -0.017267583664208885, "calib/mean_conf": 0.711062, "calib/mu_c": 0.7051219512195122, "calib/mu_w": 0.722389534883721, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1512, "calib/std_conf": 0.2424412117524576, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3071.0, "completions/max_terminated_length": 3071.0, "completions/mean_length": 556.98828125, "completions/mean_terminated_length": 561.3740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.18346666666666667, "grad_norm": 0.010089898481965065, "learning_rate": 7.777777777777779e-07, "loss": 0.0327, "num_tokens": 38370732.0, "reward": 1.3009603023529053, "reward_std": 0.3123309016227722, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.6882159113883972, "rewards/format_reward_step": 0.9765625, "step": 172 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.081013574297465e-08, "aux_brier/mean_r": 0.756576718762517, "aux_brier/n_active_tok": 256.875, "aux_brier/n_step_records": 64.21875, "aux_brier/std_r": 0.259043853264302, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.55408010280574, "calib/avg_num_step_conf": 10.04296875, "calib/ece": 0.1954838709677419, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.2540322580645161, "calib/gap": 0.03836710216320427, "calib/mean_conf": 0.6623387096774194, "calib/mu_c": 0.6757981366459628, "calib/mu_w": 0.6374310344827585, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.10431451612903221, "calib/std_conf": 0.2612917048699, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2423.0, "completions/max_terminated_length": 2423.0, "completions/mean_length": 643.8125, "completions/mean_terminated_length": 648.8818969726562, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.18453333333333333, "grad_norm": 0.02186598628759384, "learning_rate": 7.5e-07, "loss": 0.0006, "num_tokens": 38638708.0, "reward": 1.2822065353393555, "reward_std": 0.3207966089248657, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.6913260221481323, "rewards/format_reward_step": 0.9609375, "step": 173 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 9.554803226397013e-09, "aux_brier/mean_r": 0.7429994307458401, "aux_brier/n_active_tok": 262.75, "aux_brier/n_step_records": 65.6875, "aux_brier/std_r": 0.26765774516388774, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4566137566137566, "calib/avg_num_step_conf": 9.296875, "calib/ece": 0.29623495934959354, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.2682926829268293, "calib/gap": -0.03009849206349202, "calib/mean_conf": 0.659618699186992, "calib/mu_c": 0.6449365079365079, "calib/mu_w": 0.6750349999999999, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.22182926829268296, "calib/std_conf": 0.2844226556530165, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2679.0, "completions/max_terminated_length": 2679.0, "completions/mean_length": 662.79296875, "completions/mean_terminated_length": 670.6522216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.1856, "grad_norm": 0.01314226072281599, "learning_rate": 7.222222222222222e-07, "loss": -0.008, "num_tokens": 38912615.0, "reward": 1.1187838315963745, "reward_std": 0.3720570206642151, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.6001352071762085, "rewards/format_reward_step": 0.953125, "step": 174 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.37002427353017e-09, "aux_brier/mean_r": 0.7477692756801844, "aux_brier/n_active_tok": 245.125, "aux_brier/n_step_records": 61.28125, "aux_brier/std_r": 0.2681192150339484, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.4695669934640523, "calib/avg_num_step_conf": 9.27734375, "calib/ece": 0.3137743902439025, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.24796747967479674, "calib/gap": -0.01707169117647045, "calib/mean_conf": 0.619640243902439, "calib/mu_c": 0.6096470588235295, "calib/mu_w": 0.62671875, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.2593902439024391, "calib/std_conf": 0.3013864467606289, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2904.0, "completions/max_terminated_length": 2904.0, "completions/mean_length": 585.98828125, "completions/mean_terminated_length": 597.661376953125, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.18666666666666668, "grad_norm": 0.01680920645594597, "learning_rate": 6.944444444444446e-07, "loss": -0.0409, "num_tokens": 39168452.0, "reward": 1.0177991390228271, "reward_std": 0.3215849995613098, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.586821436882019, "rewards/format_reward_step": 0.9453125, "step": 175 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.2705953284241076e-08, "aux_brier/mean_r": 0.7499426323920488, "aux_brier/n_active_tok": 280.125, "aux_brier/n_step_records": 70.03125, "aux_brier/std_r": 0.25741465017199516, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.521867384891625, "calib/avg_num_step_conf": 9.73046875, "calib/ece": 0.22510756972111554, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.250996015936255, "calib/gap": 0.030381685263562885, "calib/mean_conf": 0.6487569721115537, "calib/mu_c": 0.6624347826086956, "calib/mu_w": 0.6320530973451327, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16203187250996015, "calib/std_conf": 0.2710706798003163, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2938.0, "completions/max_terminated_length": 2938.0, "completions/mean_length": 625.1796875, "completions/mean_terminated_length": 627.6314086914062, "completions/min_length": 0.0, "completions/min_terminated_length": 120.0, "epoch": 0.18773333333333334, "grad_norm": 0.006208065431565046, "learning_rate": 6.666666666666667e-07, "loss": 0.0357, "num_tokens": 39432562.0, "reward": 1.1970179080963135, "reward_std": 0.3304113745689392, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.670884370803833, "rewards/format_reward_step": 0.98046875, "step": 176 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.622284712791803e-08, "aux_brier/mean_r": 0.7483648955821991, "aux_brier/n_active_tok": 274.375, "aux_brier/n_step_records": 68.59375, "aux_brier/std_r": 0.2583274398930371, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5642527165072548, "calib/avg_num_step_conf": 9.859375, "calib/ece": 0.21650240000000004, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.22, "calib/gap": 0.0630541739865964, "calib/mean_conf": 0.6342175999999999, "calib/mu_c": 0.661709219858156, "calib/mu_w": 0.5986550458715596, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.14336000000000004, "calib/std_conf": 0.29128347445442215, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2775.0, "completions/max_terminated_length": 2775.0, "completions/mean_length": 614.16015625, "completions/mean_terminated_length": 621.4426879882812, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.1888, "grad_norm": 0.01324143260717392, "learning_rate": 6.388888888888889e-07, "loss": 0.0377, "num_tokens": 39693619.0, "reward": 1.2016162872314453, "reward_std": 0.29538553953170776, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.6736522912979126, "rewards/format_reward_step": 0.96484375, "step": 177 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.1087757462169279e-08, "aux_brier/mean_r": 0.7463778499513865, "aux_brier/n_active_tok": 276.5, "aux_brier/n_step_records": 69.125, "aux_brier/std_r": 0.2625624155625701, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4606935617573915, "calib/avg_num_step_conf": 9.9375, "calib/ece": 0.2671092741935483, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.3024193548387097, "calib/gap": -0.013360776457584933, "calib/mean_conf": 0.6766810483870968, "calib/mu_c": 0.6716168831168832, "calib/mu_w": 0.6849776595744681, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.1614112903225806, "calib/std_conf": 0.27401180205427267, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2722.0, "completions/max_terminated_length": 2722.0, "completions/mean_length": 592.03125, "completions/mean_terminated_length": 594.3529663085938, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.18986666666666666, "grad_norm": 0.009170177392661572, "learning_rate": 6.111111111111112e-07, "loss": 0.0357, "num_tokens": 39951251.0, "reward": 1.2478896379470825, "reward_std": 0.36079686880111694, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.6556211709976196, "rewards/format_reward_step": 0.96484375, "step": 178 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.1923261267199337e-08, "aux_brier/mean_r": 0.7234301157295704, "aux_brier/n_active_tok": 274.25, "aux_brier/n_step_records": 68.5625, "aux_brier/std_r": 0.27818348025903106, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5527871621621621, "calib/avg_num_step_conf": 9.890625, "calib/ece": 0.2184467213114753, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.319672131147541, "calib/gap": 0.0622081925675676, "calib/mean_conf": 0.7167172131147539, "calib/mu_c": 0.7411925675675677, "calib/mu_w": 0.6789843750000001, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1643032786885245, "calib/std_conf": 0.2819123365930991, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2911.0, "completions/max_terminated_length": 2911.0, "completions/mean_length": 620.0, "completions/mean_terminated_length": 627.351806640625, "completions/min_length": 0.0, "completions/min_terminated_length": 196.0, "epoch": 0.19093333333333334, "grad_norm": 0.012826989404857159, "learning_rate": 5.833333333333334e-07, "loss": 0.0261, "num_tokens": 40216235.0, "reward": 1.2223281860351562, "reward_std": 0.3606985807418823, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.6627501249313354, "rewards/format_reward_step": 0.94921875, "step": 179 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.5458275076651873e-09, "aux_brier/mean_r": 0.7329331804066896, "aux_brier/n_active_tok": 285.5, "aux_brier/n_step_records": 71.375, "aux_brier/std_r": 0.26681223371997476, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5590105770518142, "calib/avg_num_step_conf": 10.59765625, "calib/ece": 0.20687848605577697, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.29880478087649404, "calib/gap": 0.06085369527379847, "calib/mean_conf": 0.6708107569721116, "calib/mu_c": 0.6943279220779222, "calib/mu_w": 0.6334742268041237, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.1320717131474104, "calib/std_conf": 0.28432595038016906, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2711.0, "completions/max_terminated_length": 2711.0, "completions/mean_length": 706.1171875, "completions/mean_terminated_length": 711.6771850585938, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.192, "grad_norm": 0.014191859401762486, "learning_rate": 5.555555555555555e-07, "loss": 0.027, "num_tokens": 40500857.0, "reward": 1.2598047256469727, "reward_std": 0.2820780575275421, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.6876566410064697, "rewards/format_reward_step": 0.97265625, "step": 180 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.855458071382433e-09, "aux_brier/mean_r": 0.7478128615766764, "aux_brier/n_active_tok": 263.0, "aux_brier/n_step_records": 65.75, "aux_brier/std_r": 0.2553107528947294, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.49666078629032256, "calib/avg_num_step_conf": 9.1875, "calib/ece": 0.2993988095238096, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.30158730158730157, "calib/gap": -0.012836189516129193, "calib/mean_conf": 0.6853630952380952, "calib/mu_c": 0.6790468749999999, "calib/mu_w": 0.6918830645161291, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23841269841269846, "calib/std_conf": 0.26112610244120327, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2842.0, "completions/max_terminated_length": 2842.0, "completions/mean_length": 565.328125, "completions/mean_terminated_length": 567.5451049804688, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.19306666666666666, "grad_norm": 0.00830915942788124, "learning_rate": 5.277777777777779e-07, "loss": 0.0338, "num_tokens": 40751845.0, "reward": 1.1506667137145996, "reward_std": 0.318827748298645, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6339172124862671, "rewards/format_reward_step": 0.984375, "step": 181 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.4662184927869433e-10, "aux_brier/mean_r": 0.7318172082304955, "aux_brier/n_active_tok": 262.625, "aux_brier/n_step_records": 65.65625, "aux_brier/std_r": 0.2725449204444885, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5128259486962051, "calib/avg_num_step_conf": 9.37890625, "calib/ece": 0.21181693548387098, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.3024193548387097, "calib/gap": 0.0121046851812594, "calib/mean_conf": 0.6917314516129032, "calib/mu_c": 0.6960754716981133, "calib/mu_w": 0.683970786516854, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.13120967741935488, "calib/std_conf": 0.2670023104185815, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2912.0, "completions/max_terminated_length": 2912.0, "completions/mean_length": 607.6171875, "completions/mean_terminated_length": 612.4015502929688, "completions/min_length": 0.0, "completions/min_terminated_length": 216.0, "epoch": 0.19413333333333332, "grad_norm": 0.013810195028781891, "learning_rate": 5.000000000000001e-07, "loss": 0.0177, "num_tokens": 41013555.0, "reward": 1.2725276947021484, "reward_std": 0.32726791501045227, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.6760484576225281, "rewards/format_reward_step": 0.96484375, "step": 182 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.4734916337365576e-08, "aux_brier/mean_r": 0.7415572851896286, "aux_brier/n_active_tok": 277.125, "aux_brier/n_step_records": 69.28125, "aux_brier/std_r": 0.25660434272140265, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5408628779979144, "calib/avg_num_step_conf": 10.703125, "calib/ece": 0.23896987951807241, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.285140562248996, "calib/gap": 0.040723279457768435, "calib/mean_conf": 0.6991024096385543, "calib/mu_c": 0.717419708029197, "calib/mu_w": 0.6766964285714285, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19393574297188765, "calib/std_conf": 0.26251750477916475, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2997.0, "completions/max_terminated_length": 2997.0, "completions/mean_length": 664.8125, "completions/mean_terminated_length": 670.0472412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.1952, "grad_norm": 0.0092347152531147, "learning_rate": 4.7222222222222226e-07, "loss": 0.0285, "num_tokens": 41290427.0, "reward": 1.1872224807739258, "reward_std": 0.3446572422981262, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.6629521250724792, "rewards/format_reward_step": 0.97265625, "step": 183 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.543451400356279e-09, "aux_brier/mean_r": 0.7541378326714039, "aux_brier/n_active_tok": 285.875, "aux_brier/n_step_records": 71.46875, "aux_brier/std_r": 0.2524599856697023, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.514677961150869, "calib/avg_num_step_conf": 10.50390625, "calib/ece": 0.23684578313253019, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.27710843373493976, "calib/gap": 0.01565354899956184, "calib/mean_conf": 0.6823510040160642, "calib/mu_c": 0.6875059880239521, "calib/mu_w": 0.6718524390243903, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.12425702811244989, "calib/std_conf": 0.27456539138350683, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2846.0, "completions/max_terminated_length": 2846.0, "completions/mean_length": 597.9296875, "completions/mean_terminated_length": 605.019775390625, "completions/min_length": 0.0, "completions/min_terminated_length": 193.0, "epoch": 0.19626666666666667, "grad_norm": 0.014518826268613338, "learning_rate": 4.444444444444445e-07, "loss": 0.02, "num_tokens": 41548777.0, "reward": 1.3124616146087646, "reward_std": 0.3099721670150757, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.6873465776443481, "rewards/format_reward_step": 0.96875, "step": 184 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.4955855519305228e-08, "aux_brier/mean_r": 0.7305880356580019, "aux_brier/n_active_tok": 275.0, "aux_brier/n_step_records": 68.75, "aux_brier/std_r": 0.2709053694270551, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4418304866258459, "calib/avg_num_step_conf": 9.484375, "calib/ece": 0.30433134920634924, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.2857142857142857, "calib/gap": -0.05702226877215588, "calib/mean_conf": 0.6832876984126984, "calib/mu_c": 0.6590758620689655, "calib/mu_w": 0.7160981308411214, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20611111111111113, "calib/std_conf": 0.27206760066718333, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2037.0, "completions/max_terminated_length": 2037.0, "completions/mean_length": 589.58984375, "completions/mean_terminated_length": 591.9019775390625, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.19733333333333333, "grad_norm": 0.008655454963445663, "learning_rate": 4.1666666666666667e-07, "loss": 0.0092, "num_tokens": 41806632.0, "reward": 1.216625452041626, "reward_std": 0.32438236474990845, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.6321268677711487, "rewards/format_reward_step": 0.984375, "step": 185 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -8.546461705005726e-09, "aux_brier/mean_r": 0.7264175526797771, "aux_brier/n_active_tok": 281.75, "aux_brier/n_step_records": 70.4375, "aux_brier/std_r": 0.2794937454164028, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5449413535721006, "calib/avg_num_step_conf": 10.16015625, "calib/ece": 0.27762929687499993, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.33203125, "calib/gap": 0.03217100294800246, "calib/mean_conf": 0.669948828125, "calib/mu_c": 0.6833953020134229, "calib/mu_w": 0.6512242990654205, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18277343749999997, "calib/std_conf": 0.2965103697573269, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2057.0, "completions/max_terminated_length": 2057.0, "completions/mean_length": 611.33984375, "completions/mean_terminated_length": 613.7373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.1984, "grad_norm": 0.009276865050196648, "learning_rate": 3.8888888888888895e-07, "loss": 0.0036, "num_tokens": 42068175.0, "reward": 1.2512147426605225, "reward_std": 0.3001707196235657, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.6767337322235107, "rewards/format_reward_step": 1.0, "step": 186 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 9.162319625222182e-09, "aux_brier/mean_r": 0.7304468788206577, "aux_brier/n_active_tok": 281.75, "aux_brier/n_step_records": 70.4375, "aux_brier/std_r": 0.26488793548196554, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5560472972972973, "calib/avg_num_step_conf": 11.26171875, "calib/ece": 0.20829233870967753, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.2862903225806452, "calib/gap": 0.06778418918918916, "calib/mean_conf": 0.6659818548387096, "calib/mu_c": 0.6933141891891892, "calib/mu_w": 0.62553, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.13875000000000012, "calib/std_conf": 0.2852319349708316, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2952.0, "completions/max_terminated_length": 2952.0, "completions/mean_length": 649.92578125, "completions/mean_terminated_length": 657.6324462890625, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.19946666666666665, "grad_norm": 0.016152750700712204, "learning_rate": 3.611111111111111e-07, "loss": 0.0369, "num_tokens": 42336100.0, "reward": 1.230811595916748, "reward_std": 0.3586818277835846, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.6810586452484131, "rewards/format_reward_step": 0.96484375, "step": 187 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.4718443611404517e-09, "aux_brier/mean_r": 0.7136568035930395, "aux_brier/n_active_tok": 280.625, "aux_brier/n_step_records": 70.15625, "aux_brier/std_r": 0.2826611357741058, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5507750397456279, "calib/avg_num_step_conf": 10.25390625, "calib/ece": 0.28660040485829963, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.30364372469635625, "calib/gap": 0.03535854531001603, "calib/mean_conf": 0.6602821862348178, "calib/mu_c": 0.6761720588235295, "calib/mu_w": 0.6408135135135135, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19813765182186238, "calib/std_conf": 0.30199523068880263, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2776.0, "completions/max_terminated_length": 2776.0, "completions/mean_length": 662.83984375, "completions/mean_terminated_length": 665.4392700195312, "completions/min_length": 0.0, "completions/min_terminated_length": 179.0, "epoch": 0.20053333333333334, "grad_norm": 0.0070601268671453, "learning_rate": 3.3333333333333335e-07, "loss": 0.0368, "num_tokens": 42609859.0, "reward": 1.1745184659957886, "reward_std": 0.3221679925918579, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.6433863639831543, "rewards/format_reward_step": 0.96484375, "step": 188 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.1143694855775266e-09, "aux_brier/mean_r": 0.7563605438917875, "aux_brier/n_active_tok": 274.5, "aux_brier/n_step_records": 68.625, "aux_brier/std_r": 0.24481907626613975, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.47058221281823764, "calib/avg_num_step_conf": 9.2578125, "calib/ece": 0.2560813492063492, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.2619047619047619, "calib/gap": -0.009304586717630126, "calib/mean_conf": 0.6940773809523808, "calib/mu_c": 0.6907173913043478, "calib/mu_w": 0.700021978021978, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.15563492063492063, "calib/std_conf": 0.2407992622222738, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2600.0, "completions/max_terminated_length": 2600.0, "completions/mean_length": 614.3046875, "completions/mean_terminated_length": 616.7137451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.2016, "grad_norm": 0.008702950552105904, "learning_rate": 3.055555555555556e-07, "loss": 0.0203, "num_tokens": 42874889.0, "reward": 1.2943354845046997, "reward_std": 0.29039430618286133, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.6929672956466675, "rewards/format_reward_step": 0.984375, "step": 189 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.449038322665075e-08, "aux_brier/mean_r": 0.7434644736349583, "aux_brier/n_active_tok": 270.25, "aux_brier/n_step_records": 67.5625, "aux_brier/std_r": 0.26447512675076723, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5616364898113684, "calib/avg_num_step_conf": 10.05078125, "calib/ece": 0.23601984126984132, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.23412698412698413, "calib/gap": 0.0715012932937984, "calib/mean_conf": 0.606202380952381, "calib/mu_c": 0.6405343511450381, "calib/mu_w": 0.5690330578512397, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.16119047619047625, "calib/std_conf": 0.3033550501028498, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2685.0, "completions/max_terminated_length": 2685.0, "completions/mean_length": 644.64453125, "completions/mean_terminated_length": 647.172607421875, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.20266666666666666, "grad_norm": 0.008129044435918331, "learning_rate": 2.7777777777777776e-07, "loss": -0.0007, "num_tokens": 43145526.0, "reward": 1.1673336029052734, "reward_std": 0.3370707631111145, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6693342924118042, "rewards/format_reward_step": 0.9765625, "step": 190 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.819200926155844e-09, "aux_brier/mean_r": 0.7141981646418571, "aux_brier/n_active_tok": 287.25, "aux_brier/n_step_records": 71.8125, "aux_brier/std_r": 0.28062421223148704, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4711366538952746, "calib/avg_num_step_conf": 10.65625, "calib/ece": 0.3168609561752988, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.350597609561753, "calib/gap": -0.02275160280970634, "calib/mean_conf": 0.7044139442231075, "calib/mu_c": 0.6938992592592592, "calib/mu_w": 0.7166508620689656, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.24171314741035857, "calib/std_conf": 0.294562895164936, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2793.0, "completions/max_terminated_length": 2793.0, "completions/mean_length": 597.73046875, "completions/mean_terminated_length": 602.43701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 202.0, "epoch": 0.20373333333333332, "grad_norm": 0.014080878347158432, "learning_rate": 2.5000000000000004e-07, "loss": 0.0491, "num_tokens": 43402713.0, "reward": 1.1681525707244873, "reward_std": 0.26598060131073, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.6101104021072388, "rewards/format_reward_step": 0.9765625, "step": 191 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.462629692361439e-08, "aux_brier/mean_r": 0.7537436224520206, "aux_brier/n_active_tok": 263.0, "aux_brier/n_step_records": 65.75, "aux_brier/std_r": 0.25597084290347993, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.501932072401871, "calib/avg_num_step_conf": 9.5234375, "calib/ece": 0.2486262096774194, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.2903225806451613, "calib/gap": 0.019521368042844545, "calib/mean_conf": 0.6747608870967742, "calib/mu_c": 0.6825536912751679, "calib/mu_w": 0.6630323232323233, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.16129032258064518, "calib/std_conf": 0.2692965456114232, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2135.0, "completions/max_terminated_length": 2135.0, "completions/mean_length": 596.7578125, "completions/mean_terminated_length": 608.6454467773438, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.2048, "grad_norm": 0.019266357645392418, "learning_rate": 2.2222222222222224e-07, "loss": -0.0687, "num_tokens": 43660459.0, "reward": 1.2282748222351074, "reward_std": 0.3212256133556366, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.6630992889404297, "rewards/format_reward_step": 0.9609375, "step": 192 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.0201361462107714e-08, "aux_brier/mean_r": 0.7247827351093292, "aux_brier/n_active_tok": 289.75, "aux_brier/n_step_records": 72.4375, "aux_brier/std_r": 0.2665173173882067, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.48191421254801536, "calib/avg_num_step_conf": 9.90625, "calib/ece": 0.26145277777777776, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.3055555555555556, "calib/gap": 0.0023942509603072093, "calib/mean_conf": 0.7141027777777778, "calib/mu_c": 0.7151478873239436, "calib/mu_w": 0.7127536363636364, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.20603174603174604, "calib/std_conf": 0.25286715054671943, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2794.0, "completions/max_terminated_length": 2794.0, "completions/mean_length": 632.375, "completions/mean_terminated_length": 632.375, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.20586666666666667, "grad_norm": 0.006684689316898584, "learning_rate": 1.9444444444444447e-07, "loss": 0.0375, "num_tokens": 43928059.0, "reward": 1.2114092111587524, "reward_std": 0.36292076110839844, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.6581370830535889, "rewards/format_reward_step": 0.984375, "step": 193 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.071868663941714e-08, "aux_brier/mean_r": 0.7462091688066721, "aux_brier/n_active_tok": 267.0, "aux_brier/n_step_records": 66.75, "aux_brier/std_r": 0.2641845620237291, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5724805177651565, "calib/avg_num_step_conf": 10.18359375, "calib/ece": 0.2752145748987854, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.3319838056680162, "calib/gap": 0.05406458856161678, "calib/mean_conf": 0.70348987854251, "calib/mu_c": 0.728223880597015, "calib/mu_w": 0.6741592920353983, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.21809716599190282, "calib/std_conf": 0.2564129207546779, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2822.0, "completions/max_terminated_length": 2822.0, "completions/mean_length": 599.953125, "completions/mean_terminated_length": 609.4761962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.20693333333333333, "grad_norm": 0.013640095479786396, "learning_rate": 1.6666666666666668e-07, "loss": 0.0222, "num_tokens": 44187591.0, "reward": 1.1686592102050781, "reward_std": 0.3678101897239685, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.6590118408203125, "rewards/format_reward_step": 0.9609375, "step": 194 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.7734939405933403e-08, "aux_brier/mean_r": 0.7605474106967449, "aux_brier/n_active_tok": 248.875, "aux_brier/n_step_records": 62.21875, "aux_brier/std_r": 0.2591217868030071, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.40877483443708607, "calib/avg_num_step_conf": 9.28125, "calib/ece": 0.31400079681274906, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.27091633466135456, "calib/gap": -0.08896708609271531, "calib/mean_conf": 0.6538079681274901, "calib/mu_c": 0.6183629139072847, "calib/mu_w": 0.70733, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18310756972111558, "calib/std_conf": 0.27212055481137687, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3042.0, "completions/max_terminated_length": 3042.0, "completions/mean_length": 616.3046875, "completions/mean_terminated_length": 621.157470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.208, "grad_norm": 0.008334558457136154, "learning_rate": 1.3888888888888888e-07, "loss": 0.021, "num_tokens": 44451349.0, "reward": 1.237173318862915, "reward_std": 0.27236270904541016, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.6283807158470154, "rewards/format_reward_step": 0.98046875, "step": 195 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.458378372567239e-08, "aux_brier/mean_r": 0.7473798841238022, "aux_brier/n_active_tok": 250.625, "aux_brier/n_step_records": 62.65625, "aux_brier/std_r": 0.26159650878980756, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5451818641070978, "calib/avg_num_step_conf": 8.66796875, "calib/ece": 0.22050784313725497, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.2823529411764706, "calib/gap": 0.04137114801717623, "calib/mean_conf": 0.6878058823529412, "calib/mu_c": 0.7051655405405407, "calib/mu_w": 0.6637943925233645, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16396078431372554, "calib/std_conf": 0.2559623795977047, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1412.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 484.32421875, "completions/mean_terminated_length": 486.22357177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.20906666666666668, "grad_norm": 0.012983839958906174, "learning_rate": 1.1111111111111112e-07, "loss": 0.0036, "num_tokens": 44677880.0, "reward": 1.2503784894943237, "reward_std": 0.23702460527420044, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.6968265771865845, "rewards/format_reward_step": 0.99609375, "step": 196 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.046964083125436e-08, "aux_brier/mean_r": 0.7440295200794935, "aux_brier/n_active_tok": 279.125, "aux_brier/n_step_records": 69.78125, "aux_brier/std_r": 0.25882862880825996, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.49758085052202694, "calib/avg_num_step_conf": 9.515625, "calib/ece": 0.28796812749003997, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.29880478087649404, "calib/gap": 0.0037305194805195008, "calib/mean_conf": 0.692390438247012, "calib/mu_c": 0.6941590909090909, "calib/mu_w": 0.6904285714285714, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.22723107569721124, "calib/std_conf": 0.2725704246659875, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2999.0, "completions/max_terminated_length": 2999.0, "completions/mean_length": 588.5078125, "completions/mean_terminated_length": 588.5078125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.21013333333333334, "grad_norm": 0.006923824083060026, "learning_rate": 8.333333333333334e-08, "loss": 0.0526, "num_tokens": 44933594.0, "reward": 1.1597187519073486, "reward_std": 0.34860312938690186, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.6310625076293945, "rewards/format_reward_step": 0.97265625, "step": 197 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.706577267863608e-09, "aux_brier/mean_r": 0.7263111285865307, "aux_brier/n_active_tok": 264.5, "aux_brier/n_step_records": 66.125, "aux_brier/std_r": 0.2693498474545777, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4706667579408544, "calib/avg_num_step_conf": 9.02734375, "calib/ece": 0.2571751968503938, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.3188976377952756, "calib/gap": -0.03745755750273816, "calib/mean_conf": 0.725974409448819, "calib/mu_c": 0.7129969879518072, "calib/mu_w": 0.7504545454545454, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16480314960629935, "calib/std_conf": 0.246053596552905, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3022.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 539.62890625, "completions/mean_terminated_length": 541.7451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.2112, "grad_norm": 0.0072466181591153145, "learning_rate": 5.555555555555556e-08, "loss": 0.0275, "num_tokens": 45177123.0, "reward": 1.3137493133544922, "reward_std": 0.2786206901073456, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.6846849918365479, "rewards/format_reward_step": 0.98828125, "step": 198 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.991618957253305e-08, "aux_brier/mean_r": 0.7149926163256168, "aux_brier/n_active_tok": 289.875, "aux_brier/n_step_records": 72.46875, "aux_brier/std_r": 0.2789755454286933, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.44509690668720314, "calib/avg_num_step_conf": 10.328125, "calib/ece": 0.3163873517786561, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.35177865612648224, "calib/gap": -0.03472291746887435, "calib/mean_conf": 0.7104901185770752, "calib/mu_c": 0.6959421768707483, "calib/mu_w": 0.7306650943396227, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.22292490118577077, "calib/std_conf": 0.28071017169893014, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2779.0, "completions/max_terminated_length": 2779.0, "completions/mean_length": 649.15234375, "completions/mean_terminated_length": 651.6980590820312, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.21226666666666666, "grad_norm": 0.009777950122952461, "learning_rate": 2.777777777777778e-08, "loss": 0.0243, "num_tokens": 45447506.0, "reward": 1.2253589630126953, "reward_std": 0.34273290634155273, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6358109712600708, "rewards/format_reward_step": 0.984375, "step": 199 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -7.601623774888822e-09, "aux_brier/mean_r": 0.7408849187195301, "aux_brier/n_active_tok": 260.75, "aux_brier/n_step_records": 65.1875, "aux_brier/std_r": 0.2614397117868066, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5297192982456141, "calib/avg_num_step_conf": 9.4609375, "calib/ece": 0.2254473469387755, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.2979591836734694, "calib/gap": 0.005959017543859524, "calib/mean_conf": 0.7216546938775511, "calib/mu_c": 0.7239653333333332, "calib/mu_w": 0.7180063157894737, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.16742857142857143, "calib/std_conf": 0.24801964773307977, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2622.0, "completions/max_terminated_length": 2622.0, "completions/mean_length": 597.5703125, "completions/mean_terminated_length": 609.47412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.21333333333333335, "grad_norm": 0.01800602674484253, "learning_rate": 0.0, "loss": -0.0076, "num_tokens": 45708532.0, "reward": 1.2270984649658203, "reward_std": 0.25381046533584595, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.6583936810493469, "rewards/format_reward_step": 0.953125, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.030479375715949573, "train_runtime": 15490.145, "train_samples_per_second": 3.305, "train_steps_per_second": 0.013 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 45708532, "num_train_epochs": 1, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }