{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_brier/lambda": 0.05, "aux_brier/loss": 5.791089203391117e-07, "aux_brier/mean_group_std": 0.06289231620091193, "aux_brier/mean_r": 0.4665906001184907, "aux_brier/n_active_tok": 24.615384615384617, "aux_brier/n_groups": 5.3076923076923075, "aux_brier/n_step_records": 6.153846153846154, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.16193221509456635, "learning_rate": 2.5000000000000004e-07, "loss": 0.0332, "num_tokens": 264685.0, "reward": 0.03929531201720238, "reward_std": 0.08434611558914185, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step_strict": 0.0390625, "step": 1 }, { "aux_brier/lambda": 0.05000000000000002, "aux_brier/loss": 2.461345396504181e-08, "aux_brier/mean_group_std": 0.046398653263787254, "aux_brier/mean_r": 0.430243897442093, "aux_brier/n_active_tok": 28.42105263157895, "aux_brier/n_groups": 5.894736842105263, "aux_brier/n_step_records": 7.105263157894737, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.006253078579902649, "learning_rate": 5.000000000000001e-07, "loss": 0.0643, "num_tokens": 533467.0, "reward": 0.08358447253704071, "reward_std": 0.15892045199871063, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step_strict": 0.08984375, "step": 2 }, { "aux_brier/lambda": 0.05, "aux_brier/loss": -2.8176203592522597e-08, "aux_brier/mean_group_std": 0.029847013798721494, "aux_brier/mean_r": 0.41571989539143306, "aux_brier/n_active_tok": 22.285714285714285, "aux_brier/n_groups": 4.785714285714286, "aux_brier/n_step_records": 5.571428571428571, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.7692307692307692, "calib/avg_num_step_conf": 0.3046875, "calib/ece": 0.644705882352941, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 0.7647058823529411, "calib/gap": 0.13076923076923086, "calib/mean_conf": 0.88, "calib/mu_c": 0.98, "calib/mu_w": 0.8492307692307691, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.09375, "calib/nonempty_step_conf_rate": 0.06640625, "calib/pce": 0.644705882352941, "calib/std_conf": 0.19991174523362287, "calib/step_conf_rate": 0.06640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3060.0, "completions/max_terminated_length": 3060.0, "completions/mean_length": 614.921875, "completions/mean_terminated_length": 675.622314453125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.004395780153572559, "learning_rate": 7.5e-07, "loss": 0.0312, "num_tokens": 796143.0, "reward": 0.04669833928346634, "reward_std": 0.10021394491195679, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.022730860859155655, "rewards/format_reward_step_strict": 0.05078125, "step": 3 }, { "aux_brier/lambda": 0.049999999999999996, "aux_brier/loss": 3.7930230269787235e-09, "aux_brier/mean_group_std": 0.030711086994661366, "aux_brier/mean_r": 0.4590041716009507, "aux_brier/n_active_tok": 22.545454545454547, "aux_brier/n_groups": 4.818181818181818, "aux_brier/n_step_records": 5.636363636363637, "calib/answer_extract_rate": 0.05859375, "calib/auroc": 0.6, "calib/avg_num_step_conf": 0.25390625, "calib/ece": 0.6273272727272727, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.5454545454545454, "calib/gap": 0.25493999999999994, "calib/mean_conf": 0.7182363636363637, "calib/mu_c": 0.95, "calib/mu_w": 0.69506, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.078125, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.6273272727272727, "calib/std_conf": 0.3309647255928898, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2977.0, "completions/max_terminated_length": 2977.0, "completions/mean_length": 669.00390625, "completions/mean_terminated_length": 735.0429077148438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.0034748422913253307, "learning_rate": 1.0000000000000002e-06, "loss": 0.0095, "num_tokens": 1073576.0, "reward": 0.01978631690144539, "reward_std": 0.04911034554243088, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.008832764811813831, "rewards/format_reward_step_strict": 0.02734375, "step": 4 }, { "aux_brier/lambda": 0.049999999999999996, "aux_brier/loss": -7.947286206899663e-09, "aux_brier/mean_group_std": 0.007505333331238634, "aux_brier/mean_r": 0.3610654883250641, "aux_brier/n_active_tok": 26.0, "aux_brier/n_groups": 6.1, "aux_brier/n_step_records": 6.5, "calib/answer_extract_rate": 0.06640625, "calib/avg_num_step_conf": 0.296875, "calib/ece": 0.965, "calib/final_conf_rate": 0.03125, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 0.965, "calib/mu_c": NaN, "calib/mu_w": 0.965, "calib/nonempty_final_conf_rate": 0.03125, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.046875, "calib/pce": 0.965, "calib/std_conf": 0.013228756555322964, "calib/step_conf_rate": 0.046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 2969.0, "completions/max_terminated_length": 2969.0, "completions/mean_length": 641.55859375, "completions/mean_terminated_length": 717.2008666992188, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.005333333333333333, "grad_norm": 0.0031038725282996893, "learning_rate": 1.25e-06, "loss": 0.0112, "num_tokens": 1344503.0, "reward": 0.014093163423240185, "reward_std": 0.03986148536205292, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0016851562540978193, "rewards/format_reward_step_strict": 0.01953125, "step": 5 }, { "aux_brier/lambda": 0.05000000000000001, "aux_brier/loss": -4.303009086303224e-09, "aux_brier/mean_group_std": 0.017132194048956134, "aux_brier/mean_r": 0.5234074353874353, "aux_brier/n_active_tok": 22.11764705882353, "aux_brier/n_groups": 5.176470588235294, "aux_brier/n_step_records": 5.529411764705882, "calib/answer_extract_rate": 0.0703125, "calib/avg_num_step_conf": 0.375, "calib/ece": 0.8025, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 0.625, "calib/mean_conf": 0.8025, "calib/mu_c": NaN, "calib/mu_w": 0.8025, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.09375, "calib/nonempty_step_conf_rate": 0.078125, "calib/pce": 0.8025, "calib/std_conf": 0.2742147880767921, "calib/step_conf_rate": 0.078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3051.0, "completions/max_terminated_length": 3051.0, "completions/mean_length": 554.87890625, "completions/mean_terminated_length": 609.65234375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.0043120114132761955, "learning_rate": 1.5e-06, "loss": 0.0394, "num_tokens": 1592504.0, "reward": 0.024134960025548935, "reward_std": 0.05962400883436203, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.010602343827486038, "rewards/format_reward_step_strict": 0.04296875, "step": 6 }, { "aux_brier/lambda": 0.049999999999999996, "aux_brier/loss": -7.968535374981332e-08, "aux_brier/mean_group_std": 0.01891592284558502, "aux_brier/mean_r": 0.6214788348051656, "aux_brier/n_active_tok": 28.363636363636363, "aux_brier/n_groups": 6.0, "aux_brier/n_step_records": 7.090909090909091, "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.25, "calib/avg_num_step_conf": 0.31640625, "calib/ece": 0.6187777777777778, "calib/final_conf_rate": 0.03515625, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.4444444444444444, "calib/gap": -0.12342857142857144, "calib/mean_conf": 0.841, "calib/mu_c": 0.745, "calib/mu_w": 0.8684285714285714, "calib/nonempty_final_conf_rate": 0.03515625, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.6187777777777778, "calib/std_conf": 0.13927271408602937, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 2951.0, "completions/max_terminated_length": 2951.0, "completions/mean_length": 671.2109375, "completions/mean_terminated_length": 760.3097534179688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.13974224030971527, "learning_rate": 1.75e-06, "loss": 0.009, "num_tokens": 1871758.0, "reward": 0.02218896523118019, "reward_std": 0.0627598688006401, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.010630855336785316, "rewards/format_reward_step_strict": 0.0234375, "step": 7 }, { "aux_brier/lambda": 0.05000000000000001, "aux_brier/loss": -1.0630785437134213e-08, "aux_brier/mean_group_std": 0.06213361844459951, "aux_brier/mean_r": 0.3225575777875755, "aux_brier/n_active_tok": 23.466666666666665, "aux_brier/n_groups": 5.066666666666666, "aux_brier/n_step_records": 5.866666666666666, "calib/answer_extract_rate": 0.09765625, "calib/auroc": 0.3125, "calib/avg_num_step_conf": 0.34765625, "calib/ece": 0.5566666666666668, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.0546875, "calib/frac_conf_gt_0.9": 0.7222222222222222, "calib/gap": 0.03750000000000009, "calib/mean_conf": 0.89, "calib/mu_c": 0.915, "calib/mu_w": 0.8775, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.109375, "calib/nonempty_step_conf_rate": 0.07421875, "calib/pce": 0.5566666666666668, "calib/std_conf": 0.20264912204760888, "calib/step_conf_rate": 0.07421875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 3052.0, "completions/max_terminated_length": 3052.0, "completions/mean_length": 625.6796875, "completions/mean_terminated_length": 696.40869140625, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.008533333333333334, "grad_norm": 0.002846751594915986, "learning_rate": 2.0000000000000003e-06, "loss": 0.0284, "num_tokens": 2138444.0, "reward": 0.055156148970127106, "reward_std": 0.08708842843770981, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.025312108919024467, "rewards/format_reward_step_strict": 0.05078125, "step": 8 }, { "aux_brier/lambda": 0.05, "aux_brier/loss": 4.903836682755371e-08, "aux_brier/mean_group_std": 0.06103745202557255, "aux_brier/mean_r": 0.5146568030651996, "aux_brier/n_active_tok": 20.857142857142858, "aux_brier/n_groups": 4.071428571428571, "aux_brier/n_step_records": 5.214285714285714, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.35833333333333334, "calib/avg_num_step_conf": 0.296875, "calib/ece": 0.6105941176470588, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.0546875, "calib/frac_conf_gt_0.9": 0.6470588235294118, "calib/gap": -0.022841666666666538, "calib/mean_conf": 0.7941235294117647, "calib/mu_c": 0.778, "calib/mu_w": 0.8008416666666666, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.1015625, "calib/nonempty_step_conf_rate": 0.08203125, "calib/pce": 0.5553, "calib/std_conf": 0.3163816524936332, "calib/step_conf_rate": 0.08203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 3042.0, "completions/max_terminated_length": 3042.0, "completions/mean_length": 716.05859375, "completions/mean_terminated_length": 773.464111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.0096, "grad_norm": 0.2166428118944168, "learning_rate": 2.25e-06, "loss": 0.0302, "num_tokens": 2429291.0, "reward": 0.04808994382619858, "reward_std": 0.10048744082450867, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.02048475854098797, "rewards/format_reward_step_strict": 0.046875, "step": 9 }, { "aux_brier/lambda": 0.05000000000000001, "aux_brier/loss": -6.612390301974358e-09, "aux_brier/mean_group_std": 0.022029799530182782, "aux_brier/mean_r": 0.48289992708497376, "aux_brier/n_active_tok": 21.0, "aux_brier/n_groups": 4.5625, "aux_brier/n_step_records": 5.25, "calib/answer_extract_rate": 0.08984375, "calib/auroc": 0.6388888888888888, "calib/avg_num_step_conf": 0.33984375, "calib/ece": 0.672, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.05859375, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": 0.10166666666666657, "calib/mean_conf": 0.8720000000000001, "calib/mu_c": 0.9533333333333333, "calib/mu_w": 0.8516666666666667, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.109375, "calib/nonempty_step_conf_rate": 0.078125, "calib/pce": 0.672, "calib/std_conf": 0.174783675820522, "calib/step_conf_rate": 0.078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 3015.0, "completions/max_terminated_length": 3015.0, "completions/mean_length": 644.64453125, "completions/mean_terminated_length": 687.620849609375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.010666666666666666, "grad_norm": 0.05089627206325531, "learning_rate": 2.5e-06, "loss": 0.0552, "num_tokens": 2701120.0, "reward": 0.04478124901652336, "reward_std": 0.08551986515522003, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.022874999791383743, "rewards/format_reward_step_strict": 0.0546875, "step": 10 }, { "aux_brier/lambda": 0.05000000000000001, "aux_brier/loss": 9.959429873617655e-08, "aux_brier/mean_group_std": 0.06935623024823882, "aux_brier/mean_r": 0.3516953115711509, "aux_brier/n_active_tok": 28.705882352941178, "aux_brier/n_groups": 5.588235294117647, "aux_brier/n_step_records": 7.176470588235294, "calib/answer_extract_rate": 0.09765625, "calib/auroc": 0.7, "calib/avg_num_step_conf": 0.51953125, "calib/ece": 0.6410526315789475, "calib/final_conf_rate": 0.07421875, "calib/format_rate": 0.05859375, "calib/frac_conf_gt_0.9": 0.7368421052631579, "calib/gap": 0.07571428571428584, "calib/mean_conf": 0.9042105263157897, "calib/mu_c": 0.96, "calib/mu_w": 0.8842857142857141, "calib/nonempty_final_conf_rate": 0.07421875, "calib/nonempty_reasoning_rate": 0.13671875, "calib/nonempty_step_conf_rate": 0.10546875, "calib/pce": 0.6410526315789475, "calib/std_conf": 0.1669325672074201, "calib/step_conf_rate": 0.10546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2996.0, "completions/max_terminated_length": 2996.0, "completions/mean_length": 710.73828125, "completions/mean_terminated_length": 742.64892578125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.12358586490154266, "learning_rate": 2.7500000000000004e-06, "loss": 0.0153, "num_tokens": 2987549.0, "reward": 0.053199902176856995, "reward_std": 0.08878539502620697, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.025299610570073128, "rewards/format_reward_step_strict": 0.0546875, "step": 11 }, { "aux_brier/lambda": 0.05000000000000001, "aux_brier/loss": 8.21229944788134e-08, "aux_brier/mean_group_std": 0.0582216399601433, "aux_brier/mean_r": 0.5254478349033717, "aux_brier/n_active_tok": 25.904761904761905, "aux_brier/n_groups": 4.9523809523809526, "aux_brier/n_step_records": 6.476190476190476, "calib/answer_extract_rate": 0.16796875, "calib/auroc": 0.611111111111111, "calib/avg_num_step_conf": 0.625, "calib/ece": 0.5332142857142858, "calib/final_conf_rate": 0.109375, "calib/format_rate": 0.09375, "calib/frac_conf_gt_0.9": 0.7857142857142857, "calib/gap": 0.042999999999999816, "calib/mean_conf": 0.8903571428571428, "calib/mu_c": 0.9179999999999999, "calib/mu_w": 0.8750000000000001, "calib/nonempty_final_conf_rate": 0.109375, "calib/nonempty_reasoning_rate": 0.1875, "calib/nonempty_step_conf_rate": 0.12890625, "calib/pce": 0.5332142857142858, "calib/std_conf": 0.1983590205168602, "calib/step_conf_rate": 0.12890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2852.0, "completions/max_terminated_length": 2852.0, "completions/mean_length": 621.9765625, "completions/mean_terminated_length": 674.6864624023438, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0128, "grad_norm": 0.12016642838716507, "learning_rate": 3e-06, "loss": 0.0694, "num_tokens": 3250951.0, "reward": 0.0971623957157135, "reward_std": 0.17875269055366516, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.04489960893988609, "rewards/format_reward_step_strict": 0.09375, "step": 12 }, { "aux_brier/lambda": 0.05000000000000002, "aux_brier/loss": -1.3304560525284615e-07, "aux_brier/mean_group_std": 0.05968226273717765, "aux_brier/mean_r": 0.38506495852629286, "aux_brier/n_active_tok": 27.04, "aux_brier/n_groups": 4.76, "aux_brier/n_step_records": 6.76, "calib/answer_extract_rate": 0.15234375, "calib/auroc": 0.6538461538461539, "calib/avg_num_step_conf": 0.671875, "calib/ece": 0.7648387096774194, "calib/final_conf_rate": 0.12109375, "calib/format_rate": 0.1015625, "calib/frac_conf_gt_0.9": 0.8387096774193549, "calib/gap": 0.059461538461538455, "calib/mean_conf": 0.9261290322580644, "calib/mu_c": 0.976, "calib/mu_w": 0.9165384615384615, "calib/nonempty_final_conf_rate": 0.12109375, "calib/nonempty_reasoning_rate": 0.19140625, "calib/nonempty_step_conf_rate": 0.1640625, "calib/pce": 0.7648387096774194, "calib/std_conf": 0.1230703292078214, "calib/step_conf_rate": 0.1640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 3043.0, "completions/max_terminated_length": 3043.0, "completions/mean_length": 671.5390625, "completions/mean_terminated_length": 713.3361206054688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.058262571692466736, "learning_rate": 3.2500000000000002e-06, "loss": 0.073, "num_tokens": 3527457.0, "reward": 0.07933026552200317, "reward_std": 0.1612529754638672, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.028258593752980232, "rewards/format_reward_step_strict": 0.09765625, "step": 13 }, { "aux_brier/lambda": 0.05000000000000002, "aux_brier/loss": 6.968066774248882e-08, "aux_brier/mean_group_std": 0.0971092681259337, "aux_brier/mean_r": 0.4211621161943812, "aux_brier/n_active_tok": 31.84, "aux_brier/n_groups": 5.44, "aux_brier/n_step_records": 7.96, "calib/answer_extract_rate": 0.2109375, "calib/auroc": 0.563063063063063, "calib/avg_num_step_conf": 0.80078125, "calib/ece": 0.7269565217391302, "calib/final_conf_rate": 0.1796875, "calib/format_rate": 0.15234375, "calib/frac_conf_gt_0.9": 0.7391304347826086, "calib/gap": 0.010570570570570759, "calib/mean_conf": 0.9226086956521737, "calib/mu_c": 0.9311111111111112, "calib/mu_w": 0.9205405405405405, "calib/nonempty_final_conf_rate": 0.1796875, "calib/nonempty_reasoning_rate": 0.23828125, "calib/nonempty_step_conf_rate": 0.1875, "calib/pce": 0.7269565217391302, "calib/std_conf": 0.09987894374014226, "calib/step_conf_rate": 0.1875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 3046.0, "completions/max_terminated_length": 3046.0, "completions/mean_length": 616.3046875, "completions/mean_terminated_length": 683.0043334960938, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.014933333333333333, "grad_norm": 0.12956944108009338, "learning_rate": 3.5e-06, "loss": 0.0887, "num_tokens": 3790631.0, "reward": 0.12147372961044312, "reward_std": 0.20730483531951904, "rewards/accuracy_reward_step": 0.03515625, "rewards/final_brier_reward_step": 0.04839491844177246, "rewards/format_reward_step_strict": 0.1484375, "step": 14 }, { "aux_brier/lambda": 0.05000000000000002, "aux_brier/loss": 3.00324828937339e-08, "aux_brier/mean_group_std": 0.06625814184144391, "aux_brier/mean_r": 0.4505305222592945, "aux_brier/n_active_tok": 42.0, "aux_brier/n_groups": 6.269230769230769, "aux_brier/n_step_records": 10.5, "calib/answer_extract_rate": 0.21875, "calib/auroc": 0.42647058823529405, "calib/avg_num_step_conf": 1.08203125, "calib/ece": 0.7490476190476191, "calib/final_conf_rate": 0.1640625, "calib/format_rate": 0.12109375, "calib/frac_conf_gt_0.9": 0.8333333333333334, "calib/gap": -0.04117647058823537, "calib/mean_conf": 0.9133333333333333, "calib/mu_c": 0.8799999999999999, "calib/mu_w": 0.9211764705882353, "calib/nonempty_final_conf_rate": 0.1640625, "calib/nonempty_reasoning_rate": 0.2734375, "calib/nonempty_step_conf_rate": 0.2109375, "calib/pce": 0.735952380952381, "calib/std_conf": 0.13688136446027974, "calib/step_conf_rate": 0.2109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2831.0, "completions/max_terminated_length": 2831.0, "completions/mean_length": 545.08984375, "completions/mean_terminated_length": 606.7086791992188, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.016, "grad_norm": 0.1261015087366104, "learning_rate": 3.7500000000000005e-06, "loss": 0.0349, "num_tokens": 4038054.0, "reward": 0.11347460746765137, "reward_std": 0.22185897827148438, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.039835937321186066, "rewards/format_reward_step_strict": 0.12109375, "step": 15 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.0546628842286839e-08, "aux_brier/mean_group_std": 0.15131436740359547, "aux_brier/mean_r": 0.541980853761356, "aux_brier/n_active_tok": 47.58620689655172, "aux_brier/n_groups": 6.758620689655173, "aux_brier/n_step_records": 11.89655172413793, "calib/answer_extract_rate": 0.2578125, "calib/auroc": 0.4716599190283401, "calib/avg_num_step_conf": 1.3984375, "calib/ece": 0.6116379310344826, "calib/final_conf_rate": 0.2265625, "calib/format_rate": 0.1875, "calib/frac_conf_gt_0.9": 0.8448275862068966, "calib/gap": 0.025310391363022933, "calib/mean_conf": 0.9150862068965516, "calib/mu_c": 0.9321052631578945, "calib/mu_w": 0.9067948717948716, "calib/nonempty_final_conf_rate": 0.2265625, "calib/nonempty_reasoning_rate": 0.34375, "calib/nonempty_step_conf_rate": 0.28125, "calib/pce": 0.5995689655172411, "calib/std_conf": 0.16442294961918918, "calib/step_conf_rate": 0.28125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2951.0, "completions/max_terminated_length": 2951.0, "completions/mean_length": 597.69921875, "completions/mean_terminated_length": 665.2651977539062, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.017066666666666667, "grad_norm": 0.2648871839046478, "learning_rate": 4.000000000000001e-06, "loss": 0.0331, "num_tokens": 4299913.0, "reward": 0.1786659061908722, "reward_std": 0.3179003596305847, "rewards/accuracy_reward_step": 0.07421875, "rewards/final_brier_reward_step": 0.074038565158844, "rewards/format_reward_step_strict": 0.171875, "step": 16 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.06125258872936e-08, "aux_brier/mean_group_std": 0.123763447321381, "aux_brier/mean_r": 0.5180065285588968, "aux_brier/n_active_tok": 56.375, "aux_brier/n_groups": 7.71875, "aux_brier/n_step_records": 14.09375, "calib/answer_extract_rate": 0.375, "calib/auroc": 0.41557734204793023, "calib/avg_num_step_conf": 1.8203125, "calib/ece": 0.696056338028169, "calib/final_conf_rate": 0.27734375, "calib/format_rate": 0.2265625, "calib/frac_conf_gt_0.9": 0.6901408450704225, "calib/gap": -0.08327886710239651, "calib/mean_conf": 0.8709859154929577, "calib/mu_c": 0.8076470588235294, "calib/mu_w": 0.8909259259259259, "calib/nonempty_final_conf_rate": 0.27734375, "calib/nonempty_reasoning_rate": 0.4375, "calib/nonempty_step_conf_rate": 0.328125, "calib/pce": 0.6638028169014084, "calib/std_conf": 0.2130737816375937, "calib/step_conf_rate": 0.328125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 3022.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 535.3828125, "completions/mean_terminated_length": 557.1463012695312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.018133333333333335, "grad_norm": 0.15465809404850006, "learning_rate": 4.25e-06, "loss": 0.083, "num_tokens": 4540499.0, "reward": 0.19698895514011383, "reward_std": 0.3120965361595154, "rewards/accuracy_reward_step": 0.06640625, "rewards/final_brier_reward_step": 0.0770183578133583, "rewards/format_reward_step_strict": 0.22265625, "step": 17 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.4943371377583044e-07, "aux_brier/mean_group_std": 0.1019210505216714, "aux_brier/mean_r": 0.5275322096514212, "aux_brier/n_active_tok": 42.064516129032256, "aux_brier/n_groups": 6.419354838709677, "aux_brier/n_step_records": 10.516129032258064, "calib/answer_extract_rate": 0.2734375, "calib/auroc": 0.7057823129251701, "calib/avg_num_step_conf": 1.2890625, "calib/ece": 0.6642622950819672, "calib/final_conf_rate": 0.23828125, "calib/format_rate": 0.17578125, "calib/frac_conf_gt_0.9": 0.5901639344262295, "calib/gap": 0.10355442176870766, "calib/mean_conf": 0.860983606557377, "calib/mu_c": 0.9441666666666668, "calib/mu_w": 0.8406122448979592, "calib/nonempty_final_conf_rate": 0.23828125, "calib/nonempty_reasoning_rate": 0.34375, "calib/nonempty_step_conf_rate": 0.27734375, "calib/pce": 0.6642622950819672, "calib/std_conf": 0.21424442084311687, "calib/step_conf_rate": 0.27734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 3057.0, "completions/max_terminated_length": 3057.0, "completions/mean_length": 618.37890625, "completions/mean_terminated_length": 651.4608764648438, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.0192, "grad_norm": 0.2550928592681885, "learning_rate": 4.5e-06, "loss": 0.1194, "num_tokens": 4809524.0, "reward": 0.1505630910396576, "reward_std": 0.2417924702167511, "rewards/accuracy_reward_step": 0.046875, "rewards/final_brier_reward_step": 0.07100234925746918, "rewards/format_reward_step_strict": 0.171875, "step": 18 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.1574589175999961e-08, "aux_brier/mean_group_std": 0.15655222438005406, "aux_brier/mean_r": 0.5297212505808176, "aux_brier/n_active_tok": 86.75, "aux_brier/n_groups": 7.53125, "aux_brier/n_step_records": 21.6875, "calib/answer_extract_rate": 0.578125, "calib/auroc": 0.4634055265123226, "calib/avg_num_step_conf": 2.79296875, "calib/ece": 0.6772984496124032, "calib/final_conf_rate": 0.50390625, "calib/format_rate": 0.42578125, "calib/frac_conf_gt_0.9": 0.6511627906976745, "calib/gap": -0.00889880507841656, "calib/mean_conf": 0.8351821705426357, "calib/mu_c": 0.8280769230769229, "calib/mu_w": 0.8369757281553395, "calib/nonempty_final_conf_rate": 0.50390625, "calib/nonempty_reasoning_rate": 0.64453125, "calib/nonempty_step_conf_rate": 0.53125, "calib/pce": 0.6554651162790698, "calib/std_conf": 0.2665359101275789, "calib/step_conf_rate": 0.53125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2936.0, "completions/max_terminated_length": 2936.0, "completions/mean_length": 462.88671875, "completions/mean_terminated_length": 483.66937255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.020266666666666665, "grad_norm": 0.02498788572847843, "learning_rate": 4.75e-06, "loss": 0.1313, "num_tokens": 5032783.0, "reward": 0.34538060426712036, "reward_std": 0.4372578263282776, "rewards/accuracy_reward_step": 0.10546875, "rewards/final_brier_reward_step": 0.14714746177196503, "rewards/format_reward_step_strict": 0.40625, "step": 19 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.524534731789373e-08, "aux_brier/mean_group_std": 0.18416291747432723, "aux_brier/mean_r": 0.4724621196341125, "aux_brier/n_active_tok": 98.25, "aux_brier/n_groups": 7.5, "aux_brier/n_step_records": 24.5625, "calib/answer_extract_rate": 0.68359375, "calib/auroc": 0.5297591477535896, "calib/avg_num_step_conf": 3.16796875, "calib/ece": 0.7053416149068322, "calib/final_conf_rate": 0.62890625, "calib/format_rate": 0.546875, "calib/frac_conf_gt_0.9": 0.782608695652174, "calib/gap": 0.0243816581750812, "calib/mean_conf": 0.9078260869565217, "calib/mu_c": 0.9270588235294118, "calib/mu_w": 0.9026771653543306, "calib/nonempty_final_conf_rate": 0.62890625, "calib/nonempty_reasoning_rate": 0.79296875, "calib/nonempty_step_conf_rate": 0.69140625, "calib/pce": 0.7009937888198756, "calib/std_conf": 0.15433164117749484, "calib/step_conf_rate": 0.69140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 3072.0, "completions/max_terminated_length": 3072.0, "completions/mean_length": 368.48046875, "completions/mean_terminated_length": 377.3240051269531, "completions/min_length": 0.0, "completions/min_terminated_length": 23.0, "epoch": 0.021333333333333333, "grad_norm": 0.12939509749412537, "learning_rate": 5e-06, "loss": 0.0943, "num_tokens": 5231986.0, "reward": 0.4509944021701813, "reward_std": 0.4627910852432251, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.18679022789001465, "rewards/format_reward_step_strict": 0.52734375, "step": 20 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.6855419965897696e-09, "aux_brier/mean_group_std": 0.2078869918818616, "aux_brier/mean_r": 0.4831021750609932, "aux_brier/n_active_tok": 121.0, "aux_brier/n_groups": 8.4375, "aux_brier/n_step_records": 30.25, "calib/answer_extract_rate": 0.79296875, "calib/auroc": 0.5836913049918531, "calib/avg_num_step_conf": 3.85546875, "calib/ece": 0.6835785000000001, "calib/final_conf_rate": 0.78125, "calib/format_rate": 0.70703125, "calib/frac_conf_gt_0.9": 0.72, "calib/gap": 0.04802620352540332, "calib/mean_conf": 0.8985785000000002, "calib/mu_c": 0.9362790697674418, "calib/mu_w": 0.8882528662420385, "calib/nonempty_final_conf_rate": 0.78125, "calib/nonempty_reasoning_rate": 0.8671875, "calib/nonempty_step_conf_rate": 0.80078125, "calib/pce": 0.6835785000000001, "calib/std_conf": 0.15646988460323602, "calib/step_conf_rate": 0.80078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2907.0, "completions/max_terminated_length": 2907.0, "completions/mean_length": 335.38671875, "completions/mean_terminated_length": 339.3636474609375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0224, "grad_norm": 0.044533684849739075, "learning_rate": 4.9722222222222224e-06, "loss": 0.0741, "num_tokens": 5420805.0, "reward": 0.5556639432907104, "reward_std": 0.49617576599121094, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.2460932433605194, "rewards/format_reward_step_strict": 0.65234375, "step": 21 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.322772361233575e-09, "aux_brier/mean_group_std": 0.2046128928256343, "aux_brier/mean_r": 0.5185869317094062, "aux_brier/n_active_tok": 132.625, "aux_brier/n_groups": 9.46875, "aux_brier/n_step_records": 33.15625, "calib/answer_extract_rate": 0.84765625, "calib/auroc": 0.5711273006134969, "calib/avg_num_step_conf": 4.18359375, "calib/ece": 0.6619905213270142, "calib/final_conf_rate": 0.82421875, "calib/format_rate": 0.7421875, "calib/frac_conf_gt_0.9": 0.7251184834123223, "calib/gap": 0.040318251533742244, "calib/mean_conf": 0.889478672985782, "calib/mu_c": 0.9206249999999999, "calib/mu_w": 0.8803067484662577, "calib/nonempty_final_conf_rate": 0.82421875, "calib/nonempty_reasoning_rate": 0.921875, "calib/nonempty_step_conf_rate": 0.84765625, "calib/pce": 0.6619905213270142, "calib/std_conf": 0.1804344763509982, "calib/step_conf_rate": 0.84765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2683.0, "completions/max_terminated_length": 2683.0, "completions/mean_length": 329.69140625, "completions/mean_terminated_length": 330.9843444824219, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.023466666666666667, "grad_norm": 0.07365760207176208, "learning_rate": 4.944444444444445e-06, "loss": 0.1207, "num_tokens": 5607022.0, "reward": 0.6302892565727234, "reward_std": 0.49790266156196594, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.26334452629089355, "rewards/format_reward_step_strict": 0.73828125, "step": 22 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.4648843964237823e-08, "aux_brier/mean_group_std": 0.21768981241071053, "aux_brier/mean_r": 0.5264782417382557, "aux_brier/n_active_tok": 125.875, "aux_brier/n_groups": 7.625, "aux_brier/n_step_records": 31.46875, "calib/answer_extract_rate": 0.89453125, "calib/auroc": 0.4203296703296703, "calib/avg_num_step_conf": 3.984375, "calib/ece": 0.6808136363636363, "calib/final_conf_rate": 0.859375, "calib/format_rate": 0.80078125, "calib/frac_conf_gt_0.9": 0.6590909090909091, "calib/gap": -0.05642261904761903, "calib/mean_conf": 0.8780863636363636, "calib/mu_c": 0.8350000000000001, "calib/mu_w": 0.8914226190476191, "calib/nonempty_final_conf_rate": 0.859375, "calib/nonempty_reasoning_rate": 0.95703125, "calib/nonempty_step_conf_rate": 0.90625, "calib/pce": 0.6612681818181817, "calib/std_conf": 0.18692190103563802, "calib/step_conf_rate": 0.90625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2147.0, "completions/max_terminated_length": 2147.0, "completions/mean_length": 318.7109375, "completions/mean_terminated_length": 321.220458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.024533333333333334, "grad_norm": 0.10605355352163315, "learning_rate": 4.9166666666666665e-06, "loss": 0.1017, "num_tokens": 5792548.0, "reward": 0.6638668775558472, "reward_std": 0.45015454292297363, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.28046756982803345, "rewards/format_reward_step_strict": 0.7734375, "step": 23 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 5.2244822426605886e-09, "aux_brier/mean_group_std": 0.20294305429502568, "aux_brier/mean_r": 0.5130447519385217, "aux_brier/n_active_tok": 148.75, "aux_brier/n_groups": 9.0625, "aux_brier/n_step_records": 37.1875, "calib/answer_extract_rate": 0.875, "calib/auroc": 0.4318121693121693, "calib/avg_num_step_conf": 4.75390625, "calib/ece": 0.7022972972972974, "calib/final_conf_rate": 0.8671875, "calib/format_rate": 0.796875, "calib/frac_conf_gt_0.9": 0.7072072072072072, "calib/gap": -0.010209523809523668, "calib/mean_conf": 0.8868018018018017, "calib/mu_c": 0.8785238095238096, "calib/mu_w": 0.8887333333333333, "calib/nonempty_final_conf_rate": 0.8671875, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.9140625, "calib/pce": 0.6999549549549551, "calib/std_conf": 0.17966138419183594, "calib/step_conf_rate": 0.9140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2745.0, "completions/max_terminated_length": 2745.0, "completions/mean_length": 325.76171875, "completions/mean_terminated_length": 328.3267822265625, "completions/min_length": 0.0, "completions/min_terminated_length": 15.0, "epoch": 0.0256, "grad_norm": 0.21002741158008575, "learning_rate": 4.888888888888889e-06, "loss": 0.1087, "num_tokens": 5980455.0, "reward": 0.610745370388031, "reward_std": 0.43826824426651, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.263293981552124, "rewards/format_reward_step_strict": 0.75390625, "step": 24 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.988896718846389e-08, "aux_brier/mean_group_std": 0.20363710790214565, "aux_brier/mean_r": 0.49830946569913254, "aux_brier/n_active_tok": 150.875, "aux_brier/n_groups": 8.125, "aux_brier/n_step_records": 37.71875, "calib/answer_extract_rate": 0.91796875, "calib/auroc": 0.6020542400323822, "calib/avg_num_step_conf": 4.765625, "calib/ece": 0.6479831223628691, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.87890625, "calib/frac_conf_gt_0.9": 0.6582278481012658, "calib/gap": 0.05367668488160293, "calib/mean_conf": 0.875831223628692, "calib/mu_c": 0.9172777777777777, "calib/mu_w": 0.8636010928961748, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.6479831223628691, "calib/std_conf": 0.1821509001281357, "calib/step_conf_rate": 0.95703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1848.0, "completions/max_terminated_length": 1848.0, "completions/mean_length": 280.63671875, "completions/mean_terminated_length": 280.63671875, "completions/min_length": 6.0, "completions/min_terminated_length": 6.0, "epoch": 0.02666666666666667, "grad_norm": 0.008940259926021099, "learning_rate": 4.861111111111111e-06, "loss": 0.0274, "num_tokens": 6155522.0, "reward": 0.7232168316841125, "reward_std": 0.4639264941215515, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.33817988634109497, "rewards/format_reward_step_strict": 0.85546875, "step": 25 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 8.733723841358731e-09, "aux_brier/mean_group_std": 0.1880441359486682, "aux_brier/mean_r": 0.5253828549221222, "aux_brier/n_active_tok": 154.125, "aux_brier/n_groups": 8.25, "aux_brier/n_step_records": 38.53125, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.4772074659009333, "calib/avg_num_step_conf": 4.86328125, "calib/ece": 0.7233775933609959, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.7012448132780082, "calib/gap": 0.010950227327111817, "calib/mean_conf": 0.8897676348547717, "calib/mu_c": 0.8988095238095238, "calib/mu_w": 0.887859296482412, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.719435684647303, "calib/std_conf": 0.17055198386843828, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2863.0, "completions/max_terminated_length": 2863.0, "completions/mean_length": 283.8046875, "completions/mean_terminated_length": 283.8046875, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 0.027733333333333332, "grad_norm": 0.11429033428430557, "learning_rate": 4.833333333333333e-06, "loss": 0.0064, "num_tokens": 6333416.0, "reward": 0.68150794506073, "reward_std": 0.4080682396888733, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.2963443994522095, "rewards/format_reward_step_strict": 0.88671875, "step": 26 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.146805901630632e-08, "aux_brier/mean_group_std": 0.20170100575564176, "aux_brier/mean_r": 0.5008420967947467, "aux_brier/n_active_tok": 158.0, "aux_brier/n_groups": 8.4375, "aux_brier/n_step_records": 39.5, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.4906553398058252, "calib/avg_num_step_conf": 4.96484375, "calib/ece": 0.7226029810298104, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 0.6829268292682927, "calib/gap": 0.0549862459546927, "calib/mean_conf": 0.8852046070460704, "calib/mu_c": 0.93125, "calib/mu_w": 0.8762637540453073, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.7226029810298104, "calib/std_conf": 0.17142669879331107, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2698.0, "completions/max_terminated_length": 2698.0, "completions/mean_length": 271.6875, "completions/mean_terminated_length": 271.6875, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.0288, "grad_norm": 0.2054324895143509, "learning_rate": 4.805555555555556e-06, "loss": 0.0588, "num_tokens": 6508184.0, "reward": 0.690642237663269, "reward_std": 0.35970115661621094, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.30944421887397766, "rewards/format_reward_step_strict": 0.890625, "step": 27 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -9.251436984547556e-09, "aux_brier/mean_group_std": 0.21422411978879408, "aux_brier/mean_r": 0.5064897092037213, "aux_brier/n_active_tok": 154.25, "aux_brier/n_groups": 9.34375, "aux_brier/n_step_records": 38.5625, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5271692745376956, "calib/avg_num_step_conf": 4.86328125, "calib/ece": 0.6659421487603305, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 0.7066115702479339, "calib/gap": 0.01013504030346124, "calib/mean_conf": 0.8903223140495867, "calib/mu_c": 0.8980701754385964, "calib/mu_w": 0.8879351351351351, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.6603636363636363, "calib/std_conf": 0.16952703847708325, "calib/step_conf_rate": 0.95703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1584.0, "completions/max_terminated_length": 1584.0, "completions/mean_length": 283.50390625, "completions/mean_terminated_length": 283.50390625, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.029866666666666666, "grad_norm": 0.1324453055858612, "learning_rate": 4.777777777777778e-06, "loss": 0.0586, "num_tokens": 6687705.0, "reward": 0.7451667785644531, "reward_std": 0.43373051285743713, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.3400421738624573, "rewards/format_reward_step_strict": 0.875, "step": 28 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.774421712341656e-08, "aux_brier/mean_group_std": 0.19610744163840346, "aux_brier/mean_r": 0.5103295116870583, "aux_brier/n_active_tok": 169.875, "aux_brier/n_groups": 8.90625, "aux_brier/n_step_records": 42.46875, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5384146341463415, "calib/avg_num_step_conf": 5.484375, "calib/ece": 0.7245673469387756, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.6816326530612244, "calib/gap": 0.053980487804878186, "calib/mean_conf": 0.8878326530612244, "calib/mu_c": 0.933, "calib/mu_w": 0.8790195121951219, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.7245673469387756, "calib/std_conf": 0.16122579095315817, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1917.0, "completions/max_terminated_length": 1917.0, "completions/mean_length": 303.91015625, "completions/mean_terminated_length": 306.30316162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.030933333333333334, "grad_norm": 0.34857821464538574, "learning_rate": 4.75e-06, "loss": -0.0121, "num_tokens": 6872634.0, "reward": 0.680101752281189, "reward_std": 0.38362351059913635, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.2985318899154663, "rewards/format_reward_step_strict": 0.890625, "step": 29 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.349526157657822e-08, "aux_brier/mean_group_std": 0.18645527569371767, "aux_brier/mean_r": 0.5258975753340903, "aux_brier/n_active_tok": 172.125, "aux_brier/n_groups": 10.78125, "aux_brier/n_step_records": 43.03125, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5746729461015174, "calib/avg_num_step_conf": 5.41796875, "calib/ece": 0.6881557377049181, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.6475409836065574, "calib/gap": 0.02557299843014127, "calib/mean_conf": 0.882827868852459, "calib/mu_c": 0.903265306122449, "calib/mu_w": 0.8776923076923078, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.6850819672131149, "calib/std_conf": 0.16179258086614187, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2010.0, "completions/max_terminated_length": 2010.0, "completions/mean_length": 304.546875, "completions/mean_terminated_length": 304.546875, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "epoch": 0.032, "grad_norm": 0.10014064610004425, "learning_rate": 4.722222222222222e-06, "loss": 0.0001, "num_tokens": 7057582.0, "reward": 0.7322098612785339, "reward_std": 0.4103948175907135, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.33508944511413574, "rewards/format_reward_step_strict": 0.90625, "step": 30 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 8.70296445326657e-09, "aux_brier/mean_group_std": 0.2190587943030789, "aux_brier/mean_r": 0.5495102326387148, "aux_brier/n_active_tok": 175.125, "aux_brier/n_groups": 9.6875, "aux_brier/n_step_records": 43.78125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.524795918367347, "calib/avg_num_step_conf": 5.5, "calib/ece": 0.6690361445783132, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.5823293172690763, "calib/gap": 0.03365714285714294, "calib/mean_conf": 0.8658232931726908, "calib/mu_c": 0.8928571428571429, "calib/mu_w": 0.8592, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.6690361445783132, "calib/std_conf": 0.17100525162676766, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1299.0, "completions/max_terminated_length": 1299.0, "completions/mean_length": 294.66796875, "completions/mean_terminated_length": 294.66796875, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.03306666666666667, "grad_norm": 0.3468322455883026, "learning_rate": 4.694444444444445e-06, "loss": 0.0431, "num_tokens": 7238929.0, "reward": 0.7521989941596985, "reward_std": 0.38824474811553955, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.3603585958480835, "rewards/format_reward_step_strict": 0.94140625, "step": 31 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 5.008295861363976e-10, "aux_brier/mean_group_std": 0.20375160642834253, "aux_brier/mean_r": 0.6171608731266107, "aux_brier/n_active_tok": 158.5, "aux_brier/n_groups": 7.75, "aux_brier/n_step_records": 39.625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5288600288600288, "calib/avg_num_step_conf": 4.9765625, "calib/ece": 0.6176574803149606, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.5078740157480315, "calib/gap": 0.053775252525252504, "calib/mean_conf": 0.8318307086614174, "calib/mu_c": 0.87375, "calib/mu_w": 0.8199747474747475, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.614507874015748, "calib/std_conf": 0.2009100723388808, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 803.0, "completions/max_terminated_length": 803.0, "completions/mean_length": 263.7734375, "completions/mean_terminated_length": 264.807861328125, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.034133333333333335, "grad_norm": 0.08650796115398407, "learning_rate": 4.666666666666667e-06, "loss": 0.0278, "num_tokens": 7413159.0, "reward": 0.8118621706962585, "reward_std": 0.3442915380001068, "rewards/accuracy_reward_step": 0.21875, "rewards/final_brier_reward_step": 0.42713624238967896, "rewards/format_reward_step_strict": 0.97265625, "step": 32 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 7.882408420423204e-09, "aux_brier/mean_group_std": 0.20335982369599354, "aux_brier/mean_r": 0.5829862248054526, "aux_brier/n_active_tok": 172.375, "aux_brier/n_groups": 8.75, "aux_brier/n_step_records": 43.09375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5207657770017461, "calib/avg_num_step_conf": 5.41015625, "calib/ece": 0.7021686746987952, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.5461847389558233, "calib/gap": 6.734846595157773e-05, "calib/mean_conf": 0.8520481927710845, "calib/mu_c": 0.8521052631578946, "calib/mu_w": 0.852037914691943, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.7008032128514057, "calib/std_conf": 0.1779767529344418, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 916.0, "completions/max_terminated_length": 916.0, "completions/mean_length": 299.81640625, "completions/mean_terminated_length": 302.1771545410156, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.0352, "grad_norm": 0.06140900403261185, "learning_rate": 4.638888888888889e-06, "loss": 0.0056, "num_tokens": 7596784.0, "reward": 0.7030861377716064, "reward_std": 0.3202725648880005, "rewards/accuracy_reward_step": 0.1484375, "rewards/final_brier_reward_step": 0.3357820212841034, "rewards/format_reward_step_strict": 0.94140625, "step": 33 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 4.224410730946371e-09, "aux_brier/mean_group_std": 0.1973998358719577, "aux_brier/mean_r": 0.675027915675693, "aux_brier/n_active_tok": 173.5, "aux_brier/n_groups": 9.71875, "aux_brier/n_step_records": 43.375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5004618226600985, "calib/avg_num_step_conf": 5.46484375, "calib/ece": 0.5999203187250997, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.42231075697211157, "calib/gap": 0.016860632183907986, "calib/mean_conf": 0.7911553784860557, "calib/mu_c": 0.8047916666666666, "calib/mu_w": 0.7879310344827586, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5999203187250997, "calib/std_conf": 0.2017015757457608, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 885.0, "completions/max_terminated_length": 885.0, "completions/mean_length": 274.69921875, "completions/mean_terminated_length": 275.7764892578125, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.03626666666666667, "grad_norm": 1.1419422626495361, "learning_rate": 4.611111111111112e-06, "loss": 0.0116, "num_tokens": 7772219.0, "reward": 0.78791344165802, "reward_std": 0.3862859606742859, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.44071638584136963, "rewards/format_reward_step_strict": 0.97265625, "step": 34 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.123872044183631e-08, "aux_brier/mean_group_std": 0.19222505546668744, "aux_brier/mean_r": 0.6863559674116386, "aux_brier/n_active_tok": 180.5, "aux_brier/n_groups": 9.625, "aux_brier/n_step_records": 45.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6022884283246979, "calib/avg_num_step_conf": 5.6640625, "calib/ece": 0.53300395256917, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.3438735177865613, "calib/gap": 0.08325215889464588, "calib/mean_conf": 0.7701581027667985, "calib/mu_c": 0.8336666666666666, "calib/mu_w": 0.7504145077720207, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.53300395256917, "calib/std_conf": 0.22595947095308164, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 887.0, "completions/max_terminated_length": 887.0, "completions/mean_length": 310.3203125, "completions/mean_terminated_length": 311.5372619628906, "completions/min_length": 0.0, "completions/min_terminated_length": 27.0, "epoch": 0.037333333333333336, "grad_norm": 0.09675668179988861, "learning_rate": 4.583333333333333e-06, "loss": 0.0073, "num_tokens": 7960917.0, "reward": 0.8488951921463013, "reward_std": 0.3726450204849243, "rewards/accuracy_reward_step": 0.234375, "rewards/final_brier_reward_step": 0.5049558877944946, "rewards/format_reward_step_strict": 0.9765625, "step": 35 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 4.84285989088562e-09, "aux_brier/mean_group_std": 0.1746511756104062, "aux_brier/mean_r": 0.7607258928054881, "aux_brier/n_active_tok": 173.0, "aux_brier/n_groups": 9.40625, "aux_brier/n_step_records": 43.25, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5042504665146175, "calib/avg_num_step_conf": 5.45703125, "calib/ece": 0.37484000000000006, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.212, "calib/gap": 0.0037231322136981326, "calib/mean_conf": 0.68972, "calib/mu_c": 0.6920879120879121, "calib/mu_w": 0.688364779874214, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.35028000000000004, "calib/std_conf": 0.2423491728890363, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1628.0, "completions/max_terminated_length": 1628.0, "completions/mean_length": 304.76171875, "completions/mean_terminated_length": 305.9568786621094, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.0384, "grad_norm": 0.5897600054740906, "learning_rate": 4.555555555555556e-06, "loss": 0.0127, "num_tokens": 8141648.0, "reward": 0.9805706739425659, "reward_std": 0.4639264941215515, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.5863453149795532, "rewards/format_reward_step_strict": 0.95703125, "step": 36 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.0809664130760908e-08, "aux_brier/mean_group_std": 0.18644022102253568, "aux_brier/mean_r": 0.7817912325901277, "aux_brier/n_active_tok": 164.875, "aux_brier/n_groups": 8.8125, "aux_brier/n_step_records": 41.21875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.49517835808158395, "calib/avg_num_step_conf": 5.24609375, "calib/ece": 0.40549800796812757, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.18725099601593626, "calib/gap": -0.00244410308926446, "calib/mean_conf": 0.641195219123506, "calib/mu_c": 0.6393548387096774, "calib/mu_w": 0.6417989417989418, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.39984063745019927, "calib/std_conf": 0.25375931623243464, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2081.0, "completions/max_terminated_length": 2081.0, "completions/mean_length": 302.0078125, "completions/mean_terminated_length": 303.1921691894531, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.039466666666666664, "grad_norm": 0.08607197552919388, "learning_rate": 4.527777777777778e-06, "loss": 0.0528, "num_tokens": 8326058.0, "reward": 0.870861291885376, "reward_std": 0.3708493709564209, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.5771952867507935, "rewards/format_reward_step_strict": 0.96875, "step": 37 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.5499739727808937e-08, "aux_brier/mean_group_std": 0.15065610893712256, "aux_brier/mean_r": 0.84442506153201, "aux_brier/n_active_tok": 163.625, "aux_brier/n_groups": 8.625, "aux_brier/n_step_records": 40.90625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4324565155403478, "calib/avg_num_step_conf": 5.1328125, "calib/ece": 0.31868525896414346, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.10756972111553785, "calib/gap": -0.05270958083832333, "calib/mean_conf": 0.5775697211155378, "calib/mu_c": 0.5425, "calib/mu_w": 0.5952095808383233, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.28079681274900403, "calib/std_conf": 0.2625375520844664, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 728.0, "completions/max_terminated_length": 728.0, "completions/mean_length": 277.3203125, "completions/mean_terminated_length": 278.4078674316406, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.04053333333333333, "grad_norm": 0.2101420909166336, "learning_rate": 4.5e-06, "loss": 0.0435, "num_tokens": 8503940.0, "reward": 0.970810055732727, "reward_std": 0.363656222820282, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.6098023653030396, "rewards/format_reward_step_strict": 0.97265625, "step": 38 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 8.453383709694506e-08, "aux_brier/mean_group_std": 0.1489221191832934, "aux_brier/mean_r": 0.8456605949715051, "aux_brier/n_active_tok": 180.25, "aux_brier/n_groups": 10.25, "aux_brier/n_step_records": 45.0625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4156654166985536, "calib/avg_num_step_conf": 5.64453125, "calib/ece": 0.31293650793650796, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0992063492063492, "calib/gap": -0.0772311930818092, "calib/mean_conf": 0.518968253968254, "calib/mu_c": 0.46410958904109584, "calib/mu_w": 0.541340782122905, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.27111111111111114, "calib/std_conf": 0.2711712671634785, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1931.0, "completions/max_terminated_length": 1931.0, "completions/mean_length": 318.3359375, "completions/mean_terminated_length": 318.3359375, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.0416, "grad_norm": 0.030873866751790047, "learning_rate": 4.472222222222223e-06, "loss": 0.0565, "num_tokens": 8691522.0, "reward": 0.9219582080841064, "reward_std": 0.33394742012023926, "rewards/accuracy_reward_step": 0.28515625, "rewards/final_brier_reward_step": 0.6175203323364258, "rewards/format_reward_step_strict": 0.96484375, "step": 39 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.1069860050838543e-07, "aux_brier/mean_group_std": 0.09696652906739323, "aux_brier/mean_r": 0.8914522087968476, "aux_brier/n_active_tok": 184.75, "aux_brier/n_groups": 9.90625, "aux_brier/n_step_records": 46.1875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5167284604519774, "calib/avg_num_step_conf": 5.7734375, "calib/ece": 0.28800796812749, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.06374501992031872, "calib/gap": 0.024824329096045095, "calib/mean_conf": 0.4860956175298805, "calib/mu_c": 0.5050847457627118, "calib/mu_w": 0.4802604166666667, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26952191235059764, "calib/std_conf": 0.25475379650435753, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2183.0, "completions/max_terminated_length": 2183.0, "completions/mean_length": 331.79296875, "completions/mean_terminated_length": 331.79296875, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.042666666666666665, "grad_norm": 0.009598830714821815, "learning_rate": 4.444444444444444e-06, "loss": -0.0005, "num_tokens": 8883221.0, "reward": 0.8843405246734619, "reward_std": 0.35157209634780884, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.6779870986938477, "rewards/format_reward_step_strict": 0.96875, "step": 40 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.0429956512347793e-07, "aux_brier/mean_group_std": 0.08730693682591503, "aux_brier/mean_r": 0.911575622717098, "aux_brier/n_active_tok": 180.375, "aux_brier/n_groups": 9.90625, "aux_brier/n_step_records": 45.09375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5180523773201119, "calib/avg_num_step_conf": 5.6640625, "calib/ece": 0.21460317460317457, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.047619047619047616, "calib/gap": 0.0029824561403508087, "calib/mean_conf": 0.40301587301587305, "calib/mu_c": 0.4046491228070175, "calib/mu_w": 0.40166666666666667, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0826190476190476, "calib/std_conf": 0.24391082758159677, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 806.0, "completions/max_terminated_length": 806.0, "completions/mean_length": 299.2421875, "completions/mean_terminated_length": 300.41571044921875, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.04373333333333333, "grad_norm": 0.17322491109371185, "learning_rate": 4.416666666666667e-06, "loss": 0.0106, "num_tokens": 9067075.0, "reward": 1.1116578578948975, "reward_std": 0.40159034729003906, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6810062527656555, "rewards/format_reward_step_strict": 0.984375, "step": 41 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.4525956246180804e-07, "aux_brier/mean_group_std": 0.08007366569528057, "aux_brier/mean_r": 0.9243059512665607, "aux_brier/n_active_tok": 168.5, "aux_brier/n_groups": 8.90625, "aux_brier/n_step_records": 42.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4832434646099243, "calib/avg_num_step_conf": 5.296875, "calib/ece": 0.16892857142857146, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.05555555555555555, "calib/gap": -0.018466316292403306, "calib/mean_conf": 0.3443253968253968, "calib/mu_c": 0.3325274725274725, "calib/mu_w": 0.3509937888198758, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07607142857142857, "calib/std_conf": 0.23050139721248117, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 686.0, "completions/max_terminated_length": 686.0, "completions/mean_length": 271.64453125, "completions/mean_terminated_length": 272.7098083496094, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.0448, "grad_norm": 0.08163885772228241, "learning_rate": 4.388888888888889e-06, "loss": 0.0416, "num_tokens": 9240984.0, "reward": 1.0217323303222656, "reward_std": 0.28946220874786377, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.6963043212890625, "rewards/format_reward_step_strict": 0.9765625, "step": 42 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 9.231101651130391e-08, "aux_brier/mean_group_std": 0.0746223491502667, "aux_brier/mean_r": 0.9358663043217771, "aux_brier/n_active_tok": 182.625, "aux_brier/n_groups": 10.71875, "aux_brier/n_step_records": 45.65625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.480614690539969, "calib/avg_num_step_conf": 5.7109375, "calib/ece": 0.19223529411764706, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0196078431372549, "calib/gap": 0.002710418722684349, "calib/mean_conf": 0.31803921568627447, "calib/mu_c": 0.3198780487804878, "calib/mu_w": 0.31716763005780346, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09435294117647058, "calib/std_conf": 0.2150915865834661, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1937.0, "completions/max_terminated_length": 1937.0, "completions/mean_length": 312.6328125, "completions/mean_terminated_length": 312.6328125, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 0.04586666666666667, "grad_norm": 0.11977484822273254, "learning_rate": 4.361111111111112e-06, "loss": 0.023, "num_tokens": 9426242.0, "reward": 0.9972071051597595, "reward_std": 0.2933819591999054, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.7310156226158142, "rewards/format_reward_step_strict": 0.98828125, "step": 43 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.160131162802891e-07, "aux_brier/mean_group_std": 0.06065450534212685, "aux_brier/mean_r": 0.9425842242965002, "aux_brier/n_active_tok": 197.25, "aux_brier/n_groups": 11.53125, "aux_brier/n_step_records": 49.3125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5035491815152833, "calib/avg_num_step_conf": 6.1640625, "calib/ece": 0.1675686274509804, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.011764705882352941, "calib/gap": -0.01358322468491957, "calib/mean_conf": 0.3016078431372549, "calib/mu_c": 0.2921794871794872, "calib/mu_w": 0.3057627118644068, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0816470588235294, "calib/std_conf": 0.19989056598513535, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1780.0, "completions/max_terminated_length": 1780.0, "completions/mean_length": 337.79296875, "completions/mean_terminated_length": 337.79296875, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.046933333333333334, "grad_norm": 0.032078322023153305, "learning_rate": 4.333333333333334e-06, "loss": 0.0346, "num_tokens": 9619037.0, "reward": 0.9874945878982544, "reward_std": 0.27549248933792114, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.7390409708023071, "rewards/format_reward_step_strict": 0.99609375, "step": 44 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.5251341978217425e-07, "aux_brier/mean_group_std": 0.05199722409692429, "aux_brier/mean_r": 0.9553006842650337, "aux_brier/n_active_tok": 182.625, "aux_brier/n_groups": 9.84375, "aux_brier/n_step_records": 45.65625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4215377067563779, "calib/avg_num_step_conf": 5.72265625, "calib/ece": 0.23852589641434263, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0199203187250996, "calib/gap": -0.04835085506027481, "calib/mean_conf": 0.24354581673306774, "calib/mu_c": 0.21195402298850569, "calib/mu_w": 0.2603048780487805, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06772908366533865, "calib/std_conf": 0.17582751670290586, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 866.0, "completions/max_terminated_length": 866.0, "completions/mean_length": 314.13671875, "completions/mean_terminated_length": 315.36865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 58.0, "epoch": 0.048, "grad_norm": 0.0748474970459938, "learning_rate": 4.305555555555556e-06, "loss": 0.0373, "num_tokens": 9804504.0, "reward": 1.0080393552780151, "reward_std": 0.3095443546772003, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.6962199211120605, "rewards/format_reward_step_strict": 0.98046875, "step": 45 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 8.267610718259988e-08, "aux_brier/mean_group_std": 0.06327006275738548, "aux_brier/mean_r": 0.9465535762646131, "aux_brier/n_active_tok": 199.75, "aux_brier/n_groups": 11.46875, "aux_brier/n_step_records": 49.9375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.43403643336529246, "calib/avg_num_step_conf": 6.26171875, "calib/ece": 0.2702629921259843, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.04364034515819751, "calib/mean_conf": 0.22115433070866142, "calib/mu_c": 0.1955542857142857, "calib/mu_w": 0.23919463087248322, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03901574803149606, "calib/std_conf": 0.15636737217101962, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1708.0, "completions/max_terminated_length": 1708.0, "completions/mean_length": 331.9765625, "completions/mean_terminated_length": 331.9765625, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.04906666666666667, "grad_norm": 0.17392946779727936, "learning_rate": 4.277777777777778e-06, "loss": 0.0383, "num_tokens": 9994258.0, "reward": 1.0756181478500366, "reward_std": 0.3038777709007263, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6696599721908569, "rewards/format_reward_step_strict": 0.98828125, "step": 46 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.2866831400625287e-07, "aux_brier/mean_group_std": 0.05879260840197176, "aux_brier/mean_r": 0.9562548058580053, "aux_brier/n_active_tok": 213.875, "aux_brier/n_groups": 13.4375, "aux_brier/n_step_records": 53.46875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4816504250466515, "calib/avg_num_step_conf": 6.703125, "calib/ece": 0.23188000000000003, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.008, "calib/gap": -0.016387449028958417, "calib/mean_conf": 0.18284, "calib/mu_c": 0.17241758241758243, "calib/mu_w": 0.18880503144654084, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.025360000000000004, "calib/std_conf": 0.1448486603320859, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2975.0, "completions/max_terminated_length": 2975.0, "completions/mean_length": 374.66015625, "completions/mean_terminated_length": 376.1294250488281, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.050133333333333335, "grad_norm": 0.08345242589712143, "learning_rate": 4.25e-06, "loss": 0.0466, "num_tokens": 10196147.0, "reward": 1.0144307613372803, "reward_std": 0.32323795557022095, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.6905355453491211, "rewards/format_reward_step_strict": 0.97265625, "step": 47 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 5.769369222086151e-07, "aux_brier/mean_group_std": 0.04574837180941228, "aux_brier/mean_r": 0.9644007629165204, "aux_brier/n_active_tok": 179.5, "aux_brier/n_groups": 9.03125, "aux_brier/n_step_records": 44.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4432930339666091, "calib/avg_num_step_conf": 5.6875, "calib/ece": 0.16326953125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0179101077391233, "calib/mean_conf": 0.14508984375000003, "calib/mu_c": 0.13158730158730159, "calib/mu_w": 0.14949740932642488, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.031132812499999985, "calib/std_conf": 0.10950665172422444, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1037.0, "completions/max_terminated_length": 1037.0, "completions/mean_length": 293.421875, "completions/mean_terminated_length": 294.57257080078125, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.0512, "grad_norm": 0.11976519972085953, "learning_rate": 4.222222222222223e-06, "loss": 0.0628, "num_tokens": 10374951.0, "reward": 0.9385948181152344, "reward_std": 0.18301494419574738, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.7856290936470032, "rewards/format_reward_step_strict": 0.9921875, "step": 48 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.169830846225466e-08, "aux_brier/mean_group_std": 0.04859701273715004, "aux_brier/mean_r": 0.9599143498316884, "aux_brier/n_active_tok": 194.75, "aux_brier/n_groups": 11.84375, "aux_brier/n_step_records": 48.6875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.517244884130641, "calib/avg_num_step_conf": 6.12109375, "calib/ece": 0.2568924302788845, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": -0.009650359127253039, "calib/mean_conf": 0.14071713147410359, "calib/mu_c": 0.1346808510638298, "calib/mu_w": 0.14433121019108283, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011553784860557768, "calib/std_conf": 0.11051258543343341, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2021.0, "completions/max_terminated_length": 2021.0, "completions/mean_length": 342.5234375, "completions/mean_terminated_length": 342.5234375, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.05226666666666667, "grad_norm": 0.2972572147846222, "learning_rate": 4.194444444444445e-06, "loss": 0.1037, "num_tokens": 10567173.0, "reward": 1.0237152576446533, "reward_std": 0.2431778907775879, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.6807984113693237, "rewards/format_reward_step_strict": 0.97265625, "step": 49 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.9017204059299537e-07, "aux_brier/mean_group_std": 0.047627489063467456, "aux_brier/mean_r": 0.9671897326182052, "aux_brier/n_active_tok": 197.5, "aux_brier/n_groups": 11.25, "aux_brier/n_step_records": 49.375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5290701364646144, "calib/avg_num_step_conf": 6.21484375, "calib/ece": 0.360547619047619, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002993970168200605, "calib/mean_conf": 0.1061984126984127, "calib/mu_c": 0.10782608695652178, "calib/mu_w": 0.10483211678832118, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0051984126984126995, "calib/std_conf": 0.07634929338674379, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2177.0, "completions/max_terminated_length": 2177.0, "completions/mean_length": 356.31640625, "completions/mean_terminated_length": 357.7137451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.05333333333333334, "grad_norm": 0.02677062712609768, "learning_rate": 4.166666666666667e-06, "loss": 0.0327, "num_tokens": 10763750.0, "reward": 1.0912978649139404, "reward_std": 0.3045734167098999, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6151912212371826, "rewards/format_reward_step_strict": 0.9765625, "step": 50 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.5887713887274835e-06, "aux_brier/mean_group_std": 0.04609024460167228, "aux_brier/mean_r": 0.9644180483138113, "aux_brier/n_active_tok": 207.625, "aux_brier/n_groups": 13.09375, "aux_brier/n_step_records": 51.90625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4327722772277227, "calib/avg_num_step_conf": 6.66015625, "calib/ece": 0.31158964143426293, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.014207326732673276, "calib/mean_conf": 0.10522310756972113, "calib/mu_c": 0.09673267326732674, "calib/mu_w": 0.11094000000000001, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007211155378486056, "calib/std_conf": 0.08254356584769627, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2336.0, "completions/max_terminated_length": 2336.0, "completions/mean_length": 333.21875, "completions/mean_terminated_length": 337.16998291015625, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.0544, "grad_norm": 0.05267907679080963, "learning_rate": 4.138888888888889e-06, "loss": 0.0192, "num_tokens": 10958350.0, "reward": 1.0391311645507812, "reward_std": 0.29196834564208984, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.6408998966217041, "rewards/format_reward_step_strict": 0.96875, "step": 51 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.583419999151019e-07, "aux_brier/mean_group_std": 0.06857622843521455, "aux_brier/mean_r": 0.9559836778689764, "aux_brier/n_active_tok": 169.75, "aux_brier/n_groups": 8.65625, "aux_brier/n_step_records": 42.4375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5377759960527939, "calib/avg_num_step_conf": 5.31640625, "calib/ece": 0.44505882352941173, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007583569754533101, "calib/mean_conf": 0.08043137254901961, "calib/mu_c": 0.08402985074626865, "calib/mu_w": 0.07644628099173555, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.052433059060385125, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 888.0, "completions/max_terminated_length": 888.0, "completions/mean_length": 304.37109375, "completions/mean_terminated_length": 305.5647277832031, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.055466666666666664, "grad_norm": 0.03042164258658886, "learning_rate": 4.111111111111111e-06, "loss": -0.007, "num_tokens": 11144221.0, "reward": 1.163251280784607, "reward_std": 0.3101959526538849, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.551442563533783, "rewards/format_reward_step_strict": 0.99609375, "step": 52 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.88559796332477e-07, "aux_brier/mean_group_std": 0.08276254140809931, "aux_brier/mean_r": 0.948797932380574, "aux_brier/n_active_tok": 199.5, "aux_brier/n_groups": 11.84375, "aux_brier/n_step_records": 49.875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5718158890290037, "calib/avg_num_step_conf": 6.24609375, "calib/ece": 0.4018968253968254, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010276166456494332, "calib/mean_conf": 0.08643650793650795, "calib/mu_c": 0.09173770491803279, "calib/mu_w": 0.08146153846153846, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0021031746031746033, "calib/std_conf": 0.06867348510203528, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2008.0, "completions/max_terminated_length": 2008.0, "completions/mean_length": 359.95703125, "completions/mean_terminated_length": 359.95703125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.05653333333333333, "grad_norm": 0.011011505499482155, "learning_rate": 4.083333333333334e-06, "loss": 0.0329, "num_tokens": 11342194.0, "reward": 1.116516351699829, "reward_std": 0.30897217988967896, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5832530856132507, "rewards/format_reward_step_strict": 0.98046875, "step": 53 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.2435113564135403e-06, "aux_brier/mean_group_std": 0.07204236926393826, "aux_brier/mean_r": 0.9523674597628599, "aux_brier/n_active_tok": 171.75, "aux_brier/n_groups": 10.78125, "aux_brier/n_step_records": 42.9375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.501830114855484, "calib/avg_num_step_conf": 5.3671875, "calib/ece": 0.4752569169960474, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0042010602044680045, "calib/mean_conf": 0.07415019762845851, "calib/mu_c": 0.07604316546762589, "calib/mu_w": 0.07184210526315789, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.05885408675384341, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2405.0, "completions/max_terminated_length": 2405.0, "completions/mean_length": 307.6484375, "completions/mean_terminated_length": 307.6484375, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.0576, "grad_norm": 0.037044648081064224, "learning_rate": 4.055555555555556e-06, "loss": 0.0125, "num_tokens": 11527184.0, "reward": 1.1587554216384888, "reward_std": 0.2927601933479309, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5100218653678894, "rewards/format_reward_step_strict": 0.9765625, "step": 54 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 7.267408025923849e-07, "aux_brier/mean_group_std": 0.06855691800515884, "aux_brier/mean_r": 0.9563366510048938, "aux_brier/n_active_tok": 178.75, "aux_brier/n_groups": 11.09375, "aux_brier/n_step_records": 44.6875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.46380687491085437, "calib/avg_num_step_conf": 5.60546875, "calib/ece": 0.2638616600790514, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006634288974468709, "calib/mean_conf": 0.06570355731225296, "calib/mu_c": 0.06121951219512194, "calib/mu_w": 0.06785380116959065, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002727272727272727, "calib/std_conf": 0.052667154069498685, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2005.0, "completions/max_terminated_length": 2005.0, "completions/mean_length": 330.3359375, "completions/mean_terminated_length": 331.6313781738281, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.058666666666666666, "grad_norm": 0.009194295853376389, "learning_rate": 4.027777777777779e-06, "loss": 0.0647, "num_tokens": 11719574.0, "reward": 0.9875449538230896, "reward_std": 0.23884600400924683, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.7001798152923584, "rewards/format_reward_step_strict": 0.984375, "step": 55 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.0774575854854973e-06, "aux_brier/mean_group_std": 0.06709565865688792, "aux_brier/mean_r": 0.9565804266181223, "aux_brier/n_active_tok": 193.125, "aux_brier/n_groups": 11.15625, "aux_brier/n_step_records": 48.28125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5015060240963856, "calib/avg_num_step_conf": 6.0390625, "calib/ece": 0.28295275590551183, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00040115005476451526, "calib/mean_conf": 0.06507874015748032, "calib/mu_c": 0.06534090909090909, "calib/mu_w": 0.06493975903614457, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0007874015748031496, "calib/std_conf": 0.0431691611819474, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2360.0, "completions/max_terminated_length": 2360.0, "completions/mean_length": 353.73046875, "completions/mean_terminated_length": 353.73046875, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.05973333333333333, "grad_norm": 0.0191681906580925, "learning_rate": 4.000000000000001e-06, "loss": 0.079, "num_tokens": 11916969.0, "reward": 1.0097177028656006, "reward_std": 0.24075153470039368, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.6873081922531128, "rewards/format_reward_step_strict": 0.98828125, "step": 56 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.2804876683425448e-06, "aux_brier/mean_group_std": 0.04012048894455258, "aux_brier/mean_r": 0.9748410301637029, "aux_brier/n_active_tok": 187.375, "aux_brier/n_groups": 10.96875, "aux_brier/n_step_records": 46.84375, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.39824218750000007, "calib/avg_num_step_conf": 5.8671875, "calib/ece": 0.4264919354838709, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.018453125000000008, "calib/mean_conf": 0.0630241935483871, "calib/mu_c": 0.0535, "calib/mu_w": 0.071953125, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00282258064516129, "calib/std_conf": 0.05080925171495352, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2632.0, "completions/max_terminated_length": 2632.0, "completions/mean_length": 373.07421875, "completions/mean_terminated_length": 373.07421875, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.0608, "grad_norm": 0.010963428765535355, "learning_rate": 3.972222222222223e-06, "loss": 0.0625, "num_tokens": 12119268.0, "reward": 1.0871237516403198, "reward_std": 0.2706947922706604, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5438074469566345, "rewards/format_reward_step_strict": 0.96484375, "step": 57 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 5.280510507521541e-06, "aux_brier/mean_group_std": 0.04747223413674383, "aux_brier/mean_r": 0.9704677822945551, "aux_brier/n_active_tok": 210.375, "aux_brier/n_groups": 14.4375, "aux_brier/n_step_records": 52.59375, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.43560659433563476, "calib/avg_num_step_conf": 6.75390625, "calib/ece": 0.3235191836734694, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.009850796111032824, "calib/mean_conf": 0.060154285714285716, "calib/mu_c": 0.054082978723404264, "calib/mu_w": 0.06393377483443709, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.05175294955608909, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2440.0, "completions/max_terminated_length": 2440.0, "completions/mean_length": 427.703125, "completions/mean_terminated_length": 429.38043212890625, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.06186666666666667, "grad_norm": 0.05008196085691452, "learning_rate": 3.944444444444445e-06, "loss": 0.1937, "num_tokens": 12335080.0, "reward": 0.9957273602485657, "reward_std": 0.343005508184433, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.623534619808197, "rewards/format_reward_step_strict": 0.9453125, "step": 58 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.077476075727617e-07, "aux_brier/mean_group_std": 0.043504242172368354, "aux_brier/mean_r": 0.9694438174815395, "aux_brier/n_active_tok": 199.25, "aux_brier/n_groups": 10.84375, "aux_brier/n_step_records": 49.8125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.45559304768574527, "calib/avg_num_step_conf": 6.28515625, "calib/ece": 0.39793650793650787, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.007465461259311128, "calib/mean_conf": 0.05420634920634921, "calib/mu_c": 0.05008849557522125, "calib/mu_w": 0.05755395683453238, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001865079365079365, "calib/std_conf": 0.040046662754626844, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 351.8046875, "completions/mean_terminated_length": 353.184326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.06293333333333333, "grad_norm": 0.06653322279453278, "learning_rate": 3.916666666666667e-06, "loss": 0.0079, "num_tokens": 12531390.0, "reward": 1.0792728662490845, "reward_std": 0.21965667605400085, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5827163457870483, "rewards/format_reward_step_strict": 0.984375, "step": 59 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.519963707679599e-07, "aux_brier/mean_group_std": 0.05771456903880541, "aux_brier/mean_r": 0.9655047520637373, "aux_brier/n_active_tok": 208.875, "aux_brier/n_groups": 14.25, "aux_brier/n_step_records": 52.21875, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4223856209150327, "calib/avg_num_step_conf": 6.703125, "calib/ece": 0.36349593495934956, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.010249183006535947, "calib/mean_conf": 0.05276422764227642, "calib/mu_c": 0.046764705882352944, "calib/mu_w": 0.05701388888888889, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0008130081300813009, "calib/std_conf": 0.03601717776731212, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2928.0, "completions/max_terminated_length": 2928.0, "completions/mean_length": 401.3125, "completions/mean_terminated_length": 402.88629150390625, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.064, "grad_norm": 0.0890054702758789, "learning_rate": 3.88888888888889e-06, "loss": 0.1602, "num_tokens": 12742982.0, "reward": 1.031773328781128, "reward_std": 0.31017613410949707, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.5958437919616699, "rewards/format_reward_step_strict": 0.9609375, "step": 60 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.4815887647845472e-06, "aux_brier/mean_group_std": 0.049969137749620404, "aux_brier/mean_r": 0.9610750055704818, "aux_brier/n_active_tok": 177.875, "aux_brier/n_groups": 11.65625, "aux_brier/n_step_records": 44.46875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4649503722084367, "calib/avg_num_step_conf": 5.55859375, "calib/ece": 0.46775590551181107, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00020223325062034003, "calib/mean_conf": 0.04405511811023623, "calib/mu_c": 0.044153846153846155, "calib/mu_w": 0.043951612903225815, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.038776729584501, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2494.0, "completions/max_terminated_length": 2494.0, "completions/mean_length": 313.25, "completions/mean_terminated_length": 313.25, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.06506666666666666, "grad_norm": 0.019003357738256454, "learning_rate": 3.861111111111112e-06, "loss": 0.0693, "num_tokens": 12927238.0, "reward": 1.1353565454483032, "reward_std": 0.2609567642211914, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5258011817932129, "rewards/format_reward_step_strict": 0.9921875, "step": 61 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.6686910742368397e-06, "aux_brier/mean_group_std": 0.04529141929014861, "aux_brier/mean_r": 0.9706526473128498, "aux_brier/n_active_tok": 221.375, "aux_brier/n_groups": 15.5, "aux_brier/n_step_records": 55.34375, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.4250916710319539, "calib/avg_num_step_conf": 6.91796875, "calib/ece": 0.29323770491803275, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006407992217316472, "calib/mean_conf": 0.0485655737704918, "calib/mu_c": 0.04433734939759036, "calib/mu_w": 0.05074534161490683, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0008196721311475411, "calib/std_conf": 0.04543045264962123, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2216.0, "completions/max_terminated_length": 2216.0, "completions/mean_length": 431.98828125, "completions/mean_terminated_length": 433.682373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.06613333333333334, "grad_norm": 0.028923727571964264, "learning_rate": 3.833333333333334e-06, "loss": 0.0875, "num_tokens": 13144907.0, "reward": 0.9612126350402832, "reward_std": 0.28925445675849915, "rewards/accuracy_reward_step": 0.32421875, "rewards/final_brier_reward_step": 0.6495382785797119, "rewards/format_reward_step_strict": 0.94921875, "step": 62 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.602942894007313e-06, "aux_brier/mean_group_std": 0.05251221459921494, "aux_brier/mean_r": 0.9662158757182411, "aux_brier/n_active_tok": 200.25, "aux_brier/n_groups": 11.15625, "aux_brier/n_step_records": 50.0625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.519625, "calib/avg_num_step_conf": 6.2734375, "calib/ece": 0.44718577075098814, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001172750000000014, "calib/mean_conf": 0.049889328063241103, "calib/mu_c": 0.04929599999999999, "calib/mu_w": 0.05046875000000001, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00150197628458498, "calib/std_conf": 0.03841220682472356, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2723.0, "completions/max_terminated_length": 2723.0, "completions/mean_length": 371.84375, "completions/mean_terminated_length": 371.84375, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "epoch": 0.0672, "grad_norm": 0.08982036262750626, "learning_rate": 3.8055555555555556e-06, "loss": 0.0424, "num_tokens": 13348739.0, "reward": 1.116524338722229, "reward_std": 0.27404630184173584, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5442226529121399, "rewards/format_reward_step_strict": 0.984375, "step": 63 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.3130418960172197e-06, "aux_brier/mean_group_std": 0.042722527588865376, "aux_brier/mean_r": 0.9680877043615644, "aux_brier/n_active_tok": 178.375, "aux_brier/n_groups": 9.40625, "aux_brier/n_step_records": 44.59375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.49615580124539327, "calib/avg_num_step_conf": 5.578125, "calib/ece": 0.44328685258964134, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001266997077138142, "calib/mean_conf": 0.04515936254980079, "calib/mu_c": 0.044508196721311476, "calib/mu_w": 0.04577519379844962, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001195219123505976, "calib/std_conf": 0.03392302192209186, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2537.0, "completions/max_terminated_length": 2537.0, "completions/mean_length": 367.1953125, "completions/mean_terminated_length": 368.63531494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.06826666666666667, "grad_norm": 0.10238279402256012, "learning_rate": 3.777777777777778e-06, "loss": 0.0644, "num_tokens": 13546517.0, "reward": 1.0977150201797485, "reward_std": 0.2985689342021942, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5392975807189941, "rewards/format_reward_step_strict": 0.97265625, "step": 64 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.6477803753977014e-06, "aux_brier/mean_group_std": 0.04302786409165657, "aux_brier/mean_r": 0.976210879805285, "aux_brier/n_active_tok": 167.0, "aux_brier/n_groups": 9.59375, "aux_brier/n_step_records": 41.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.48399531269273466, "calib/avg_num_step_conf": 5.21875, "calib/ece": 0.4312549019607843, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0019476995189342539, "calib/mean_conf": 0.04325490196078432, "calib/mu_c": 0.042231404958677686, "calib/mu_w": 0.04417910447761194, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02801949121121735, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2826.0, "completions/max_terminated_length": 2826.0, "completions/mean_length": 313.296875, "completions/mean_terminated_length": 313.296875, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.06933333333333333, "grad_norm": 0.014597240835428238, "learning_rate": 3.7500000000000005e-06, "loss": 0.031, "num_tokens": 13731745.0, "reward": 1.1108815670013428, "reward_std": 0.19665348529815674, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5607136487960815, "rewards/format_reward_step_strict": 0.99609375, "step": 65 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.5007501467121394e-08, "aux_brier/mean_group_std": 0.0469428577393126, "aux_brier/mean_r": 0.972645798072567, "aux_brier/n_active_tok": 201.375, "aux_brier/n_groups": 12.40625, "aux_brier/n_step_records": 50.34375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.45569280343716434, "calib/avg_num_step_conf": 6.38671875, "calib/ece": 0.34440000000000004, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004108485499462945, "calib/mean_conf": 0.0476, "calib/mu_c": 0.04510204081632653, "calib/mu_w": 0.049210526315789475, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03163289427162807, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2717.0, "completions/max_terminated_length": 2717.0, "completions/mean_length": 407.3828125, "completions/mean_terminated_length": 408.98040771484375, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.0704, "grad_norm": 0.014792166650295258, "learning_rate": 3.7222222222222225e-06, "loss": 0.0846, "num_tokens": 13942387.0, "reward": 1.0222983360290527, "reward_std": 0.22025427222251892, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.6204434037208557, "rewards/format_reward_step_strict": 0.96875, "step": 66 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.0238753276901065e-06, "aux_brier/mean_group_std": 0.0689542588139005, "aux_brier/mean_r": 0.9557478736472529, "aux_brier/n_active_tok": 187.5, "aux_brier/n_groups": 12.5, "aux_brier/n_step_records": 46.875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5230611328500866, "calib/avg_num_step_conf": 5.859375, "calib/ece": 0.43263999999999997, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0013175957405863054, "calib/mean_conf": 0.04536, "calib/mu_c": 0.04605042016806722, "calib/mu_w": 0.044732824427480916, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001, "calib/std_conf": 0.0296659805164097, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2470.0, "completions/max_terminated_length": 2470.0, "completions/mean_length": 388.69921875, "completions/mean_terminated_length": 388.69921875, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.07146666666666666, "grad_norm": 0.0096532441675663, "learning_rate": 3.694444444444445e-06, "loss": 0.0827, "num_tokens": 14146902.0, "reward": 1.0910406112670898, "reward_std": 0.2472362220287323, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5516625046730042, "rewards/format_reward_step_strict": 0.9765625, "step": 67 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 5.434876368837216e-06, "aux_brier/mean_group_std": 0.03418800083448402, "aux_brier/mean_r": 0.9780984264300685, "aux_brier/n_active_tok": 177.75, "aux_brier/n_groups": 10.90625, "aux_brier/n_step_records": 44.4375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.44980609066056326, "calib/avg_num_step_conf": 5.5546875, "calib/ece": 0.3773228346456693, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001331298874690058, "calib/mean_conf": 0.04637795275590552, "calib/mu_c": 0.045607476635514024, "calib/mu_w": 0.04693877551020408, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0012204724409448822, "calib/std_conf": 0.037472704845089806, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2140.0, "completions/max_terminated_length": 2140.0, "completions/mean_length": 343.1953125, "completions/mean_terminated_length": 343.1953125, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.07253333333333334, "grad_norm": 0.051165852695703506, "learning_rate": 3.6666666666666666e-06, "loss": 0.0449, "num_tokens": 14338848.0, "reward": 1.0701727867126465, "reward_std": 0.2218499779701233, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.608816385269165, "rewards/format_reward_step_strict": 0.9921875, "step": 68 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.0088494057657336e-06, "aux_brier/mean_group_std": 0.04627228822255264, "aux_brier/mean_r": 0.9687250312122738, "aux_brier/n_active_tok": 166.625, "aux_brier/n_groups": 10.3125, "aux_brier/n_step_records": 41.65625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.48369788106630207, "calib/avg_num_step_conf": 5.21875, "calib/ece": 0.33550200803212854, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0032727272727272813, "calib/mean_conf": 0.046024096385542175, "calib/mu_c": 0.04399999999999999, "calib/mu_w": 0.04727272727272727, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.029721860264746814, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2178.0, "completions/max_terminated_length": 2178.0, "completions/mean_length": 389.7890625, "completions/mean_terminated_length": 392.8582763671875, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.0736, "grad_norm": 0.018729669973254204, "learning_rate": 3.638888888888889e-06, "loss": 0.044, "num_tokens": 14543130.0, "reward": 1.0191528797149658, "reward_std": 0.24353712797164917, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.6312991976737976, "rewards/format_reward_step_strict": 0.97265625, "step": 69 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.4977007354608514e-06, "aux_brier/mean_group_std": 0.04300463422366555, "aux_brier/mean_r": 0.9763941743900566, "aux_brier/n_active_tok": 177.625, "aux_brier/n_groups": 11.40625, "aux_brier/n_step_records": 44.40625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.47789115646258506, "calib/avg_num_step_conf": 5.55078125, "calib/ece": 0.37183266932270914, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002151360544217669, "calib/mean_conf": 0.04250996015936255, "calib/mu_c": 0.04125000000000001, "calib/mu_w": 0.04340136054421768, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.022663195245232134, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2213.0, "completions/max_terminated_length": 2213.0, "completions/mean_length": 393.6953125, "completions/mean_terminated_length": 393.6953125, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.07466666666666667, "grad_norm": 0.023448629304766655, "learning_rate": 3.6111111111111115e-06, "loss": 0.0782, "num_tokens": 14750908.0, "reward": 1.047849178314209, "reward_std": 0.22698289155960083, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.6054589748382568, "rewards/format_reward_step_strict": 0.98046875, "step": 70 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.71178257294347e-06, "aux_brier/mean_group_std": 0.047826320667128996, "aux_brier/mean_r": 0.9638519321791721, "aux_brier/n_active_tok": 174.75, "aux_brier/n_groups": 10.65625, "aux_brier/n_step_records": 43.6875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.465642382309049, "calib/avg_num_step_conf": 5.4609375, "calib/ece": 0.3138339920948616, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005360195360195365, "calib/mean_conf": 0.04584980237154151, "calib/mu_c": 0.042417582417582415, "calib/mu_w": 0.04777777777777778, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.027658439912256715, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2663.0, "completions/max_terminated_length": 2663.0, "completions/mean_length": 379.71484375, "completions/mean_terminated_length": 379.71484375, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.07573333333333333, "grad_norm": 0.013549897819757462, "learning_rate": 3.5833333333333335e-06, "loss": 0.0927, "num_tokens": 14952523.0, "reward": 1.0146431922912598, "reward_std": 0.24198010563850403, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.6601351499557495, "rewards/format_reward_step_strict": 0.98828125, "step": 71 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.6784378907058795e-06, "aux_brier/mean_group_std": 0.045835261917549525, "aux_brier/mean_r": 0.9716608193362408, "aux_brier/n_active_tok": 162.875, "aux_brier/n_groups": 9.25, "aux_brier/n_step_records": 40.71875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4948198198198198, "calib/avg_num_step_conf": 5.08984375, "calib/ece": 0.4011155378486056, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0025225225225225176, "calib/mean_conf": 0.04111553784860558, "calib/mu_c": 0.04252252252252252, "calib/mu_w": 0.04, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.029476687351544087, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2815.0, "completions/max_terminated_length": 2815.0, "completions/mean_length": 361.96484375, "completions/mean_terminated_length": 363.38433837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.0768, "grad_norm": 0.010157402604818344, "learning_rate": 3.555555555555556e-06, "loss": 0.0369, "num_tokens": 15149594.0, "reward": 1.0662086009979248, "reward_std": 0.27851247787475586, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.5773344039916992, "rewards/format_reward_step_strict": 0.9765625, "step": 72 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -8.495429312582736e-06, "aux_brier/mean_group_std": 0.047189342901178016, "aux_brier/mean_r": 0.9716737835120474, "aux_brier/n_active_tok": 155.625, "aux_brier/n_groups": 8.40625, "aux_brier/n_step_records": 38.90625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4852047146401985, "calib/avg_num_step_conf": 4.86328125, "calib/ece": 0.4717716535433071, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0005533498759305172, "calib/mean_conf": 0.04003937007874016, "calib/mu_c": 0.03976923076923077, "calib/mu_w": 0.04032258064516129, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.017512605842156293, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3024.0, "completions/max_terminated_length": 3024.0, "completions/mean_length": 341.09375, "completions/mean_terminated_length": 341.09375, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.07786666666666667, "grad_norm": 0.022411437705159187, "learning_rate": 3.5277777777777784e-06, "loss": 0.0334, "num_tokens": 15343946.0, "reward": 1.131695032119751, "reward_std": 0.2937101721763611, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5189679861068726, "rewards/format_reward_step_strict": 0.98828125, "step": 73 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.871608686509489e-07, "aux_brier/mean_group_std": 0.033736425267474904, "aux_brier/mean_r": 0.9816442073615657, "aux_brier/n_active_tok": 151.625, "aux_brier/n_groups": 8.53125, "aux_brier/n_step_records": 37.90625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.47032279314888015, "calib/avg_num_step_conf": 4.73828125, "calib/ece": 0.4068548387096775, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.009715415019762863, "calib/mean_conf": 0.043951612903225815, "calib/mu_c": 0.03854545454545453, "calib/mu_w": 0.04826086956521739, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0036290322580645163, "calib/std_conf": 0.057932365297918294, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2353.0, "completions/max_terminated_length": 2353.0, "completions/mean_length": 376.48828125, "completions/mean_terminated_length": 377.9647216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.07893333333333333, "grad_norm": 0.04105854406952858, "learning_rate": 3.5e-06, "loss": 0.1344, "num_tokens": 15544255.0, "reward": 1.05582857131958, "reward_std": 0.25634765625, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.5670648217201233, "rewards/format_reward_step_strict": 0.96875, "step": 74 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -9.998759458708373e-06, "aux_brier/mean_group_std": 0.08682547556133395, "aux_brier/mean_r": 0.9394467702215956, "aux_brier/n_active_tok": 150.125, "aux_brier/n_groups": 7.875, "aux_brier/n_step_records": 37.53125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5641666666666667, "calib/avg_num_step_conf": 4.69921875, "calib/ece": 0.570234375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003184615384615376, "calib/mean_conf": 0.03914062500000001, "calib/mu_c": 0.04038461538461538, "calib/mu_w": 0.037200000000000004, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0184360102682054, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 876.0, "completions/max_terminated_length": 876.0, "completions/mean_length": 314.4765625, "completions/mean_terminated_length": 315.7098083496094, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.08, "grad_norm": 0.018008295446634293, "learning_rate": 3.4722222222222224e-06, "loss": 0.0035, "num_tokens": 15729513.0, "reward": 1.2168571949005127, "reward_std": 0.2569185495376587, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.4377410113811493, "rewards/format_reward_step_strict": 0.99609375, "step": 75 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 5.66402079527073e-06, "aux_brier/mean_group_std": 0.06815238686891403, "aux_brier/mean_r": 0.9553527514328811, "aux_brier/n_active_tok": 158.875, "aux_brier/n_groups": 9.40625, "aux_brier/n_step_records": 39.71875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.44999353085780824, "calib/avg_num_step_conf": 5.046875, "calib/ece": 0.4321686746987952, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0024770345452192963, "calib/mean_conf": 0.04172690763052209, "calib/mu_c": 0.04042372881355932, "calib/mu_w": 0.04290076335877862, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.018558215115376163, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3043.0, "completions/max_terminated_length": 3043.0, "completions/mean_length": 367.40234375, "completions/mean_terminated_length": 370.2952880859375, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.08106666666666666, "grad_norm": 0.026839526370167732, "learning_rate": 3.444444444444445e-06, "loss": 0.1172, "num_tokens": 15926624.0, "reward": 1.081865906715393, "reward_std": 0.26003503799438477, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.546213686466217, "rewards/format_reward_step_strict": 0.96875, "step": 76 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 8.547130481928455e-06, "aux_brier/mean_group_std": 0.06601589482379376, "aux_brier/mean_r": 0.9545857882905452, "aux_brier/n_active_tok": 157.875, "aux_brier/n_groups": 8.9375, "aux_brier/n_step_records": 39.46875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4649763779527559, "calib/avg_num_step_conf": 4.93359375, "calib/ece": 0.45936507936507937, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0006878740157480259, "calib/mean_conf": 0.03666666666666667, "calib/mu_c": 0.036320000000000005, "calib/mu_w": 0.03700787401574803, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.016207973738207152, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2683.0, "completions/max_terminated_length": 2683.0, "completions/mean_length": 352.59375, "completions/mean_terminated_length": 352.59375, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.08213333333333334, "grad_norm": 0.05786745995283127, "learning_rate": 3.416666666666667e-06, "loss": 0.0668, "num_tokens": 16121552.0, "reward": 1.1129639148712158, "reward_std": 0.2750511169433594, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.529980480670929, "rewards/format_reward_step_strict": 0.984375, "step": 77 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.102487066006468e-06, "aux_brier/mean_group_std": 0.06814221900666223, "aux_brier/mean_r": 0.9545244499354607, "aux_brier/n_active_tok": 158.25, "aux_brier/n_groups": 8.5, "aux_brier/n_step_records": 39.5625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5554151273575733, "calib/avg_num_step_conf": 5.05859375, "calib/ece": 0.40772, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0007693304815606858, "calib/mean_conf": 0.03732, "calib/mu_c": 0.03774774774774774, "calib/mu_w": 0.03697841726618705, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0005200000000000001, "calib/std_conf": 0.015633860687622875, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2256.0, "completions/max_terminated_length": 2256.0, "completions/mean_length": 374.73046875, "completions/mean_terminated_length": 379.1739196777344, "completions/min_length": 0.0, "completions/min_terminated_length": 120.0, "epoch": 0.0832, "grad_norm": 0.08024071902036667, "learning_rate": 3.3888888888888893e-06, "loss": -0.0005, "num_tokens": 16325507.0, "reward": 1.0624713897705078, "reward_std": 0.28480181097984314, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.5701979994773865, "rewards/format_reward_step_strict": 0.97265625, "step": 78 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.230446156603616e-06, "aux_brier/mean_group_std": 0.0627408457648402, "aux_brier/mean_r": 0.9599723760079495, "aux_brier/n_active_tok": 159.375, "aux_brier/n_groups": 9.34375, "aux_brier/n_step_records": 39.84375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5355419847328245, "calib/avg_num_step_conf": 5.01171875, "calib/ece": 0.45500000000000007, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014045801526717597, "calib/mean_conf": 0.03328125, "calib/mu_c": 0.034, "calib/mu_w": 0.03259541984732824, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.015692821557562554, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 854.0, "completions/max_terminated_length": 854.0, "completions/mean_length": 358.53515625, "completions/mean_terminated_length": 359.9411926269531, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.08426666666666667, "grad_norm": 0.1831938475370407, "learning_rate": 3.3611111111111117e-06, "loss": 0.02, "num_tokens": 16523668.0, "reward": 1.1222201585769653, "reward_std": 0.19442899525165558, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.543567955493927, "rewards/format_reward_step_strict": 0.99609375, "step": 79 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.605331955951584e-06, "aux_brier/mean_group_std": 0.07893908027919577, "aux_brier/mean_r": 0.9501262667542172, "aux_brier/n_active_tok": 163.0, "aux_brier/n_groups": 9.71875, "aux_brier/n_step_records": 40.75, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4577464788732394, "calib/avg_num_step_conf": 5.09375, "calib/ece": 0.5284189723320158, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0008685446009389641, "calib/mean_conf": 0.032845849802371534, "calib/mu_c": 0.03246478873239436, "calib/mu_w": 0.033333333333333326, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.015134572643453475, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2172.0, "completions/max_terminated_length": 2172.0, "completions/mean_length": 335.3359375, "completions/mean_terminated_length": 336.6510009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.08533333333333333, "grad_norm": 0.011240994557738304, "learning_rate": 3.3333333333333333e-06, "loss": 0.0131, "num_tokens": 16711674.0, "reward": 1.165907382965088, "reward_std": 0.26851609349250793, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.46831679344177246, "rewards/format_reward_step_strict": 0.98828125, "step": 80 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.511620979837707e-06, "aux_brier/mean_group_std": 0.06015123286817331, "aux_brier/mean_r": 0.9625598457159206, "aux_brier/n_active_tok": 162.625, "aux_brier/n_groups": 10.34375, "aux_brier/n_step_records": 40.65625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4599961442066705, "calib/avg_num_step_conf": 5.09765625, "calib/ece": 0.501104, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001850266692371958, "calib/mean_conf": 0.030896, "calib/mu_c": 0.030030075187969924, "calib/mu_w": 0.03188034188034188, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010532862099163742, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2743.0, "completions/max_terminated_length": 2743.0, "completions/mean_length": 388.05078125, "completions/mean_terminated_length": 389.57257080078125, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.0864, "grad_norm": 0.054194867610931396, "learning_rate": 3.3055555555555558e-06, "loss": 0.0418, "num_tokens": 16917263.0, "reward": 1.1276578903198242, "reward_std": 0.2511928677558899, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.48719385266304016, "rewards/format_reward_step_strict": 0.97265625, "step": 81 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -7.117875442164134e-06, "aux_brier/mean_group_std": 0.047760515298038604, "aux_brier/mean_r": 0.9708146194599993, "aux_brier/n_active_tok": 148.125, "aux_brier/n_groups": 8.03125, "aux_brier/n_step_records": 37.03125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5342366700201207, "calib/avg_num_step_conf": 4.82421875, "calib/ece": 0.5280708661417323, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014411468812877241, "calib/mean_conf": 0.030984251968503934, "calib/mu_c": 0.03161971830985916, "calib/mu_w": 0.030178571428571433, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009811999926145797, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2004.0, "completions/max_terminated_length": 2004.0, "completions/mean_length": 318.28125, "completions/mean_terminated_length": 319.5294189453125, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.08746666666666666, "grad_norm": 0.10533162951469421, "learning_rate": 3.277777777777778e-06, "loss": 0.0102, "num_tokens": 17104295.0, "reward": 1.168663740158081, "reward_std": 0.22765403985977173, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.47153007984161377, "rewards/format_reward_step_strict": 0.9921875, "step": 82 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.144158094965042e-05, "aux_brier/mean_group_std": 0.06297535992917615, "aux_brier/mean_r": 0.9501589902161882, "aux_brier/n_active_tok": 171.5, "aux_brier/n_groups": 10.125, "aux_brier/n_step_records": 42.875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5132980399571437, "calib/avg_num_step_conf": 5.546875, "calib/ece": 0.48182539682539677, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.000162601626016258, "calib/mean_conf": 0.03007936507936508, "calib/mu_c": 0.030000000000000002, "calib/mu_w": 0.03016260162601626, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.010540626750591659, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1712.0, "completions/max_terminated_length": 1712.0, "completions/mean_length": 376.55859375, "completions/mean_terminated_length": 381.02374267578125, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.08853333333333334, "grad_norm": 0.015030056238174438, "learning_rate": 3.2500000000000002e-06, "loss": 0.0041, "num_tokens": 17307958.0, "reward": 1.1205906867980957, "reward_std": 0.2422322928905487, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5058003664016724, "rewards/format_reward_step_strict": 0.98046875, "step": 83 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.0064067055531867e-05, "aux_brier/mean_group_std": 0.07812349659141118, "aux_brier/mean_r": 0.946900485161512, "aux_brier/n_active_tok": 152.5, "aux_brier/n_groups": 8.5625, "aux_brier/n_step_records": 38.125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4547616068076586, "calib/avg_num_step_conf": 4.765625, "calib/ece": 0.4519762845849802, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0020410461769490643, "calib/mean_conf": 0.030237154150197627, "calib/mu_c": 0.02918032786885246, "calib/mu_w": 0.031221374045801525, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.013685595952137801, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2304.0, "completions/max_terminated_length": 2304.0, "completions/mean_length": 336.703125, "completions/mean_terminated_length": 336.703125, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.0896, "grad_norm": 0.0301885474473238, "learning_rate": 3.2222222222222227e-06, "loss": 0.0855, "num_tokens": 17500074.0, "reward": 1.105313777923584, "reward_std": 0.22371599078178406, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5384425520896912, "rewards/format_reward_step_strict": 0.98828125, "step": 84 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.720426093423157e-06, "aux_brier/mean_group_std": 0.06952150267362742, "aux_brier/mean_r": 0.9518234900073032, "aux_brier/n_active_tok": 170.75, "aux_brier/n_groups": 10.78125, "aux_brier/n_step_records": 42.6875, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5591596638655462, "calib/avg_num_step_conf": 5.5546875, "calib/ece": 0.45745901639344266, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001692773109243692, "calib/mean_conf": 0.031065573770491805, "calib/mu_c": 0.031932773109243695, "calib/mu_w": 0.030240000000000003, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00040983606557377055, "calib/std_conf": 0.010999804547382808, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2923.0, "completions/max_terminated_length": 2923.0, "completions/mean_length": 418.2734375, "completions/mean_terminated_length": 421.5669250488281, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.09066666666666667, "grad_norm": 0.023108787834644318, "learning_rate": 3.1944444444444443e-06, "loss": 0.1353, "num_tokens": 17714976.0, "reward": 1.0706396102905273, "reward_std": 0.27474746108055115, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5169335603713989, "rewards/format_reward_step_strict": 0.953125, "step": 85 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.3089714198686337e-06, "aux_brier/mean_group_std": 0.0705696843840126, "aux_brier/mean_r": 0.9573568341197747, "aux_brier/n_active_tok": 166.875, "aux_brier/n_groups": 10.09375, "aux_brier/n_step_records": 41.71875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5020595394120951, "calib/avg_num_step_conf": 5.265625, "calib/ece": 0.39404296875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 8.831055357922529e-05, "calib/mean_conf": 0.03173828125000001, "calib/mu_c": 0.03178899082568807, "calib/mu_w": 0.031700680272108844, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009175260734491333, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 940.0, "completions/max_terminated_length": 940.0, "completions/mean_length": 354.41015625, "completions/mean_terminated_length": 355.8000183105469, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.09173333333333333, "grad_norm": 0.07036297768354416, "learning_rate": 3.1666666666666667e-06, "loss": 0.0081, "num_tokens": 17911217.0, "reward": 1.0738774538040161, "reward_std": 0.2070370614528656, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.6001975536346436, "rewards/format_reward_step_strict": 0.99609375, "step": 86 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 5.883740235801405e-06, "aux_brier/mean_group_std": 0.08489868975486516, "aux_brier/mean_r": 0.9432929140526566, "aux_brier/n_active_tok": 158.625, "aux_brier/n_groups": 9.25, "aux_brier/n_step_records": 39.65625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4946864340068224, "calib/avg_num_step_conf": 5.3046875, "calib/ece": 0.5568593625498007, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003730313565993175, "calib/mean_conf": 0.03278207171314741, "calib/mu_c": 0.03431283783783784, "calib/mu_w": 0.030582524271844665, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.029066496733548594, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2376.0, "completions/max_terminated_length": 2376.0, "completions/mean_length": 346.8515625, "completions/mean_terminated_length": 350.9644470214844, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.0928, "grad_norm": 0.034281790256500244, "learning_rate": 3.138888888888889e-06, "loss": 0.0222, "num_tokens": 18105507.0, "reward": 1.1783933639526367, "reward_std": 0.2476852387189865, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.4401359558105469, "rewards/format_reward_step_strict": 0.97265625, "step": 87 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 9.146717485641886e-07, "aux_brier/mean_group_std": 0.07024082438078165, "aux_brier/mean_r": 0.9578755974288631, "aux_brier/n_active_tok": 167.875, "aux_brier/n_groups": 8.59375, "aux_brier/n_step_records": 41.96875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.506267716535433, "calib/avg_num_step_conf": 5.25390625, "calib/ece": 0.4708333333333333, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00018834645669291855, "calib/mean_conf": 0.033134920634920635, "calib/mu_c": 0.03322834645669292, "calib/mu_w": 0.03304, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.011131146901506041, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2309.0, "completions/max_terminated_length": 2309.0, "completions/mean_length": 361.390625, "completions/mean_terminated_length": 364.2362060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.09386666666666667, "grad_norm": 0.04726596921682358, "learning_rate": 3.1111111111111116e-06, "loss": 0.059, "num_tokens": 18307871.0, "reward": 1.116339921951294, "reward_std": 0.22101491689682007, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5200472474098206, "rewards/format_reward_step_strict": 0.98046875, "step": 88 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.687176872129271e-07, "aux_brier/mean_group_std": 0.07569657181508732, "aux_brier/mean_r": 0.9485480143526361, "aux_brier/n_active_tok": 185.25, "aux_brier/n_groups": 11.09375, "aux_brier/n_step_records": 46.3125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5373424759080802, "calib/avg_num_step_conf": 5.8125, "calib/ece": 0.4155078125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0007499382258462986, "calib/mean_conf": 0.0333984375, "calib/mu_c": 0.03298245614035088, "calib/mu_w": 0.033732394366197176, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0017968749999999999, "calib/std_conf": 0.017404130186785945, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 913.0, "completions/max_terminated_length": 913.0, "completions/mean_length": 365.80859375, "completions/mean_terminated_length": 367.2431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.09493333333333333, "grad_norm": 0.046813610941171646, "learning_rate": 3.0833333333333336e-06, "loss": 0.0114, "num_tokens": 18510406.0, "reward": 1.0880454778671265, "reward_std": 0.1457396298646927, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5787441730499268, "rewards/format_reward_step_strict": 0.99609375, "step": 89 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.3200514974730524e-06, "aux_brier/mean_group_std": 0.08874093639098773, "aux_brier/mean_r": 0.9315546717243935, "aux_brier/n_active_tok": 185.5, "aux_brier/n_groups": 10.5, "aux_brier/n_step_records": 46.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5146074444791993, "calib/avg_num_step_conf": 5.8125, "calib/ece": 0.5155118110236221, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006230841413825405, "calib/mean_conf": 0.03173228346456693, "calib/mu_c": 0.03201438848920863, "calib/mu_w": 0.03139130434782609, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008702835829699591, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1252.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 355.82421875, "completions/mean_terminated_length": 357.2196350097656, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.096, "grad_norm": 0.026251893490552902, "learning_rate": 3.055555555555556e-06, "loss": 0.0506, "num_tokens": 18704817.0, "reward": 1.1636964082717896, "reward_std": 0.2448050081729889, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.48291015625, "rewards/format_reward_step_strict": 0.9921875, "step": 90 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.9404183573843454e-06, "aux_brier/mean_group_std": 0.05313952584804532, "aux_brier/mean_r": 0.9705964411583399, "aux_brier/n_active_tok": 204.0, "aux_brier/n_groups": 13.375, "aux_brier/n_step_records": 51.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.49686879960317465, "calib/avg_num_step_conf": 6.375, "calib/ece": 0.47090551181102364, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 3.1001984126990956e-05, "calib/mean_conf": 0.033031496062992126, "calib/mu_c": 0.033046875, "calib/mu_w": 0.03301587301587301, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009550064523733063, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2646.0, "completions/max_terminated_length": 2646.0, "completions/mean_length": 398.25, "completions/mean_terminated_length": 398.25, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.09706666666666666, "grad_norm": 0.04937665909528732, "learning_rate": 3.0277777777777776e-06, "loss": 0.0691, "num_tokens": 18914481.0, "reward": 1.1271090507507324, "reward_std": 0.16296067833900452, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5240613222122192, "rewards/format_reward_step_strict": 0.9921875, "step": 91 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -5.775527954110515e-06, "aux_brier/mean_group_std": 0.03261796387138986, "aux_brier/mean_r": 0.9787621676505022, "aux_brier/n_active_tok": 169.375, "aux_brier/n_groups": 9.5625, "aux_brier/n_step_records": 42.34375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5343565891472868, "calib/avg_num_step_conf": 5.296875, "calib/ece": 0.4596850393700787, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010468217054263596, "calib/mean_conf": 0.03322834645669291, "calib/mu_c": 0.033760000000000005, "calib/mu_w": 0.032713178294573646, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0003937007874015748, "calib/std_conf": 0.009990695652856174, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1947.0, "completions/max_terminated_length": 1947.0, "completions/mean_length": 330.39453125, "completions/mean_terminated_length": 330.39453125, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 0.09813333333333334, "grad_norm": 0.028532471507787704, "learning_rate": 3e-06, "loss": 0.0251, "num_tokens": 19105782.0, "reward": 1.1143501996994019, "reward_std": 0.2323814481496811, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5355257987976074, "rewards/format_reward_step_strict": 0.984375, "step": 92 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.171904975982301e-06, "aux_brier/mean_group_std": 0.05268081661406212, "aux_brier/mean_r": 0.9699430246107784, "aux_brier/n_active_tok": 201.375, "aux_brier/n_groups": 11.65625, "aux_brier/n_step_records": 50.34375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5058885842472582, "calib/avg_num_step_conf": 6.29296875, "calib/ece": 0.5011023622047245, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0006168993020937152, "calib/mean_conf": 0.03433070866141732, "calib/mu_c": 0.03404411764705883, "calib/mu_w": 0.034661016949152544, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.009888397016745997, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1705.0, "completions/max_terminated_length": 1705.0, "completions/mean_length": 394.0859375, "completions/mean_terminated_length": 395.63140869140625, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.0992, "grad_norm": 0.0336824506521225, "learning_rate": 2.9722222222222225e-06, "loss": 0.0405, "num_tokens": 19312444.0, "reward": 1.1493513584136963, "reward_std": 0.2246081531047821, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.49584296345710754, "rewards/format_reward_step_strict": 0.98828125, "step": 93 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -5.373293926247413e-06, "aux_brier/mean_group_std": 0.0731912638488667, "aux_brier/mean_r": 0.9554281096642034, "aux_brier/n_active_tok": 180.375, "aux_brier/n_groups": 10.71875, "aux_brier/n_step_records": 45.09375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6063914616941667, "calib/avg_num_step_conf": 5.73046875, "calib/ece": 0.5027734375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003510396859473717, "calib/mean_conf": 0.0323828125, "calib/mu_c": 0.034014598540145984, "calib/mu_w": 0.030504201680672267, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009691830559282584, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2024.0, "completions/max_terminated_length": 2024.0, "completions/mean_length": 331.71875, "completions/mean_terminated_length": 333.0196228027344, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.10026666666666667, "grad_norm": 0.07165474444627762, "learning_rate": 2.944444444444445e-06, "loss": -0.0138, "num_tokens": 19506044.0, "reward": 1.160183072090149, "reward_std": 0.17494770884513855, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.5001074075698853, "rewards/format_reward_step_strict": 1.0, "step": 94 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.3494379560441416e-06, "aux_brier/mean_group_std": 0.07180938440355472, "aux_brier/mean_r": 0.9510812979632172, "aux_brier/n_active_tok": 186.5, "aux_brier/n_groups": 10.625, "aux_brier/n_step_records": 46.625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5774968394437421, "calib/avg_num_step_conf": 5.97265625, "calib/ece": 0.5188537549407115, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0038249051833122646, "calib/mean_conf": 0.03450592885375494, "calib/mu_c": 0.03621428571428572, "calib/mu_w": 0.032389380530973455, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.013316345732329211, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2617.0, "completions/max_terminated_length": 2617.0, "completions/mean_length": 368.58984375, "completions/mean_terminated_length": 370.0353088378906, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.10133333333333333, "grad_norm": 0.061531733721494675, "learning_rate": 2.916666666666667e-06, "loss": 0.0277, "num_tokens": 19706531.0, "reward": 1.1648377180099487, "reward_std": 0.24357908964157104, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.4796636700630188, "rewards/format_reward_step_strict": 0.98828125, "step": 95 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.363781245544704e-06, "aux_brier/mean_group_std": 0.0684968045927621, "aux_brier/mean_r": 0.9433583986571044, "aux_brier/n_active_tok": 182.375, "aux_brier/n_groups": 9.9375, "aux_brier/n_step_records": 45.59375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.549083269671505, "calib/avg_num_step_conf": 5.703125, "calib/ece": 0.5654296875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0015571683218742124, "calib/mean_conf": 0.03613281250000001, "calib/mu_c": 0.03675324675324675, "calib/mu_w": 0.03519607843137254, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010168787333789795, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 882.0, "completions/max_terminated_length": 882.0, "completions/mean_length": 338.2265625, "completions/mean_terminated_length": 339.5529479980469, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.1024, "grad_norm": 0.06455075740814209, "learning_rate": 2.888888888888889e-06, "loss": 0.0046, "num_tokens": 19898933.0, "reward": 1.21187424659729, "reward_std": 0.1398114562034607, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.44124725461006165, "rewards/format_reward_step_strict": 1.0, "step": 96 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 7.579109583100152e-06, "aux_brier/mean_group_std": 0.04659628467337417, "aux_brier/mean_r": 0.9751859140923029, "aux_brier/n_active_tok": 197.875, "aux_brier/n_groups": 12.6875, "aux_brier/n_step_records": 49.46875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5492060443078499, "calib/avg_num_step_conf": 6.18359375, "calib/ece": 0.4189641434262948, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0024913561275451343, "calib/mean_conf": 0.03521912350597609, "calib/mu_c": 0.036578947368421044, "calib/mu_w": 0.03408759124087591, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.010907606945228587, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2517.0, "completions/max_terminated_length": 2517.0, "completions/mean_length": 379.0859375, "completions/mean_terminated_length": 379.0859375, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.10346666666666667, "grad_norm": 0.014394364319741726, "learning_rate": 2.861111111111111e-06, "loss": 0.0726, "num_tokens": 20101051.0, "reward": 1.0771472454071045, "reward_std": 0.2732182741165161, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5664016008377075, "rewards/format_reward_step_strict": 0.98046875, "step": 97 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 4.318549504528368e-06, "aux_brier/mean_group_std": 0.09819066026645625, "aux_brier/mean_r": 0.9276285348741788, "aux_brier/n_active_tok": 198.5, "aux_brier/n_groups": 12.28125, "aux_brier/n_step_records": 49.625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5613399440630562, "calib/avg_num_step_conf": 6.203125, "calib/ece": 0.5073809523809524, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005240274599542329, "calib/mean_conf": 0.04023809523809524, "calib/mu_c": 0.04260869565217391, "calib/mu_w": 0.03736842105263158, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.025369156554926893, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2803.0, "completions/max_terminated_length": 2803.0, "completions/mean_length": 402.28125, "completions/mean_terminated_length": 402.28125, "completions/min_length": 55.0, "completions/min_terminated_length": 55.0, "epoch": 0.10453333333333334, "grad_norm": 0.024372128769755363, "learning_rate": 2.8333333333333335e-06, "loss": 0.1009, "num_tokens": 20310219.0, "reward": 1.153505563735962, "reward_std": 0.2584661543369293, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.4890226423740387, "rewards/format_reward_step_strict": 0.984375, "step": 98 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 7.956959314214807e-06, "aux_brier/mean_group_std": 0.047173973295863036, "aux_brier/mean_r": 0.9749459528583877, "aux_brier/n_active_tok": 201.125, "aux_brier/n_groups": 11.21875, "aux_brier/n_step_records": 50.28125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5842321520287622, "calib/avg_num_step_conf": 6.28515625, "calib/ece": 0.2649212598425197, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002251815980629533, "calib/mean_conf": 0.042716535433070864, "calib/mu_c": 0.044285714285714275, "calib/mu_w": 0.04203389830508474, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0022440944881889765, "calib/std_conf": 0.017346127861177077, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2905.0, "completions/max_terminated_length": 2905.0, "completions/mean_length": 411.3984375, "completions/mean_terminated_length": 411.3984375, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.1056, "grad_norm": 0.006937370169907808, "learning_rate": 2.805555555555556e-06, "loss": 0.0425, "num_tokens": 20521337.0, "reward": 0.9758594036102295, "reward_std": 0.22256730496883392, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.7159379124641418, "rewards/format_reward_step_strict": 0.9921875, "step": 99 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.2180199117937125e-06, "aux_brier/mean_group_std": 0.078943701829868, "aux_brier/mean_r": 0.9410495528956415, "aux_brier/n_active_tok": 185.25, "aux_brier/n_groups": 9.78125, "aux_brier/n_step_records": 46.3125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6130044561601707, "calib/avg_num_step_conf": 5.83203125, "calib/ece": 0.4011417322834646, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005856398669428241, "calib/mean_conf": 0.043740157480314956, "calib/mu_c": 0.04699115044247788, "calib/mu_w": 0.04113475177304964, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.012287060345896567, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1845.0, "completions/max_terminated_length": 1845.0, "completions/mean_length": 370.84375, "completions/mean_terminated_length": 372.2980651855469, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.10666666666666667, "grad_norm": 0.021300625056028366, "learning_rate": 2.7777777777777783e-06, "loss": 0.0129, "num_tokens": 20723681.0, "reward": 1.0831012725830078, "reward_std": 0.2674819231033325, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5902175903320312, "rewards/format_reward_step_strict": 0.98828125, "step": 100 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 8.315309462103571e-07, "aux_brier/mean_group_std": 0.05673601983434623, "aux_brier/mean_r": 0.9700530041716272, "aux_brier/n_active_tok": 218.0, "aux_brier/n_groups": 12.5, "aux_brier/n_step_records": 54.5, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6083049068485962, "calib/avg_num_step_conf": 7.10546875, "calib/ece": 0.3651394422310757, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00596759380739964, "calib/mean_conf": 0.0452191235059761, "calib/mu_c": 0.0487378640776699, "calib/mu_w": 0.04277027027027026, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01406628420809822, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2435.0, "completions/max_terminated_length": 2435.0, "completions/mean_length": 399.171875, "completions/mean_terminated_length": 403.9051513671875, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.10773333333333333, "grad_norm": 0.09895070642232895, "learning_rate": 2.7500000000000004e-06, "loss": 0.0022, "num_tokens": 20932861.0, "reward": 1.0434354543685913, "reward_std": 0.2181820571422577, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.6112421751022339, "rewards/format_reward_step_strict": 0.9765625, "step": 101 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.102560478199459e-06, "aux_brier/mean_group_std": 0.05331271846939198, "aux_brier/mean_r": 0.9617336874931215, "aux_brier/n_active_tok": 196.625, "aux_brier/n_groups": 11.3125, "aux_brier/n_step_records": 49.15625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6790845518118245, "calib/avg_num_step_conf": 6.1640625, "calib/ece": 0.5193675889328062, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006489510489510485, "calib/mean_conf": 0.0458498023715415, "calib/mu_c": 0.048671328671328666, "calib/mu_w": 0.04218181818181818, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.014078028249992373, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2670.0, "completions/max_terminated_length": 2670.0, "completions/mean_length": 342.27734375, "completions/mean_terminated_length": 343.61962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.1088, "grad_norm": 0.04714421182870865, "learning_rate": 2.7222222222222224e-06, "loss": -0.0133, "num_tokens": 21127180.0, "reward": 1.1712284088134766, "reward_std": 0.20991604030132294, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.48178908228874207, "rewards/format_reward_step_strict": 0.984375, "step": 102 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.0155701142554605e-06, "aux_brier/mean_group_std": 0.051797072497869974, "aux_brier/mean_r": 0.9657128892684819, "aux_brier/n_active_tok": 208.25, "aux_brier/n_groups": 12.4375, "aux_brier/n_step_records": 52.0625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5553644041288707, "calib/avg_num_step_conf": 6.51171875, "calib/ece": 0.49925196850393705, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0023715983734751214, "calib/mean_conf": 0.04964566929133858, "calib/mu_c": 0.05071942446043165, "calib/mu_w": 0.04834782608695653, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0008267716535433072, "calib/std_conf": 0.012869439818615358, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1714.0, "completions/max_terminated_length": 1714.0, "completions/mean_length": 410.11328125, "completions/mean_terminated_length": 410.11328125, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.10986666666666667, "grad_norm": 0.015716806054115295, "learning_rate": 2.6944444444444444e-06, "loss": 0.0166, "num_tokens": 21336721.0, "reward": 1.1644841432571411, "reward_std": 0.16049611568450928, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5016871094703674, "rewards/format_reward_step_strict": 0.9921875, "step": 103 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.2022423732392937e-06, "aux_brier/mean_group_std": 0.0487820049126365, "aux_brier/mean_r": 0.9658913802581619, "aux_brier/n_active_tok": 201.375, "aux_brier/n_groups": 11.15625, "aux_brier/n_step_records": 50.34375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5270082184517497, "calib/avg_num_step_conf": 6.4296875, "calib/ece": 0.30792968749999994, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00334305408271475, "calib/mean_conf": 0.05144531250000001, "calib/mu_c": 0.05358695652173913, "calib/mu_w": 0.05024390243902438, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.015732782550373716, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1044.0, "completions/max_terminated_length": 1044.0, "completions/mean_length": 369.2109375, "completions/mean_terminated_length": 370.6588439941406, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.11093333333333333, "grad_norm": 0.03981407731771469, "learning_rate": 2.666666666666667e-06, "loss": -0.0108, "num_tokens": 21537919.0, "reward": 1.019655466079712, "reward_std": 0.21034060418605804, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.6723718643188477, "rewards/format_reward_step_strict": 0.984375, "step": 104 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.0062088509641427e-06, "aux_brier/mean_group_std": 0.04389778124056223, "aux_brier/mean_r": 0.9772179865062308, "aux_brier/n_active_tok": 202.125, "aux_brier/n_groups": 11.5625, "aux_brier/n_step_records": 50.53125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5621921182266011, "calib/avg_num_step_conf": 6.33203125, "calib/ece": 0.4012890625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002948275862068965, "calib/mean_conf": 0.05183593750000001, "calib/mu_c": 0.05344827586206897, "calib/mu_w": 0.0505, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.014742199920503513, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1866.0, "completions/max_terminated_length": 1866.0, "completions/mean_length": 393.2265625, "completions/mean_terminated_length": 394.7686462402344, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.112, "grad_norm": 0.025966746732592583, "learning_rate": 2.6388888888888893e-06, "loss": -0.0021, "num_tokens": 21744345.0, "reward": 1.098300814628601, "reward_std": 0.26817578077316284, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.5885159969329834, "rewards/format_reward_step_strict": 0.99609375, "step": 105 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 8.061786714863572e-07, "aux_brier/mean_group_std": 0.08333754965469, "aux_brier/mean_r": 0.9468736608927933, "aux_brier/n_active_tok": 198.375, "aux_brier/n_groups": 10.96875, "aux_brier/n_step_records": 49.59375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5614661654135338, "calib/avg_num_step_conf": 6.19921875, "calib/ece": 0.4209881422924901, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0017687969924812252, "calib/mean_conf": 0.05332015810276681, "calib/mu_c": 0.054250000000000013, "calib/mu_w": 0.05248120300751879, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.014143682117769804, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3029.0, "completions/max_terminated_length": 3029.0, "completions/mean_length": 390.21484375, "completions/mean_terminated_length": 390.21484375, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.11306666666666666, "grad_norm": 0.010917603969573975, "learning_rate": 2.6111111111111113e-06, "loss": 0.086, "num_tokens": 21948824.0, "reward": 1.1026880741119385, "reward_std": 0.19752243161201477, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5670023560523987, "rewards/format_reward_step_strict": 0.984375, "step": 106 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.976572967609673e-06, "aux_brier/mean_group_std": 0.06270684416678649, "aux_brier/mean_r": 0.9626215013030841, "aux_brier/n_active_tok": 210.25, "aux_brier/n_groups": 11.15625, "aux_brier/n_step_records": 52.5625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5606106870229008, "calib/avg_num_step_conf": 6.62109375, "calib/ece": 0.45664062499999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003357557251908394, "calib/mean_conf": 0.05507812500000002, "calib/mu_c": 0.05671755725190839, "calib/mu_w": 0.05336, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01581119528955275, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1217.0, "completions/max_terminated_length": 1217.0, "completions/mean_length": 366.03125, "completions/mean_terminated_length": 367.4666748046875, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.11413333333333334, "grad_norm": 0.10252933204174042, "learning_rate": 2.5833333333333337e-06, "loss": 0.0131, "num_tokens": 22147144.0, "reward": 1.1474798917770386, "reward_std": 0.22707661986351013, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5430445671081543, "rewards/format_reward_step_strict": 1.0, "step": 107 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.7100047952921535e-06, "aux_brier/mean_group_std": 0.07736919953269787, "aux_brier/mean_r": 0.9485736905461646, "aux_brier/n_active_tok": 204.75, "aux_brier/n_groups": 10.6875, "aux_brier/n_step_records": 51.1875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5773251139104797, "calib/avg_num_step_conf": 6.44140625, "calib/ece": 0.5821411764705882, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004966094880729029, "calib/mean_conf": 0.060996078431372565, "calib/mu_c": 0.06276829268292683, "calib/mu_w": 0.0578021978021978, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.017661634068076287, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1509.0, "completions/max_terminated_length": 1509.0, "completions/mean_length": 399.640625, "completions/mean_terminated_length": 401.2078552246094, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.1152, "grad_norm": 0.06794151663780212, "learning_rate": 2.5555555555555557e-06, "loss": -0.0184, "num_tokens": 22352684.0, "reward": 1.2485934495925903, "reward_std": 0.23719748854637146, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.4318739175796509, "rewards/format_reward_step_strict": 0.9921875, "step": 108 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.611892308490571e-06, "aux_brier/mean_group_std": 0.06195867460477537, "aux_brier/mean_r": 0.9659657078520748, "aux_brier/n_active_tok": 240.75, "aux_brier/n_groups": 13.875, "aux_brier/n_step_records": 60.1875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.637492182614134, "calib/avg_num_step_conf": 7.53125, "calib/ece": 0.42561264822134387, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0059987492182614205, "calib/mean_conf": 0.06260869565217392, "calib/mu_c": 0.06569105691056912, "calib/mu_w": 0.0596923076923077, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0010276679841897233, "calib/std_conf": 0.02386376525141341, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2190.0, "completions/max_terminated_length": 2190.0, "completions/mean_length": 445.56640625, "completions/mean_terminated_length": 445.56640625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.11626666666666667, "grad_norm": 0.0783458799123764, "learning_rate": 2.5277777777777778e-06, "loss": 0.091, "num_tokens": 22571349.0, "reward": 1.112328290939331, "reward_std": 0.18900692462921143, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5665007829666138, "rewards/format_reward_step_strict": 0.98046875, "step": 109 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.9528525646572703e-06, "aux_brier/mean_group_std": 0.05912420198305332, "aux_brier/mean_r": 0.9646633949108119, "aux_brier/n_active_tok": 202.25, "aux_brier/n_groups": 11.6875, "aux_brier/n_step_records": 50.5625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.491933418693982, "calib/avg_num_step_conf": 6.37109375, "calib/ece": 0.3733333333333333, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002245838668373877, "calib/mean_conf": 0.06317460317460317, "calib/mu_c": 0.06190909090909091, "calib/mu_w": 0.06415492957746478, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02035988873464131, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2042.0, "completions/max_terminated_length": 2042.0, "completions/mean_length": 387.70703125, "completions/mean_terminated_length": 390.75982666015625, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.11733333333333333, "grad_norm": 0.12243294715881348, "learning_rate": 2.5e-06, "loss": -0.0142, "num_tokens": 22775522.0, "reward": 1.072763442993164, "reward_std": 0.2849552035331726, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.6035538911819458, "rewards/format_reward_step_strict": 0.984375, "step": 110 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 4.18118770684206e-06, "aux_brier/mean_group_std": 0.05934831866230545, "aux_brier/mean_r": 0.9594280376685048, "aux_brier/n_active_tok": 206.125, "aux_brier/n_groups": 11.25, "aux_brier/n_step_records": 51.53125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5145471464019851, "calib/avg_num_step_conf": 6.4609375, "calib/ece": 0.418503937007874, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0022208436724565767, "calib/mean_conf": 0.06968503937007875, "calib/mu_c": 0.06854838709677419, "calib/mu_w": 0.07076923076923076, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02389072919636157, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1435.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 399.2109375, "completions/mean_terminated_length": 402.3543395996094, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.1184, "grad_norm": 0.0319228358566761, "learning_rate": 2.4722222222222226e-06, "loss": -0.0364, "num_tokens": 22985128.0, "reward": 1.1226773262023926, "reward_std": 0.2619039714336395, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5688344240188599, "rewards/format_reward_step_strict": 0.9921875, "step": 111 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.869699991829954e-06, "aux_brier/mean_group_std": 0.08900723429146168, "aux_brier/mean_r": 0.9368204580656649, "aux_brier/n_active_tok": 219.5, "aux_brier/n_groups": 14.0, "aux_brier/n_step_records": 54.875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6206819913639827, "calib/avg_num_step_conf": 6.91015625, "calib/ece": 0.42372509960159355, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00807340614681229, "calib/mean_conf": 0.07268924302788846, "calib/mu_c": 0.0767741935483871, "calib/mu_w": 0.06870078740157481, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001195219123505976, "calib/std_conf": 0.03017939463248341, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2916.0, "completions/max_terminated_length": 2916.0, "completions/mean_length": 435.79296875, "completions/mean_terminated_length": 439.2243957519531, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.11946666666666667, "grad_norm": 0.04229222610592842, "learning_rate": 2.4444444444444447e-06, "loss": 0.041, "num_tokens": 23204611.0, "reward": 1.1137549877166748, "reward_std": 0.22391203045845032, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5643951892852783, "rewards/format_reward_step_strict": 0.9765625, "step": 112 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.837179632313557e-06, "aux_brier/mean_group_std": 0.07313167256655854, "aux_brier/mean_r": 0.9554314023548512, "aux_brier/n_active_tok": 212.875, "aux_brier/n_groups": 11.59375, "aux_brier/n_step_records": 53.21875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.534562355730239, "calib/avg_num_step_conf": 6.7265625, "calib/ece": 0.4660236220472441, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0011997005427662466, "calib/mean_conf": 0.0780708661417323, "calib/mu_c": 0.07751824817518249, "calib/mu_w": 0.07871794871794874, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002362204724409449, "calib/std_conf": 0.031470191145683585, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1244.0, "completions/max_terminated_length": 1244.0, "completions/mean_length": 379.30078125, "completions/mean_terminated_length": 380.78826904296875, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.12053333333333334, "grad_norm": 0.05542054399847984, "learning_rate": 2.4166666666666667e-06, "loss": 0.0012, "num_tokens": 23406912.0, "reward": 1.1644923686981201, "reward_std": 0.22591552138328552, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.5329699516296387, "rewards/format_reward_step_strict": 0.9921875, "step": 113 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.435252810910175e-07, "aux_brier/mean_group_std": 0.05343525777276535, "aux_brier/mean_r": 0.9654501302267348, "aux_brier/n_active_tok": 215.875, "aux_brier/n_groups": 13.0, "aux_brier/n_step_records": 53.96875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4301480051480051, "calib/avg_num_step_conf": 6.74609375, "calib/ece": 0.5135573122529645, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.01527992277992278, "calib/mean_conf": 0.0824901185770751, "calib/mu_c": 0.07614864864864866, "calib/mu_w": 0.09142857142857144, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.005533596837944663, "calib/std_conf": 0.04936478819386015, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2761.0, "completions/max_terminated_length": 2761.0, "completions/mean_length": 398.90625, "completions/mean_terminated_length": 400.4706115722656, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.1216, "grad_norm": 0.057506460696458817, "learning_rate": 2.388888888888889e-06, "loss": 0.0129, "num_tokens": 23614056.0, "reward": 1.1900386810302734, "reward_std": 0.23270046710968018, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.4867175817489624, "rewards/format_reward_step_strict": 0.98046875, "step": 114 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.736093342643354e-06, "aux_brier/mean_group_std": 0.04719172928977428, "aux_brier/mean_r": 0.966516740672227, "aux_brier/n_active_tok": 214.5, "aux_brier/n_groups": 12.0625, "aux_brier/n_step_records": 53.625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5499689054726369, "calib/avg_num_step_conf": 6.90625, "calib/ece": 0.44574803149606307, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0024850746268656604, "calib/mean_conf": 0.08181102362204724, "calib/mu_c": 0.08298507462686568, "calib/mu_w": 0.08050000000000002, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02849001269500677, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 383.9921875, "completions/mean_terminated_length": 387.0157470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.12266666666666666, "grad_norm": 0.02982662245631218, "learning_rate": 2.361111111111111e-06, "loss": -0.0267, "num_tokens": 23817622.0, "reward": 1.1565759181976318, "reward_std": 0.26518166065216064, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5481789112091064, "rewards/format_reward_step_strict": 0.9921875, "step": 115 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.1013238353795316e-06, "aux_brier/mean_group_std": 0.03578971010904936, "aux_brier/mean_r": 0.97549386885622, "aux_brier/n_active_tok": 229.375, "aux_brier/n_groups": 13.9375, "aux_brier/n_step_records": 57.34375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4629319955406912, "calib/avg_num_step_conf": 7.171875, "calib/ece": 0.4496588235294118, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004413229282794515, "calib/mean_conf": 0.09151764705882354, "calib/mu_c": 0.0894927536231884, "calib/mu_w": 0.09390598290598291, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03734637397881659, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1759.0, "completions/max_terminated_length": 1759.0, "completions/mean_length": 452.3984375, "completions/mean_terminated_length": 454.1725769042969, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.12373333333333333, "grad_norm": 0.08965926617383957, "learning_rate": 2.3333333333333336e-06, "loss": 0.0048, "num_tokens": 24037956.0, "reward": 1.168178677558899, "reward_std": 0.24060563743114471, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5399023294448853, "rewards/format_reward_step_strict": 0.98828125, "step": 116 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.7152085230474086e-06, "aux_brier/mean_group_std": 0.04103590002150434, "aux_brier/mean_r": 0.9730136647613208, "aux_brier/n_active_tok": 241.875, "aux_brier/n_groups": 14.53125, "aux_brier/n_step_records": 60.46875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5059576069233663, "calib/avg_num_step_conf": 7.55859375, "calib/ece": 0.3790909090909091, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0008014549103223245, "calib/mean_conf": 0.09126482213438737, "calib/mu_c": 0.0908403361344538, "calib/mu_w": 0.09164179104477613, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.032388163120866516, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2772.0, "completions/max_terminated_length": 2772.0, "completions/mean_length": 434.98828125, "completions/mean_terminated_length": 434.98828125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.1248, "grad_norm": 0.07783927023410797, "learning_rate": 2.305555555555556e-06, "loss": 0.0594, "num_tokens": 24255913.0, "reward": 1.1086399555206299, "reward_std": 0.233639657497406, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5986223220825195, "rewards/format_reward_step_strict": 0.98828125, "step": 117 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.994761994061349e-07, "aux_brier/mean_group_std": 0.05193975383776459, "aux_brier/mean_r": 0.9674305220500208, "aux_brier/n_active_tok": 228.25, "aux_brier/n_groups": 13.6875, "aux_brier/n_step_records": 57.0625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.45714642678660666, "calib/avg_num_step_conf": 7.13671875, "calib/ece": 0.46248031496062986, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00930534732633681, "calib/mean_conf": 0.08649606299212598, "calib/mu_c": 0.08224637681159422, "calib/mu_w": 0.09155172413793103, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0028346456692913387, "calib/std_conf": 0.03446906041992227, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2277.0, "completions/max_terminated_length": 2277.0, "completions/mean_length": 412.62890625, "completions/mean_terminated_length": 414.2470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.12586666666666665, "grad_norm": 0.04219573363661766, "learning_rate": 2.277777777777778e-06, "loss": 0.0051, "num_tokens": 24465554.0, "reward": 1.168454885482788, "reward_std": 0.19997341930866241, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5331949591636658, "rewards/format_reward_step_strict": 0.9921875, "step": 118 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.0914254302690773e-06, "aux_brier/mean_group_std": 0.06396913642304744, "aux_brier/mean_r": 0.9579897961784033, "aux_brier/n_active_tok": 234.75, "aux_brier/n_groups": 13.46875, "aux_brier/n_step_records": 58.6875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5385101010101011, "calib/avg_num_step_conf": 7.4140625, "calib/ece": 0.4386904761904762, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00046969696969698105, "calib/mean_conf": 0.08757936507936508, "calib/mu_c": 0.08780303030303031, "calib/mu_w": 0.08733333333333333, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00123015873015873, "calib/std_conf": 0.028869122620313853, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2284.0, "completions/max_terminated_length": 2284.0, "completions/mean_length": 460.0234375, "completions/mean_terminated_length": 461.8274841308594, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.12693333333333334, "grad_norm": 0.025566641241312027, "learning_rate": 2.25e-06, "loss": 0.0355, "num_tokens": 24688384.0, "reward": 1.1455440521240234, "reward_std": 0.2643626928329468, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5509262084960938, "rewards/format_reward_step_strict": 0.984375, "step": 119 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.7104528945720574e-06, "aux_brier/mean_group_std": 0.0541598135283197, "aux_brier/mean_r": 0.9692082576724005, "aux_brier/n_active_tok": 219.875, "aux_brier/n_groups": 11.40625, "aux_brier/n_step_records": 54.96875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5153220762976861, "calib/avg_num_step_conf": 6.87109375, "calib/ece": 0.4294466403162055, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0015766103814884164, "calib/mean_conf": 0.08438735177865614, "calib/mu_c": 0.08515384615384615, "calib/mu_w": 0.08357723577235773, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.023289723558292966, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2621.0, "completions/max_terminated_length": 2621.0, "completions/mean_length": 425.43359375, "completions/mean_terminated_length": 425.43359375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.128, "grad_norm": 0.04736745357513428, "learning_rate": 2.222222222222222e-06, "loss": 0.0703, "num_tokens": 24903983.0, "reward": 1.1388731002807617, "reward_std": 0.22865451872348785, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5554922223091125, "rewards/format_reward_step_strict": 0.984375, "step": 120 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.1134249522370965e-07, "aux_brier/mean_group_std": 0.04610778267001389, "aux_brier/mean_r": 0.9691028837137745, "aux_brier/n_active_tok": 254.375, "aux_brier/n_groups": 15.84375, "aux_brier/n_step_records": 63.59375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.41995152442913636, "calib/avg_num_step_conf": 8.3359375, "calib/ece": 0.37533864541832673, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.010095037632351048, "calib/mean_conf": 0.09581673306772909, "calib/mu_c": 0.09042735042735044, "calib/mu_w": 0.10052238805970148, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0025099601593625495, "calib/std_conf": 0.03615524129351776, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3070.0, "completions/max_terminated_length": 3070.0, "completions/mean_length": 479.9921875, "completions/mean_terminated_length": 483.7716369628906, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.12906666666666666, "grad_norm": 0.0358821265399456, "learning_rate": 2.1944444444444445e-06, "loss": 0.0478, "num_tokens": 25131917.0, "reward": 1.096218228340149, "reward_std": 0.29095008969306946, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5958105325698853, "rewards/format_reward_step_strict": 0.98046875, "step": 121 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.868681449137323e-06, "aux_brier/mean_group_std": 0.05225980236816296, "aux_brier/mean_r": 0.9696455510374863, "aux_brier/n_active_tok": 226.625, "aux_brier/n_groups": 14.78125, "aux_brier/n_step_records": 56.65625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5430551079600388, "calib/avg_num_step_conf": 7.08203125, "calib/ece": 0.49194444444444446, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014521430873348229, "calib/mean_conf": 0.08345238095238097, "calib/mu_c": 0.08406896551724138, "calib/mu_w": 0.08261682242990656, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.021903144350213826, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2541.0, "completions/max_terminated_length": 2541.0, "completions/mean_length": 412.9140625, "completions/mean_terminated_length": 414.5333557128906, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.13013333333333332, "grad_norm": 0.03914785385131836, "learning_rate": 2.166666666666667e-06, "loss": 0.0433, "num_tokens": 25344967.0, "reward": 1.1850626468658447, "reward_std": 0.21061810851097107, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.5058754086494446, "rewards/format_reward_step_strict": 0.984375, "step": 122 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.521688863114772e-07, "aux_brier/mean_group_std": 0.0756448575875828, "aux_brier/mean_r": 0.9458321603027738, "aux_brier/n_active_tok": 249.625, "aux_brier/n_groups": 16.1875, "aux_brier/n_step_records": 62.40625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.42751326174149307, "calib/avg_num_step_conf": 7.8984375, "calib/ece": 0.38771084337349393, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.01668262388407296, "calib/mean_conf": 0.09759036144578315, "calib/mu_c": 0.08881355932203391, "calib/mu_w": 0.10549618320610687, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.005702811244979919, "calib/std_conf": 0.05823212850851676, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2906.0, "completions/max_terminated_length": 2906.0, "completions/mean_length": 499.5390625, "completions/mean_terminated_length": 505.46246337890625, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.1312, "grad_norm": 0.05221722647547722, "learning_rate": 2.138888888888889e-06, "loss": 0.0486, "num_tokens": 25578137.0, "reward": 1.0872745513916016, "reward_std": 0.2682573199272156, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5756609439849854, "rewards/format_reward_step_strict": 0.96484375, "step": 123 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -5.05113540327784e-07, "aux_brier/mean_group_std": 0.053274477799555124, "aux_brier/mean_r": 0.9693263882317068, "aux_brier/n_active_tok": 228.75, "aux_brier/n_groups": 13.0, "aux_brier/n_step_records": 57.1875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4933316714445968, "calib/avg_num_step_conf": 7.1484375, "calib/ece": 0.47050980392156866, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0006693256886451671, "calib/mean_conf": 0.0863529411764706, "calib/mu_c": 0.08605633802816902, "calib/mu_w": 0.08672566371681419, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.025660546911943815, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2265.0, "completions/max_terminated_length": 2265.0, "completions/mean_length": 442.71875, "completions/mean_terminated_length": 442.71875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.13226666666666667, "grad_norm": 0.010232754051685333, "learning_rate": 2.1111111111111114e-06, "loss": 0.0051, "num_tokens": 25798289.0, "reward": 1.1849322319030762, "reward_std": 0.20356249809265137, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.5287914276123047, "rewards/format_reward_step_strict": 0.99609375, "step": 124 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.0275335669422967e-07, "aux_brier/mean_group_std": 0.06576867125092614, "aux_brier/mean_r": 0.9578147937750323, "aux_brier/n_active_tok": 238.875, "aux_brier/n_groups": 14.78125, "aux_brier/n_step_records": 59.71875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4444934475806452, "calib/avg_num_step_conf": 7.46484375, "calib/ece": 0.40813492063492063, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.010098286290322597, "calib/mean_conf": 0.089484126984127, "calib/mu_c": 0.08435483870967743, "calib/mu_w": 0.09445312500000003, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0027777777777777775, "calib/std_conf": 0.03736525721121247, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3033.0, "completions/max_terminated_length": 3033.0, "completions/mean_length": 492.0390625, "completions/mean_terminated_length": 492.0390625, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.13333333333333333, "grad_norm": 0.007184287067502737, "learning_rate": 2.0833333333333334e-06, "loss": 0.058, "num_tokens": 26029059.0, "reward": 1.119678020477295, "reward_std": 0.23738686740398407, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5724620819091797, "rewards/format_reward_step_strict": 0.984375, "step": 125 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.0497718808409893e-06, "aux_brier/mean_group_std": 0.06422724326178852, "aux_brier/mean_r": 0.9581111974496881, "aux_brier/n_active_tok": 250.625, "aux_brier/n_groups": 15.8125, "aux_brier/n_step_records": 62.65625, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.6346127763461278, "calib/avg_num_step_conf": 8.16796875, "calib/ece": 0.5089878542510121, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011266784212667844, "calib/mean_conf": 0.08210526315789474, "calib/mu_c": 0.08671232876712329, "calib/mu_w": 0.07544554455445544, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.026928099132810256, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2866.0, "completions/max_terminated_length": 2866.0, "completions/mean_length": 489.84765625, "completions/mean_terminated_length": 495.6561584472656, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.1344, "grad_norm": 0.05721435695886612, "learning_rate": 2.0555555555555555e-06, "loss": 0.0539, "num_tokens": 26259924.0, "reward": 1.1713693141937256, "reward_std": 0.24430416524410248, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.4823523163795471, "rewards/format_reward_step_strict": 0.9609375, "step": 126 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.2410819398567252e-06, "aux_brier/mean_group_std": 0.03815124177472212, "aux_brier/mean_r": 0.9759511842763717, "aux_brier/n_active_tok": 254.125, "aux_brier/n_groups": 15.25, "aux_brier/n_step_records": 63.53125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5595032328276038, "calib/avg_num_step_conf": 8.07421875, "calib/ece": 0.42336000000000007, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006253120798924525, "calib/mean_conf": 0.08464, "calib/mu_c": 0.08771653543307088, "calib/mu_w": 0.08146341463414636, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.029217638508270995, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2709.0, "completions/max_terminated_length": 2709.0, "completions/mean_length": 472.50390625, "completions/mean_terminated_length": 474.3569030761719, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.13546666666666668, "grad_norm": 0.017782477661967278, "learning_rate": 2.027777777777778e-06, "loss": 0.0695, "num_tokens": 26484557.0, "reward": 1.1242926120758057, "reward_std": 0.20912383496761322, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5596703290939331, "rewards/format_reward_step_strict": 0.9765625, "step": 127 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.740578158271333e-07, "aux_brier/mean_group_std": 0.05217880205747242, "aux_brier/mean_r": 0.9636292925598783, "aux_brier/n_active_tok": 226.625, "aux_brier/n_groups": 14.4375, "aux_brier/n_step_records": 56.65625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5945245901639344, "calib/avg_num_step_conf": 7.14453125, "calib/ece": 0.411251012145749, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00912708196721311, "calib/mean_conf": 0.08267611336032388, "calib/mu_c": 0.08729508196721311, "calib/mu_w": 0.078168, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.028494767547860126, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3055.0, "completions/max_terminated_length": 3055.0, "completions/mean_length": 481.00390625, "completions/mean_terminated_length": 486.70751953125, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.13653333333333334, "grad_norm": 0.03822065517306328, "learning_rate": 2.0000000000000003e-06, "loss": 0.0539, "num_tokens": 26714358.0, "reward": 1.101963996887207, "reward_std": 0.2587791085243225, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5641059279441833, "rewards/format_reward_step_strict": 0.9609375, "step": 128 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -4.896738784038179e-07, "aux_brier/mean_group_std": 0.06356348446878757, "aux_brier/mean_r": 0.9580698250196816, "aux_brier/n_active_tok": 229.125, "aux_brier/n_groups": 12.875, "aux_brier/n_step_records": 57.28125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.561960933536276, "calib/avg_num_step_conf": 7.18359375, "calib/ece": 0.4877165354330709, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004614408929477398, "calib/mean_conf": 0.08913385826771655, "calib/mu_c": 0.09109589041095889, "calib/mu_w": 0.08648148148148149, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0010236220472440946, "calib/std_conf": 0.029551066757920824, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2464.0, "completions/max_terminated_length": 2464.0, "completions/mean_length": 433.4140625, "completions/mean_terminated_length": 435.1137390136719, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.1376, "grad_norm": 0.013987638987600803, "learning_rate": 1.9722222222222224e-06, "loss": -0.0069, "num_tokens": 26927696.0, "reward": 1.1956642866134644, "reward_std": 0.1896999180316925, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.5170320272445679, "rewards/format_reward_step_strict": 0.9921875, "step": 129 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.888920857895382e-06, "aux_brier/mean_group_std": 0.05988606144609164, "aux_brier/mean_r": 0.9603871145268268, "aux_brier/n_active_tok": 230.375, "aux_brier/n_groups": 12.375, "aux_brier/n_step_records": 57.59375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5627044025157233, "calib/avg_num_step_conf": 7.25, "calib/ece": 0.4921484375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005339622641509431, "calib/mean_conf": 0.09378906250000002, "calib/mu_c": 0.09599999999999999, "calib/mu_w": 0.09066037735849056, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03325003579503477, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1662.0, "completions/max_terminated_length": 1662.0, "completions/mean_length": 420.58984375, "completions/mean_terminated_length": 422.2392272949219, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.13866666666666666, "grad_norm": 0.04971790313720703, "learning_rate": 1.944444444444445e-06, "loss": 0.0109, "num_tokens": 27140655.0, "reward": 1.2151026725769043, "reward_std": 0.18918028473854065, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.5166605710983276, "rewards/format_reward_step_strict": 1.0, "step": 130 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.1584866417280182e-06, "aux_brier/mean_group_std": 0.051293487482689956, "aux_brier/mean_r": 0.9696329716740563, "aux_brier/n_active_tok": 233.875, "aux_brier/n_groups": 13.59375, "aux_brier/n_step_records": 58.46875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6290830381739473, "calib/avg_num_step_conf": 7.4296875, "calib/ece": 0.30363636363636365, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010230880230880243, "calib/mean_conf": 0.08932806324110673, "calib/mu_c": 0.09555555555555556, "calib/mu_w": 0.08532467532467532, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0008300395256916996, "calib/std_conf": 0.02596636397927713, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2996.0, "completions/max_terminated_length": 2996.0, "completions/mean_length": 422.24609375, "completions/mean_terminated_length": 425.57086181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.13973333333333332, "grad_norm": 0.024130966514348984, "learning_rate": 1.916666666666667e-06, "loss": -0.0385, "num_tokens": 27354958.0, "reward": 1.0475884675979614, "reward_std": 0.1937212347984314, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.6669164299964905, "rewards/format_reward_step_strict": 0.98828125, "step": 131 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.1103363183061496e-06, "aux_brier/mean_group_std": 0.09580266574378875, "aux_brier/mean_r": 0.933538275029279, "aux_brier/n_active_tok": 243.875, "aux_brier/n_groups": 13.8125, "aux_brier/n_step_records": 60.96875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.47300931391840484, "calib/avg_num_step_conf": 7.97265625, "calib/ece": 0.5131225296442689, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004783549783549779, "calib/mean_conf": 0.09557312252964427, "calib/mu_c": 0.0937012987012987, "calib/mu_w": 0.09848484848484848, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.030414910611129702, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1342.0, "completions/max_terminated_length": 1342.0, "completions/mean_length": 446.3828125, "completions/mean_terminated_length": 449.89764404296875, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.1408, "grad_norm": 0.013880199752748013, "learning_rate": 1.888888888888889e-06, "loss": -0.007, "num_tokens": 27574824.0, "reward": 1.216127872467041, "reward_std": 0.22834108769893646, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.4895116984844208, "rewards/format_reward_step_strict": 0.984375, "step": 132 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.060480568832901e-06, "aux_brier/mean_group_std": 0.04970631518586872, "aux_brier/mean_r": 0.9654551346843375, "aux_brier/n_active_tok": 260.75, "aux_brier/n_groups": 14.0625, "aux_brier/n_step_records": 65.1875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4812318265926513, "calib/avg_num_step_conf": 8.1484375, "calib/ece": 0.27964426877470355, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0029130319851969205, "calib/mean_conf": 0.10375494071146246, "calib/mu_c": 0.10195876288659794, "calib/mu_w": 0.10487179487179486, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.03518066338743322, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2876.0, "completions/max_terminated_length": 2876.0, "completions/mean_length": 529.78125, "completions/mean_terminated_length": 529.78125, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.14186666666666667, "grad_norm": 0.043244197964668274, "learning_rate": 1.8611111111111113e-06, "loss": 0.0394, "num_tokens": 27816792.0, "reward": 1.038818120956421, "reward_std": 0.2598777711391449, "rewards/accuracy_reward_step": 0.37890625, "rewards/final_brier_reward_step": 0.6708973050117493, "rewards/format_reward_step_strict": 0.984375, "step": 133 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -9.444852798190517e-07, "aux_brier/mean_group_std": 0.05817175404218855, "aux_brier/mean_r": 0.9565321587049483, "aux_brier/n_active_tok": 263.25, "aux_brier/n_groups": 16.46875, "aux_brier/n_step_records": 65.8125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.628446348733234, "calib/avg_num_step_conf": 8.2265625, "calib/ece": 0.41905511811023616, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009413810233482367, "calib/mean_conf": 0.10062992125984253, "calib/mu_c": 0.10515151515151515, "calib/mu_w": 0.09573770491803278, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.037083604873144424, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2462.0, "completions/max_terminated_length": 2462.0, "completions/mean_length": 544.76953125, "completions/mean_terminated_length": 544.76953125, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 0.14293333333333333, "grad_norm": 0.010487113147974014, "learning_rate": 1.8333333333333333e-06, "loss": 0.0301, "num_tokens": 28065205.0, "reward": 1.1551158428192139, "reward_std": 0.17233869433403015, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5735882520675659, "rewards/format_reward_step_strict": 0.9921875, "step": 134 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.0978945453341566e-07, "aux_brier/mean_group_std": 0.032730762372773364, "aux_brier/mean_r": 0.9779772052692718, "aux_brier/n_active_tok": 258.25, "aux_brier/n_groups": 16.96875, "aux_brier/n_step_records": 64.5625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5892671517671517, "calib/avg_num_step_conf": 8.2578125, "calib/ece": 0.49424603174603177, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005738045738045733, "calib/mean_conf": 0.09702380952380954, "calib/mu_c": 0.09939189189189189, "calib/mu_w": 0.09365384615384616, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.001984126984126984, "calib/std_conf": 0.04208211253028225, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2262.0, "completions/max_terminated_length": 2262.0, "completions/mean_length": 510.83984375, "completions/mean_terminated_length": 514.8621826171875, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.144, "grad_norm": 0.04326924681663513, "learning_rate": 1.8055555555555557e-06, "loss": 0.0158, "num_tokens": 28301860.0, "reward": 1.1951674222946167, "reward_std": 0.25529035925865173, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.5072324275970459, "rewards/format_reward_step_strict": 0.98046875, "step": 135 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 4.7476447551098033e-07, "aux_brier/mean_group_std": 0.052697667470998966, "aux_brier/mean_r": 0.9702369810499175, "aux_brier/n_active_tok": 268.375, "aux_brier/n_groups": 16.6875, "aux_brier/n_step_records": 67.09375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.576388450116889, "calib/avg_num_step_conf": 8.46484375, "calib/ece": 0.37202380952380953, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.013157894736842146, "calib/mean_conf": 0.10019841269841272, "calib/mu_c": 0.10714285714285718, "calib/mu_w": 0.09398496240601503, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.062319265007951326, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2290.0, "completions/max_terminated_length": 2290.0, "completions/mean_length": 485.88671875, "completions/mean_terminated_length": 487.79217529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.14506666666666668, "grad_norm": 0.020810028538107872, "learning_rate": 1.777777777777778e-06, "loss": 0.0279, "num_tokens": 28534735.0, "reward": 1.1033775806427002, "reward_std": 0.230840265750885, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6010105609893799, "rewards/format_reward_step_strict": 0.9765625, "step": 136 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.4710560704497833e-06, "aux_brier/mean_group_std": 0.05615136213528867, "aux_brier/mean_r": 0.9564882836260133, "aux_brier/n_active_tok": 291.125, "aux_brier/n_groups": 19.15625, "aux_brier/n_step_records": 72.78125, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5463025323275862, "calib/avg_num_step_conf": 9.296875, "calib/ece": 0.37434426229508194, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003116918103448274, "calib/mean_conf": 0.10327868852459017, "calib/mu_c": 0.1049137931034483, "calib/mu_w": 0.10179687500000002, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0011065573770491805, "calib/std_conf": 0.029391161349853923, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2996.0, "completions/max_terminated_length": 2996.0, "completions/mean_length": 536.6875, "completions/mean_terminated_length": 543.0513916015625, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.14613333333333334, "grad_norm": 0.04827215522527695, "learning_rate": 1.75e-06, "loss": 0.1091, "num_tokens": 28779111.0, "reward": 1.0727916955947876, "reward_std": 0.3305455446243286, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.5802293419837952, "rewards/format_reward_step_strict": 0.94921875, "step": 137 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 5.542300720584592e-07, "aux_brier/mean_group_std": 0.0390463759611963, "aux_brier/mean_r": 0.9705610088967543, "aux_brier/n_active_tok": 261.25, "aux_brier/n_groups": 15.625, "aux_brier/n_step_records": 65.3125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5334576138147566, "calib/avg_num_step_conf": 8.1640625, "calib/ece": 0.5104330708661418, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002059131344845644, "calib/mean_conf": 0.10720472440944882, "calib/mu_c": 0.10641025641025642, "calib/mu_w": 0.10846938775510206, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001732283464566929, "calib/std_conf": 0.041588315777102806, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2517.0, "completions/max_terminated_length": 2517.0, "completions/mean_length": 484.7890625, "completions/mean_terminated_length": 484.7890625, "completions/min_length": 64.0, "completions/min_terminated_length": 64.0, "epoch": 0.1472, "grad_norm": 0.01851765625178814, "learning_rate": 1.7222222222222224e-06, "loss": 0.0265, "num_tokens": 29007553.0, "reward": 1.230314016342163, "reward_std": 0.25490623712539673, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.49938085675239563, "rewards/format_reward_step_strict": 0.9921875, "step": 138 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.8302141957526885e-07, "aux_brier/mean_group_std": 0.05321893990740268, "aux_brier/mean_r": 0.9606402633369969, "aux_brier/n_active_tok": 243.5, "aux_brier/n_groups": 13.625, "aux_brier/n_step_records": 60.875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5765964240102172, "calib/avg_num_step_conf": 7.72265625, "calib/ece": 0.4759683794466404, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005374201787994873, "calib/mean_conf": 0.09715415019762846, "calib/mu_c": 0.09944827586206895, "calib/mu_w": 0.09407407407407407, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02985809575283303, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2362.0, "completions/max_terminated_length": 2362.0, "completions/mean_length": 465.15625, "completions/mean_terminated_length": 466.98040771484375, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.14826666666666666, "grad_norm": 0.01863935962319374, "learning_rate": 1.6944444444444446e-06, "loss": 0.0281, "num_tokens": 29229729.0, "reward": 1.1916272640228271, "reward_std": 0.23998242616653442, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.5243219137191772, "rewards/format_reward_step_strict": 0.98828125, "step": 139 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -4.0164620990212896e-07, "aux_brier/mean_group_std": 0.046515244450848044, "aux_brier/mean_r": 0.9689238602553203, "aux_brier/n_active_tok": 242.875, "aux_brier/n_groups": 12.96875, "aux_brier/n_step_records": 60.71875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5593640305595705, "calib/avg_num_step_conf": 7.625, "calib/ece": 0.5574409448818898, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005654208823731868, "calib/mean_conf": 0.10003937007874016, "calib/mu_c": 0.10197604790419162, "calib/mu_w": 0.09632183908045976, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03087309599467006, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2788.0, "completions/max_terminated_length": 2788.0, "completions/mean_length": 482.44921875, "completions/mean_terminated_length": 484.3412170410156, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.14933333333333335, "grad_norm": 0.06730857491493225, "learning_rate": 1.6666666666666667e-06, "loss": 0.0164, "num_tokens": 29458252.0, "reward": 1.2639412879943848, "reward_std": 0.2333708554506302, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.4620152711868286, "rewards/format_reward_step_strict": 0.9921875, "step": 140 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.325099744242113e-07, "aux_brier/mean_group_std": 0.07243225738584982, "aux_brier/mean_r": 0.9495889869856867, "aux_brier/n_active_tok": 264.5, "aux_brier/n_groups": 15.0, "aux_brier/n_step_records": 66.125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6613786952089704, "calib/avg_num_step_conf": 8.265625, "calib/ece": 0.4766007905138339, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.0061525229357797945, "calib/mean_conf": 0.10047430830039526, "calib/mu_c": 0.103125, "calib/mu_w": 0.0969724770642202, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003952569169960474, "calib/std_conf": 0.06619957497815487, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3059.0, "completions/max_terminated_length": 3059.0, "completions/mean_length": 557.9453125, "completions/mean_terminated_length": 560.1333618164062, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.1504, "grad_norm": 0.012635590508580208, "learning_rate": 1.638888888888889e-06, "loss": 0.0347, "num_tokens": 29708182.0, "reward": 1.1885128021240234, "reward_std": 0.2409987598657608, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5274890661239624, "rewards/format_reward_step_strict": 0.98828125, "step": 141 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.078029411993313e-07, "aux_brier/mean_group_std": 0.06055486257432703, "aux_brier/mean_r": 0.958171473881109, "aux_brier/n_active_tok": 267.75, "aux_brier/n_groups": 14.625, "aux_brier/n_step_records": 66.9375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.61821910147269, "calib/avg_num_step_conf": 8.5703125, "calib/ece": 0.42677165354330704, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013240539364941267, "calib/mean_conf": 0.09685039370078741, "calib/mu_c": 0.1031578947368421, "calib/mu_w": 0.08991735537190083, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03550980318013948, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2141.0, "completions/max_terminated_length": 2141.0, "completions/mean_length": 510.6875, "completions/mean_terminated_length": 512.6902465820312, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.15146666666666667, "grad_norm": 0.030646586790680885, "learning_rate": 1.6111111111111113e-06, "loss": 0.0078, "num_tokens": 29944078.0, "reward": 1.1579464673995972, "reward_std": 0.1530137062072754, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5692859292030334, "rewards/format_reward_step_strict": 0.9921875, "step": 142 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -4.110612904228983e-07, "aux_brier/mean_group_std": 0.05591120441773031, "aux_brier/mean_r": 0.9650633850167166, "aux_brier/n_active_tok": 269.125, "aux_brier/n_groups": 16.46875, "aux_brier/n_step_records": 67.28125, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5596239989140763, "calib/avg_num_step_conf": 8.48046875, "calib/ece": 0.47085714285714286, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004455002036106978, "calib/mean_conf": 0.09648979591836734, "calib/mu_c": 0.09841726618705036, "calib/mu_w": 0.09396226415094339, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.029800782381853592, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2547.0, "completions/max_terminated_length": 2547.0, "completions/mean_length": 536.37890625, "completions/mean_terminated_length": 540.6023559570312, "completions/min_length": 0.0, "completions/min_terminated_length": 206.0, "epoch": 0.15253333333333333, "grad_norm": 0.09321467578411102, "learning_rate": 1.5833333333333333e-06, "loss": 0.0439, "num_tokens": 30188727.0, "reward": 1.1492786407470703, "reward_std": 0.23780958354473114, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5111774206161499, "rewards/format_reward_step_strict": 0.95703125, "step": 143 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.2350600124833022e-06, "aux_brier/mean_group_std": 0.027071194279506893, "aux_brier/mean_r": 0.9816140675573792, "aux_brier/n_active_tok": 273.75, "aux_brier/n_groups": 14.78125, "aux_brier/n_step_records": 68.4375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6468660111044218, "calib/avg_num_step_conf": 8.890625, "calib/ece": 0.50632, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0175650545186969, "calib/mean_conf": 0.09768, "calib/mu_c": 0.10463576158940398, "calib/mu_w": 0.08707070707070708, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.032781360557487545, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2009.0, "completions/max_terminated_length": 2009.0, "completions/mean_length": 510.34765625, "completions/mean_terminated_length": 516.3992309570312, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.1536, "grad_norm": 0.15854761004447937, "learning_rate": 1.5555555555555558e-06, "loss": -0.0021, "num_tokens": 30423504.0, "reward": 1.206978440284729, "reward_std": 0.23236022889614105, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.4997890591621399, "rewards/format_reward_step_strict": 0.9765625, "step": 144 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -8.570846455890901e-07, "aux_brier/mean_group_std": 0.07063431696107034, "aux_brier/mean_r": 0.9479332649163424, "aux_brier/n_active_tok": 289.875, "aux_brier/n_groups": 18.125, "aux_brier/n_step_records": 72.46875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6206608569353668, "calib/avg_num_step_conf": 9.05859375, "calib/ece": 0.5703187250996016, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014122004357298504, "calib/mean_conf": 0.10697211155378486, "calib/mu_c": 0.11152941176470589, "calib/mu_w": 0.09740740740740739, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04086216286136444, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2327.0, "completions/max_terminated_length": 2327.0, "completions/mean_length": 530.8046875, "completions/mean_terminated_length": 530.8046875, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.15466666666666667, "grad_norm": 0.04638217017054558, "learning_rate": 1.527777777777778e-06, "loss": 0.054, "num_tokens": 30662094.0, "reward": 1.2672154903411865, "reward_std": 0.2803875803947449, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.45167461037635803, "rewards/format_reward_step_strict": 0.98046875, "step": 145 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.7337525574090407e-06, "aux_brier/mean_group_std": 0.02803442662753144, "aux_brier/mean_r": 0.9840496662095859, "aux_brier/n_active_tok": 277.875, "aux_brier/n_groups": 15.40625, "aux_brier/n_step_records": 69.46875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5574900602795947, "calib/avg_num_step_conf": 8.91015625, "calib/ece": 0.3627091633466136, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": 0.0005027574708221377, "calib/mean_conf": 0.0952988047808765, "calib/mu_c": 0.09557522123893808, "calib/mu_w": 0.09507246376811594, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003904382470119522, "calib/std_conf": 0.06506057825744377, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1659.0, "completions/max_terminated_length": 1659.0, "completions/mean_length": 530.1796875, "completions/mean_terminated_length": 540.7410278320312, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.15573333333333333, "grad_norm": 0.11035989969968796, "learning_rate": 1.5e-06, "loss": -0.0713, "num_tokens": 30905036.0, "reward": 1.0813231468200684, "reward_std": 0.2637237310409546, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.6065425872802734, "rewards/format_reward_step_strict": 0.9765625, "step": 146 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.50355611464137e-07, "aux_brier/mean_group_std": 0.05485977103305224, "aux_brier/mean_r": 0.9593606115870782, "aux_brier/n_active_tok": 285.75, "aux_brier/n_groups": 15.0625, "aux_brier/n_step_records": 71.4375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6319531839919457, "calib/avg_num_step_conf": 9.0859375, "calib/ece": 0.3577470355731225, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012301157815252997, "calib/mean_conf": 0.10549407114624505, "calib/mu_c": 0.11215517241379314, "calib/mu_w": 0.09985401459854014, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0023715415019762843, "calib/std_conf": 0.042298732500783094, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2304.0, "completions/max_terminated_length": 2304.0, "completions/mean_length": 534.6953125, "completions/mean_terminated_length": 538.905517578125, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.1568, "grad_norm": 0.1080685704946518, "learning_rate": 1.4722222222222225e-06, "loss": 0.0109, "num_tokens": 31145598.0, "reward": 1.1032731533050537, "reward_std": 0.19219431281089783, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.6240301132202148, "rewards/format_reward_step_strict": 0.98828125, "step": 147 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -5.543715002609417e-07, "aux_brier/mean_group_std": 0.05581918718485776, "aux_brier/mean_r": 0.9629090772270584, "aux_brier/n_active_tok": 267.125, "aux_brier/n_groups": 14.59375, "aux_brier/n_step_records": 66.78125, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.6817950581395349, "calib/avg_num_step_conf": 8.7421875, "calib/ece": 0.5501626016260164, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0040650406504065045, "calib/gap": 0.007065406976744196, "calib/mean_conf": 0.10796747967479675, "calib/mu_c": 0.1104375, "calib/mu_w": 0.1033720930232558, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003861788617886179, "calib/std_conf": 0.06123338466647912, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2803.0, "completions/max_terminated_length": 2803.0, "completions/mean_length": 537.64453125, "completions/mean_terminated_length": 544.019775390625, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.15786666666666666, "grad_norm": 0.07648499310016632, "learning_rate": 1.4444444444444445e-06, "loss": 0.0359, "num_tokens": 31388347.0, "reward": 1.2202636003494263, "reward_std": 0.24338200688362122, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.45917969942092896, "rewards/format_reward_step_strict": 0.9609375, "step": 148 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.798103858059342e-07, "aux_brier/mean_group_std": 0.05612567180075495, "aux_brier/mean_r": 0.9629851097100218, "aux_brier/n_active_tok": 284.0, "aux_brier/n_groups": 15.40625, "aux_brier/n_step_records": 71.0, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.523456308152775, "calib/avg_num_step_conf": 8.91796875, "calib/ece": 0.45616, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007068774806428521, "calib/mean_conf": 0.10784000000000002, "calib/mu_c": 0.11092198581560284, "calib/mu_w": 0.10385321100917431, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.041077176144423556, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2486.0, "completions/max_terminated_length": 2486.0, "completions/mean_length": 580.59375, "completions/mean_terminated_length": 582.87060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.15893333333333334, "grad_norm": 0.01760271191596985, "learning_rate": 1.4166666666666667e-06, "loss": 0.0146, "num_tokens": 31641435.0, "reward": 1.172803521156311, "reward_std": 0.2619955241680145, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.5349640846252441, "rewards/format_reward_step_strict": 0.9765625, "step": 149 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -9.429146688444412e-08, "aux_brier/mean_group_std": 0.052168910145743194, "aux_brier/mean_r": 0.962446100782362, "aux_brier/n_active_tok": 291.0, "aux_brier/n_groups": 18.375, "aux_brier/n_step_records": 72.75, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5845388188453882, "calib/avg_num_step_conf": 9.26171875, "calib/ece": 0.44153846153846155, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.004048582995951417, "calib/gap": 0.017595885865958838, "calib/mean_conf": 0.11603238866396763, "calib/mu_c": 0.12386861313868612, "calib/mu_w": 0.10627272727272728, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0014574898785425102, "calib/std_conf": 0.07325049200179348, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2841.0, "completions/max_terminated_length": 2841.0, "completions/mean_length": 544.83203125, "completions/mean_terminated_length": 549.1220703125, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.16, "grad_norm": 0.15550203621387482, "learning_rate": 1.3888888888888892e-06, "loss": 0.0602, "num_tokens": 31885872.0, "reward": 1.1478254795074463, "reward_std": 0.28443899750709534, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.5366144776344299, "rewards/format_reward_step_strict": 0.95703125, "step": 150 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.6749030640770286e-07, "aux_brier/mean_group_std": 0.05182812839019988, "aux_brier/mean_r": 0.9620563896566551, "aux_brier/n_active_tok": 292.25, "aux_brier/n_groups": 16.5, "aux_brier/n_step_records": 73.0625, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.6068881257429666, "calib/avg_num_step_conf": 9.55078125, "calib/ece": 0.34688259109311737, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.004048582995951417, "calib/gap": 0.011942940166424554, "calib/mean_conf": 0.1182995951417004, "calib/mu_c": 0.12477876106194694, "calib/mu_w": 0.11283582089552238, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.003846153846153846, "calib/std_conf": 0.07323872092514362, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2542.0, "completions/max_terminated_length": 2542.0, "completions/mean_length": 576.828125, "completions/mean_terminated_length": 585.984130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 176.0, "epoch": 0.16106666666666666, "grad_norm": 0.03265540674328804, "learning_rate": 1.3611111111111112e-06, "loss": 0.0357, "num_tokens": 32140564.0, "reward": 1.0755085945129395, "reward_std": 0.2857625484466553, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.6145347356796265, "rewards/format_reward_step_strict": 0.9609375, "step": 151 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.598158972411113e-07, "aux_brier/mean_group_std": 0.021333223741434024, "aux_brier/mean_r": 0.9815279845409678, "aux_brier/n_active_tok": 320.0, "aux_brier/n_groups": 21.0625, "aux_brier/n_step_records": 80.0, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5063945578231293, "calib/avg_num_step_conf": 10.24609375, "calib/ece": 0.4658367346938776, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.004081632653061225, "calib/gap": -0.006619047619047622, "calib/mean_conf": 0.11612244897959186, "calib/mu_c": 0.11328571428571428, "calib/mu_w": 0.1199047619047619, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00526530612244898, "calib/std_conf": 0.06644980694465628, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2940.0, "completions/max_terminated_length": 2940.0, "completions/mean_length": 596.078125, "completions/mean_terminated_length": 605.5397338867188, "completions/min_length": 0.0, "completions/min_terminated_length": 218.0, "epoch": 0.16213333333333332, "grad_norm": 0.18766255676746368, "learning_rate": 1.3333333333333334e-06, "loss": 0.0151, "num_tokens": 32398552.0, "reward": 1.1546235084533691, "reward_std": 0.2653184235095978, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.5169316530227661, "rewards/format_reward_step_strict": 0.95703125, "step": 152 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.1766621638441563e-07, "aux_brier/mean_group_std": 0.06026336398708991, "aux_brier/mean_r": 0.9515601117150679, "aux_brier/n_active_tok": 303.75, "aux_brier/n_groups": 19.0625, "aux_brier/n_step_records": 75.9375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5809523809523809, "calib/avg_num_step_conf": 9.4921875, "calib/ece": 0.44047808764940244, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.00796812749003984, "calib/gap": 0.010137065637065637, "calib/mean_conf": 0.12430278884462152, "calib/mu_c": 0.12878571428571428, "calib/mu_w": 0.11864864864864864, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0035059760956175296, "calib/std_conf": 0.08171842112270698, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2612.0, "completions/max_terminated_length": 2612.0, "completions/mean_length": 582.7734375, "completions/mean_terminated_length": 582.7734375, "completions/min_length": 239.0, "completions/min_terminated_length": 239.0, "epoch": 0.1632, "grad_norm": 0.0220626313239336, "learning_rate": 1.3055555555555556e-06, "loss": 0.0362, "num_tokens": 32655062.0, "reward": 1.166546106338501, "reward_std": 0.21586893498897552, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.5411847829818726, "rewards/format_reward_step_strict": 0.96875, "step": 153 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.138314249393151e-07, "aux_brier/mean_group_std": 0.05190131323072046, "aux_brier/mean_r": 0.9613522500884691, "aux_brier/n_active_tok": 289.625, "aux_brier/n_groups": 16.09375, "aux_brier/n_step_records": 72.40625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.614176245210728, "calib/avg_num_step_conf": 9.17578125, "calib/ece": 0.338804780876494, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.016377394636015313, "calib/mean_conf": 0.12334661354581673, "calib/mu_c": 0.1321551724137931, "calib/mu_w": 0.11577777777777779, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.044165067257084134, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3031.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 534.1328125, "completions/mean_terminated_length": 536.2274780273438, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.16426666666666667, "grad_norm": 0.04481395334005356, "learning_rate": 1.2777777777777779e-06, "loss": 0.05, "num_tokens": 32896240.0, "reward": 1.0980093479156494, "reward_std": 0.2588661313056946, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.6264125108718872, "rewards/format_reward_step_strict": 0.9765625, "step": 154 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.8488905962076316e-07, "aux_brier/mean_group_std": 0.055795254315706595, "aux_brier/mean_r": 0.9584929775784383, "aux_brier/n_active_tok": 272.0, "aux_brier/n_groups": 15.875, "aux_brier/n_step_records": 68.0, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5912463882322039, "calib/avg_num_step_conf": 8.78515625, "calib/ece": 0.3091164658634538, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010975177304964542, "calib/mean_conf": 0.12461847389558234, "calib/mu_c": 0.13083333333333333, "calib/mu_w": 0.11985815602836879, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04246372205593495, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2867.0, "completions/max_terminated_length": 2867.0, "completions/mean_length": 517.8515625, "completions/mean_terminated_length": 521.9291381835938, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.16533333333333333, "grad_norm": 0.021404221653938293, "learning_rate": 1.25e-06, "loss": 0.0495, "num_tokens": 33136026.0, "reward": 1.0664126873016357, "reward_std": 0.29070472717285156, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.6406508088111877, "rewards/format_reward_step_strict": 0.96875, "step": 155 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 7.492172946974018e-08, "aux_brier/mean_group_std": 0.06266845824162358, "aux_brier/mean_r": 0.9578044682141771, "aux_brier/n_active_tok": 294.875, "aux_brier/n_groups": 16.59375, "aux_brier/n_step_records": 73.71875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5657978995901639, "calib/avg_num_step_conf": 9.54296875, "calib/ece": 0.38716, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.008, "calib/gap": -0.0013780737704918444, "calib/mean_conf": 0.14036, "calib/mu_c": 0.1396875, "calib/mu_w": 0.14106557377049184, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0077599999999999995, "calib/std_conf": 0.08532450058453316, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2693.0, "completions/max_terminated_length": 2693.0, "completions/mean_length": 563.61328125, "completions/mean_terminated_length": 570.2964477539062, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.1664, "grad_norm": 0.2925337851047516, "learning_rate": 1.2222222222222223e-06, "loss": -0.0149, "num_tokens": 33385071.0, "reward": 1.1328585147857666, "reward_std": 0.26367974281311035, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5861214995384216, "rewards/format_reward_step_strict": 0.97265625, "step": 156 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.6625511922718772e-07, "aux_brier/mean_group_std": 0.06699555604094448, "aux_brier/mean_r": 0.9519206889973116, "aux_brier/n_active_tok": 300.875, "aux_brier/n_groups": 17.1875, "aux_brier/n_step_records": 75.21875, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6571286472148542, "calib/avg_num_step_conf": 9.58203125, "calib/ece": 0.44301204819277107, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02359549071618039, "calib/mean_conf": 0.1393172690763052, "calib/mu_c": 0.14917241379310348, "calib/mu_w": 0.12557692307692309, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.04394154275501317, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2596.0, "completions/max_terminated_length": 2596.0, "completions/mean_length": 580.53515625, "completions/mean_terminated_length": 585.1063232421875, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.16746666666666668, "grad_norm": 0.0340881273150444, "learning_rate": 1.1944444444444446e-06, "loss": 0.0318, "num_tokens": 33637416.0, "reward": 1.1834672689437866, "reward_std": 0.2716543674468994, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.5463691353797913, "rewards/format_reward_step_strict": 0.9609375, "step": 157 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.893006712800794e-07, "aux_brier/mean_group_std": 0.04659970857217336, "aux_brier/mean_r": 0.9556051760123047, "aux_brier/n_active_tok": 283.25, "aux_brier/n_groups": 15.65625, "aux_brier/n_step_records": 70.8125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5285946605231061, "calib/avg_num_step_conf": 8.921875, "calib/ece": 0.48756972111553787, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": -0.010868681393142676, "calib/mean_conf": 0.15235059760956174, "calib/mu_c": 0.14828025477707008, "calib/mu_w": 0.15914893617021275, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007211155378486055, "calib/std_conf": 0.08224715321997639, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3013.0, "completions/max_terminated_length": 3013.0, "completions/mean_length": 526.14453125, "completions/mean_terminated_length": 530.2874145507812, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.16853333333333334, "grad_norm": 0.029545556753873825, "learning_rate": 1.1666666666666668e-06, "loss": 0.0203, "num_tokens": 33877349.0, "reward": 1.2334338426589966, "reward_std": 0.2396588772535324, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.519672691822052, "rewards/format_reward_step_strict": 0.98046875, "step": 158 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.2961515244723785e-07, "aux_brier/mean_group_std": 0.04257286923089113, "aux_brier/mean_r": 0.9653046586147206, "aux_brier/n_active_tok": 292.125, "aux_brier/n_groups": 18.9375, "aux_brier/n_step_records": 73.03125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5898283534647171, "calib/avg_num_step_conf": 9.2265625, "calib/ece": 0.3743426294820717, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012827082008900176, "calib/mean_conf": 0.14358565737051793, "calib/mu_c": 0.14976923076923077, "calib/mu_w": 0.1369421487603306, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0396082866076552, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2583.0, "completions/max_terminated_length": 2583.0, "completions/mean_length": 543.1484375, "completions/mean_terminated_length": 547.4251708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 227.0, "epoch": 0.1696, "grad_norm": 0.1250731647014618, "learning_rate": 1.138888888888889e-06, "loss": 0.0708, "num_tokens": 34121179.0, "reward": 1.1458985805511475, "reward_std": 0.21349376440048218, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5992199182510376, "rewards/format_reward_step_strict": 0.9765625, "step": 159 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.480219699256935e-06, "aux_brier/mean_group_std": 0.0334162566209208, "aux_brier/mean_r": 0.9741950480593927, "aux_brier/n_active_tok": 286.5, "aux_brier/n_groups": 14.5625, "aux_brier/n_step_records": 71.625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6212211221122113, "calib/avg_num_step_conf": 9.30078125, "calib/ece": 0.4492828685258964, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": 0.007781518151815159, "calib/mean_conf": 0.15613545816733068, "calib/mu_c": 0.15926666666666667, "calib/mu_w": 0.1514851485148515, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003904382470119522, "calib/std_conf": 0.06574584029143392, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2403.0, "completions/max_terminated_length": 2403.0, "completions/mean_length": 520.37890625, "completions/mean_terminated_length": 526.5494384765625, "completions/min_length": 0.0, "completions/min_terminated_length": 205.0, "epoch": 0.17066666666666666, "grad_norm": 0.1191963478922844, "learning_rate": 1.111111111111111e-06, "loss": -0.0061, "num_tokens": 34359236.0, "reward": 1.2144297361373901, "reward_std": 0.2789672613143921, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.5530316233634949, "rewards/format_reward_step_strict": 0.98046875, "step": 160 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.7483956492037045e-06, "aux_brier/mean_group_std": 0.0687977784760483, "aux_brier/mean_r": 0.9512190988550542, "aux_brier/n_active_tok": 285.5, "aux_brier/n_groups": 16.28125, "aux_brier/n_step_records": 71.375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5962960080927555, "calib/avg_num_step_conf": 9.25, "calib/ece": 0.5571428571428573, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.019979768111431012, "calib/mean_conf": 0.1611111111111111, "calib/mu_c": 0.16674033149171272, "calib/mu_w": 0.1467605633802817, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.07475592265353499, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2405.0, "completions/max_terminated_length": 2405.0, "completions/mean_length": 533.8671875, "completions/mean_terminated_length": 538.0708618164062, "completions/min_length": 0.0, "completions/min_terminated_length": 184.0, "epoch": 0.17173333333333332, "grad_norm": 0.1318727284669876, "learning_rate": 1.0833333333333335e-06, "loss": 0.0067, "num_tokens": 34599826.0, "reward": 1.3139612674713135, "reward_std": 0.2085951864719391, "rewards/accuracy_reward_step": 0.70703125, "rewards/final_brier_reward_step": 0.47459492087364197, "rewards/format_reward_step_strict": 0.9765625, "step": 161 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.298125269944995e-07, "aux_brier/mean_group_std": 0.049664201148746846, "aux_brier/mean_r": 0.959903875455974, "aux_brier/n_active_tok": 281.25, "aux_brier/n_groups": 15.09375, "aux_brier/n_step_records": 70.3125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.44551948051948054, "calib/avg_num_step_conf": 8.91796875, "calib/ece": 0.4522047244094488, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00795584415584416, "calib/mean_conf": 0.15897637795275588, "calib/mu_c": 0.15584415584415584, "calib/mu_w": 0.1638, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002440944881889764, "calib/std_conf": 0.04499054398757696, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2582.0, "completions/max_terminated_length": 2582.0, "completions/mean_length": 526.84765625, "completions/mean_terminated_length": 528.9137573242188, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.1728, "grad_norm": 0.08773786574602127, "learning_rate": 1.0555555555555557e-06, "loss": 0.0125, "num_tokens": 34838843.0, "reward": 1.2393226623535156, "reward_std": 0.24485798180103302, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.5510406494140625, "rewards/format_reward_step_strict": 0.9921875, "step": 162 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.975275894799404e-07, "aux_brier/mean_group_std": 0.04379480709244599, "aux_brier/mean_r": 0.9603404263289442, "aux_brier/n_active_tok": 314.875, "aux_brier/n_groups": 19.46875, "aux_brier/n_step_records": 78.71875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5445731389901093, "calib/avg_num_step_conf": 10.2578125, "calib/ece": 0.373413654618474, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": 0.01171655387818843, "calib/mean_conf": 0.1727710843373494, "calib/mu_c": 0.17808823529411763, "calib/mu_w": 0.1663716814159292, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.06473262747229393, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2452.0, "completions/max_terminated_length": 2452.0, "completions/mean_length": 596.85546875, "completions/mean_terminated_length": 601.5551147460938, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.17386666666666667, "grad_norm": 0.021641548722982407, "learning_rate": 1.0277777777777777e-06, "loss": 0.0425, "num_tokens": 35096470.0, "reward": 1.1708632707595825, "reward_std": 0.2582431137561798, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.5975155830383301, "rewards/format_reward_step_strict": 0.97265625, "step": 163 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.549956473936303e-07, "aux_brier/mean_group_std": 0.030194568687748117, "aux_brier/mean_r": 0.9703900914496881, "aux_brier/n_active_tok": 324.375, "aux_brier/n_groups": 18.4375, "aux_brier/n_step_records": 81.09375, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5226155358898722, "calib/avg_num_step_conf": 10.13671875, "calib/ece": 0.3902016129032258, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.004032258064516129, "calib/gap": -0.008477876106194687, "calib/mean_conf": 0.17786290322580645, "calib/mu_c": 0.174, "calib/mu_w": 0.18247787610619468, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01185483870967742, "calib/std_conf": 0.07882113629639674, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2622.0, "completions/max_terminated_length": 2622.0, "completions/mean_length": 631.1875, "completions/mean_terminated_length": 633.6627807617188, "completions/min_length": 0.0, "completions/min_terminated_length": 237.0, "epoch": 0.17493333333333333, "grad_norm": 0.2804113030433655, "learning_rate": 1.0000000000000002e-06, "loss": 0.0745, "num_tokens": 35364190.0, "reward": 1.158782958984375, "reward_std": 0.24492324888706207, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5882566571235657, "rewards/format_reward_step_strict": 0.96875, "step": 164 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.6713932611267523e-06, "aux_brier/mean_group_std": 0.043575130170754296, "aux_brier/mean_r": 0.9642686975868887, "aux_brier/n_active_tok": 307.125, "aux_brier/n_groups": 16.46875, "aux_brier/n_step_records": 76.78125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5588864402385273, "calib/avg_num_step_conf": 10.171875, "calib/ece": 0.2985140562248996, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009804252009333686, "calib/mean_conf": 0.16734939759036147, "calib/mu_c": 0.1725862068965517, "calib/mu_w": 0.16278195488721803, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.039568485817860495, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2695.0, "completions/max_terminated_length": 2695.0, "completions/mean_length": 568.49609375, "completions/mean_terminated_length": 577.5198974609375, "completions/min_length": 0.0, "completions/min_terminated_length": 205.0, "epoch": 0.176, "grad_norm": 0.23183128237724304, "learning_rate": 9.722222222222224e-07, "loss": 0.0162, "num_tokens": 35615301.0, "reward": 1.0983335971832275, "reward_std": 0.27246785163879395, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.6433343887329102, "rewards/format_reward_step_strict": 0.96875, "step": 165 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.2855281397022855e-07, "aux_brier/mean_group_std": 0.06651987752337103, "aux_brier/mean_r": 0.9438013246607929, "aux_brier/n_active_tok": 316.125, "aux_brier/n_groups": 17.4375, "aux_brier/n_step_records": 79.03125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5278820558323664, "calib/avg_num_step_conf": 10.2265625, "calib/ece": 0.4563095238095239, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000769230769230772, "calib/mean_conf": 0.18257936507936506, "calib/mu_c": 0.18285714285714283, "calib/mu_w": 0.18208791208791206, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0483137650329631, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2914.0, "completions/max_terminated_length": 2914.0, "completions/mean_length": 600.9453125, "completions/mean_terminated_length": 605.6771850585938, "completions/min_length": 0.0, "completions/min_terminated_length": 182.0, "epoch": 0.17706666666666668, "grad_norm": 0.0220769215375185, "learning_rate": 9.444444444444445e-07, "loss": 0.0317, "num_tokens": 35875327.0, "reward": 1.2528619766235352, "reward_std": 0.2590295672416687, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.5426980257034302, "rewards/format_reward_step_strict": 0.9765625, "step": 166 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -8.33835688496265e-07, "aux_brier/mean_group_std": 0.05013251534438689, "aux_brier/mean_r": 0.9487601246756485, "aux_brier/n_active_tok": 307.75, "aux_brier/n_groups": 19.25, "aux_brier/n_step_records": 76.9375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5766081052705256, "calib/avg_num_step_conf": 9.81640625, "calib/ece": 0.4459677419354839, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.004032258064516129, "calib/gap": 0.01416112549870513, "calib/mean_conf": 0.18709677419354842, "calib/mu_c": 0.19229299363057323, "calib/mu_w": 0.1781318681318681, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.07162034980173725, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2687.0, "completions/max_terminated_length": 2687.0, "completions/mean_length": 593.51953125, "completions/mean_terminated_length": 600.5573120117188, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.17813333333333334, "grad_norm": 0.12341222912073135, "learning_rate": 9.166666666666666e-07, "loss": 0.0711, "num_tokens": 36132876.0, "reward": 1.2357680797576904, "reward_std": 0.26579955220222473, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.5524476766586304, "rewards/format_reward_step_strict": 0.96875, "step": 167 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.599532062663723e-07, "aux_brier/mean_group_std": 0.07778371042239708, "aux_brier/mean_r": 0.9417164956729162, "aux_brier/n_active_tok": 324.0, "aux_brier/n_groups": 19.15625, "aux_brier/n_step_records": 81.0, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5481107800650055, "calib/avg_num_step_conf": 10.50390625, "calib/ece": 0.39934959349593496, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007831798483206975, "calib/mean_conf": 0.17788617886178865, "calib/mu_c": 0.18119718309859156, "calib/mu_w": 0.1733653846153846, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04079000420596301, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2611.0, "completions/max_terminated_length": 2611.0, "completions/mean_length": 607.4453125, "completions/mean_terminated_length": 617.0873413085938, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.1792, "grad_norm": 0.1288532018661499, "learning_rate": 8.88888888888889e-07, "loss": 0.0463, "num_tokens": 36393054.0, "reward": 1.1789710521697998, "reward_std": 0.30341869592666626, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.5752593278884888, "rewards/format_reward_step_strict": 0.9609375, "step": 168 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.4110437178138646e-06, "aux_brier/mean_group_std": 0.04138852539772683, "aux_brier/mean_r": 0.9618776345813835, "aux_brier/n_active_tok": 306.5, "aux_brier/n_groups": 16.84375, "aux_brier/n_step_records": 76.625, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.6126653439153439, "calib/avg_num_step_conf": 10.2265625, "calib/ece": 0.3870564516129032, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.016166666666666635, "calib/mean_conf": 0.17745967741935487, "calib/mu_c": 0.1845, "calib/mu_w": 0.16833333333333336, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04478891337358236, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2643.0, "completions/max_terminated_length": 2643.0, "completions/mean_length": 574.2890625, "completions/mean_terminated_length": 588.072021484375, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.18026666666666666, "grad_norm": 0.30349060893058777, "learning_rate": 8.611111111111112e-07, "loss": -0.0289, "num_tokens": 36644256.0, "reward": 1.1761419773101807, "reward_std": 0.251577228307724, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.5873804688453674, "rewards/format_reward_step_strict": 0.96484375, "step": 169 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.2160936344873807e-06, "aux_brier/mean_group_std": 0.04908099203046632, "aux_brier/mean_r": 0.9580694977165491, "aux_brier/n_active_tok": 314.875, "aux_brier/n_groups": 16.09375, "aux_brier/n_step_records": 78.71875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5563486616334935, "calib/avg_num_step_conf": 10.328125, "calib/ece": 0.4449799196787149, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004921070693205243, "calib/mean_conf": 0.18072289156626506, "calib/mu_c": 0.1825806451612903, "calib/mu_w": 0.17765957446808506, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0016064257028112448, "calib/std_conf": 0.03979212448155175, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2064.0, "completions/max_terminated_length": 2064.0, "completions/mean_length": 578.5546875, "completions/mean_terminated_length": 585.4150390625, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.18133333333333335, "grad_norm": 0.14562365412712097, "learning_rate": 8.333333333333333e-07, "loss": 0.0099, "num_tokens": 36896518.0, "reward": 1.2218576669692993, "reward_std": 0.2752302885055542, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.5436804890632629, "rewards/format_reward_step_strict": 0.9609375, "step": 170 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.1814434455748035e-06, "aux_brier/mean_group_std": 0.053518122634929895, "aux_brier/mean_r": 0.9501083085068752, "aux_brier/n_active_tok": 315.125, "aux_brier/n_groups": 16.90625, "aux_brier/n_step_records": 78.78125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6047500000000001, "calib/avg_num_step_conf": 10.27734375, "calib/ece": 0.32648221343873524, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.018338749999999987, "calib/mean_conf": 0.18015810276679842, "calib/mu_c": 0.18921875, "calib/mu_w": 0.17088, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00035573122529644266, "calib/std_conf": 0.04962283341769699, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2884.0, "completions/max_terminated_length": 2884.0, "completions/mean_length": 573.12109375, "completions/mean_terminated_length": 577.6338500976562, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.1824, "grad_norm": 0.024209948256611824, "learning_rate": 8.055555555555557e-07, "loss": 0.0182, "num_tokens": 37150133.0, "reward": 1.1526150703430176, "reward_std": 0.277605801820755, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6417101621627808, "rewards/format_reward_step_strict": 0.984375, "step": 171 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.2334762645727082e-06, "aux_brier/mean_group_std": 0.040059631960310045, "aux_brier/mean_r": 0.9610557823416042, "aux_brier/n_active_tok": 304.875, "aux_brier/n_groups": 16.4375, "aux_brier/n_step_records": 76.21875, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4816964285714286, "calib/avg_num_step_conf": 9.546875, "calib/ece": 0.4575298804780876, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0013612637362637459, "calib/mean_conf": 0.18286852589641436, "calib/mu_c": 0.18237499999999998, "calib/mu_w": 0.18373626373626373, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001474103585657371, "calib/std_conf": 0.0393743228034895, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2581.0, "completions/max_terminated_length": 2581.0, "completions/mean_length": 536.265625, "completions/mean_terminated_length": 542.62451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.18346666666666667, "grad_norm": 0.10507678985595703, "learning_rate": 7.777777777777779e-07, "loss": -0.0064, "num_tokens": 37390769.0, "reward": 1.2439298629760742, "reward_std": 0.26565784215927124, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.5382195711135864, "rewards/format_reward_step_strict": 0.96875, "step": 172 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 8.35707126611851e-07, "aux_brier/mean_group_std": 0.06786904352989624, "aux_brier/mean_r": 0.9430748218994833, "aux_brier/n_active_tok": 309.625, "aux_brier/n_groups": 16.53125, "aux_brier/n_step_records": 77.40625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5970543557228717, "calib/avg_num_step_conf": 9.9453125, "calib/ece": 0.40015999999999996, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": 0.018617000198137446, "calib/mean_conf": 0.18783999999999998, "calib/mu_c": 0.19551020408163264, "calib/mu_w": 0.1768932038834952, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.06652619333766212, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2432.0, "completions/max_terminated_length": 2432.0, "completions/mean_length": 580.0234375, "completions/mean_terminated_length": 584.590576171875, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.18453333333333333, "grad_norm": 0.2762625217437744, "learning_rate": 7.5e-07, "loss": 0.0359, "num_tokens": 37642415.0, "reward": 1.2034776210784912, "reward_std": 0.2693626284599304, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.5795355439186096, "rewards/format_reward_step_strict": 0.9609375, "step": 173 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.6533995772792807e-07, "aux_brier/mean_group_std": 0.0577192916816698, "aux_brier/mean_r": 0.9514894683358318, "aux_brier/n_active_tok": 303.375, "aux_brier/n_groups": 16.4375, "aux_brier/n_step_records": 75.84375, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.6169354838709677, "calib/avg_num_step_conf": 9.5703125, "calib/ece": 0.3134016393442623, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014459677419354833, "calib/mean_conf": 0.17840163934426229, "calib/mu_c": 0.18575, "calib/mu_w": 0.17129032258064517, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.037996798794425114, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2874.0, "completions/max_terminated_length": 2874.0, "completions/mean_length": 640.51171875, "completions/mean_terminated_length": 645.5551147460938, "completions/min_length": 0.0, "completions/min_terminated_length": 207.0, "epoch": 0.1856, "grad_norm": 0.16314463317394257, "learning_rate": 7.222222222222222e-07, "loss": 0.0737, "num_tokens": 37910618.0, "reward": 1.0991191864013672, "reward_std": 0.3627810478210449, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6230390667915344, "rewards/format_reward_step_strict": 0.94921875, "step": 174 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.430361870335961e-07, "aux_brier/mean_group_std": 0.04861527252577536, "aux_brier/mean_r": 0.9552268439595475, "aux_brier/n_active_tok": 329.875, "aux_brier/n_groups": 20.9375, "aux_brier/n_step_records": 82.46875, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5253977968176254, "calib/avg_num_step_conf": 10.55078125, "calib/ece": 0.2934979423868313, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.00411522633744856, "calib/gap": -0.004075887392900823, "calib/mean_conf": 0.1911111111111111, "calib/mu_c": 0.18894736842105264, "calib/mu_w": 0.19302325581395346, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007736625514403293, "calib/std_conf": 0.07984552988098526, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2722.0, "completions/max_terminated_length": 2722.0, "completions/mean_length": 665.56640625, "completions/mean_terminated_length": 676.1309814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.18666666666666668, "grad_norm": 0.12036768347024918, "learning_rate": 6.944444444444446e-07, "loss": 0.0299, "num_tokens": 38186827.0, "reward": 1.0777885913848877, "reward_std": 0.2837650775909424, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6314672231674194, "rewards/format_reward_step_strict": 0.94921875, "step": 175 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.174230180455709e-07, "aux_brier/mean_group_std": 0.08334401902031244, "aux_brier/mean_r": 0.9388947683025433, "aux_brier/n_active_tok": 320.75, "aux_brier/n_groups": 18.15625, "aux_brier/n_step_records": 80.1875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5531506147540983, "calib/avg_num_step_conf": 10.12109375, "calib/ece": 0.30643999999999993, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00463627049180329, "calib/mean_conf": 0.18819999999999998, "calib/mu_c": 0.19057377049180327, "calib/mu_w": 0.18593749999999998, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0033200000000000005, "calib/std_conf": 0.05180308871100255, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2902.0, "completions/max_terminated_length": 2902.0, "completions/mean_length": 605.5, "completions/mean_terminated_length": 607.8745727539062, "completions/min_length": 0.0, "completions/min_terminated_length": 195.0, "epoch": 0.18773333333333334, "grad_norm": 0.08937592804431915, "learning_rate": 6.666666666666667e-07, "loss": 0.0775, "num_tokens": 38445899.0, "reward": 1.1259515285491943, "reward_std": 0.24563716351985931, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6444308757781982, "rewards/format_reward_step_strict": 0.9765625, "step": 176 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.7630749749031338e-07, "aux_brier/mean_group_std": 0.04622896489964074, "aux_brier/mean_r": 0.9582977720744049, "aux_brier/n_active_tok": 326.75, "aux_brier/n_groups": 19.3125, "aux_brier/n_step_records": 81.6875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5487773321257601, "calib/avg_num_step_conf": 10.62890625, "calib/ece": 0.34775100401606424, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0013222926639927246, "calib/mean_conf": 0.18967871485943777, "calib/mu_c": 0.19030534351145037, "calib/mu_w": 0.18898305084745765, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005662650602409638, "calib/std_conf": 0.06390172855568649, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3043.0, "completions/max_terminated_length": 3043.0, "completions/mean_length": 629.140625, "completions/mean_terminated_length": 631.60791015625, "completions/min_length": 0.0, "completions/min_terminated_length": 234.0, "epoch": 0.1888, "grad_norm": 0.054024118930101395, "learning_rate": 6.388888888888889e-07, "loss": 0.0862, "num_tokens": 38710791.0, "reward": 1.147057056427002, "reward_std": 0.24264121055603027, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6116660833358765, "rewards/format_reward_step_strict": 0.96484375, "step": 177 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.4876302640031156e-07, "aux_brier/mean_group_std": 0.05794493050943693, "aux_brier/mean_r": 0.9485298124462436, "aux_brier/n_active_tok": 322.0, "aux_brier/n_groups": 18.09375, "aux_brier/n_step_records": 80.5, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5144155844155844, "calib/avg_num_step_conf": 10.0625, "calib/ece": 0.4217322834645669, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002633766233766238, "calib/mean_conf": 0.1851968503937008, "calib/mu_c": 0.18623376623376622, "calib/mu_w": 0.18359999999999999, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00031496062992125983, "calib/std_conf": 0.03993795175118499, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2944.0, "completions/max_terminated_length": 2944.0, "completions/mean_length": 581.05078125, "completions/mean_terminated_length": 581.05078125, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.18986666666666666, "grad_norm": 0.2664334774017334, "learning_rate": 6.111111111111112e-07, "loss": 0.0883, "num_tokens": 38965612.0, "reward": 1.2366094589233398, "reward_std": 0.2786281108856201, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.5714383125305176, "rewards/format_reward_step_strict": 0.984375, "step": 178 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -9.297074625297386e-07, "aux_brier/mean_group_std": 0.051580070457991366, "aux_brier/mean_r": 0.9524757367183885, "aux_brier/n_active_tok": 329.875, "aux_brier/n_groups": 19.0, "aux_brier/n_step_records": 82.46875, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5602791426333404, "calib/avg_num_step_conf": 10.68359375, "calib/ece": 0.43336065573770494, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.00819672131147541, "calib/gap": 0.0008388520971302493, "calib/mean_conf": 0.19385245901639342, "calib/mu_c": 0.19417218543046358, "calib/mu_w": 0.19333333333333333, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004180327868852459, "calib/std_conf": 0.0820730054991174, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2795.0, "completions/max_terminated_length": 2795.0, "completions/mean_length": 611.109375, "completions/mean_terminated_length": 623.2828979492188, "completions/min_length": 0.0, "completions/min_terminated_length": 269.0, "epoch": 0.19093333333333334, "grad_norm": 0.21540477871894836, "learning_rate": 5.833333333333334e-07, "loss": 0.0806, "num_tokens": 39228320.0, "reward": 1.2010421752929688, "reward_std": 0.32985225319862366, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.5463562607765198, "rewards/format_reward_step_strict": 0.94921875, "step": 179 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -8.247282946294288e-07, "aux_brier/mean_group_std": 0.06468195060806523, "aux_brier/mean_r": 0.9402379308001811, "aux_brier/n_active_tok": 326.0, "aux_brier/n_groups": 17.84375, "aux_brier/n_step_records": 81.5, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5629285337552743, "calib/avg_num_step_conf": 10.30078125, "calib/ece": 0.4245275590551181, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.007372099156118139, "calib/mean_conf": 0.20531496062992127, "calib/mu_c": 0.2081012658227848, "calib/mu_w": 0.20072916666666665, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00389763779527559, "calib/std_conf": 0.07317664968857292, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2416.0, "completions/max_terminated_length": 2416.0, "completions/mean_length": 645.94140625, "completions/mean_terminated_length": 648.4745483398438, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.192, "grad_norm": 0.016230851411819458, "learning_rate": 5.555555555555555e-07, "loss": -0.0057, "num_tokens": 39497537.0, "reward": 1.2594654560089111, "reward_std": 0.2163536101579666, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.5847371220588684, "rewards/format_reward_step_strict": 0.9921875, "step": 180 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.806177327701164e-07, "aux_brier/mean_group_std": 0.041868387831200275, "aux_brier/mean_r": 0.9589451704188524, "aux_brier/n_active_tok": 335.0, "aux_brier/n_groups": 19.46875, "aux_brier/n_step_records": 83.75, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5820322580645162, "calib/avg_num_step_conf": 10.97265625, "calib/ece": 0.3192369477911647, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0035651612903226104, "calib/mean_conf": 0.19385542168674696, "calib/mu_c": 0.19564516129032258, "calib/mu_w": 0.19207999999999997, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00755020080321285, "calib/std_conf": 0.07320814785508771, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2800.0, "completions/max_terminated_length": 2800.0, "completions/mean_length": 583.48046875, "completions/mean_terminated_length": 592.7421264648438, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.19306666666666666, "grad_norm": 0.1458035260438919, "learning_rate": 5.277777777777779e-07, "loss": 0.041, "num_tokens": 39753172.0, "reward": 1.1297149658203125, "reward_std": 0.24853432178497314, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.6360472440719604, "rewards/format_reward_step_strict": 0.97265625, "step": 181 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.120180851745591e-07, "aux_brier/mean_group_std": 0.06463662238827718, "aux_brier/mean_r": 0.9461888610953979, "aux_brier/n_active_tok": 359.625, "aux_brier/n_groups": 23.40625, "aux_brier/n_step_records": 89.90625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5073534661240808, "calib/avg_num_step_conf": 11.47265625, "calib/ece": 0.46689516129032255, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.008064516129032258, "calib/gap": -0.013689583779538828, "calib/mean_conf": 0.2015725806451613, "calib/mu_c": 0.1967701863354037, "calib/mu_w": 0.21045977011494252, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.009637096774193547, "calib/std_conf": 0.09607386887409627, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2745.0, "completions/max_terminated_length": 2745.0, "completions/mean_length": 640.58984375, "completions/mean_terminated_length": 643.1019897460938, "completions/min_length": 0.0, "completions/min_terminated_length": 237.0, "epoch": 0.19413333333333332, "grad_norm": 0.12417051941156387, "learning_rate": 5.000000000000001e-07, "loss": 0.0344, "num_tokens": 40023323.0, "reward": 1.2451366186141968, "reward_std": 0.2409558892250061, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.5352339744567871, "rewards/format_reward_step_strict": 0.96484375, "step": 182 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.3778141284024343e-07, "aux_brier/mean_group_std": 0.04932587647033141, "aux_brier/mean_r": 0.9535072317581955, "aux_brier/n_active_tok": 334.0, "aux_brier/n_groups": 21.1875, "aux_brier/n_step_records": 83.5, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.4950993555316864, "calib/avg_num_step_conf": 10.5390625, "calib/ece": 0.34751020408163263, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013406954887218075, "calib/mean_conf": 0.19951020408163267, "calib/mu_c": 0.2056390977443609, "calib/mu_w": 0.19223214285714282, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0020816326530612244, "calib/std_conf": 0.09193478665835655, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2796.0, "completions/max_terminated_length": 2796.0, "completions/mean_length": 670.6796875, "completions/mean_terminated_length": 675.9606323242188, "completions/min_length": 0.0, "completions/min_terminated_length": 210.0, "epoch": 0.1952, "grad_norm": 0.03099486045539379, "learning_rate": 4.7222222222222226e-07, "loss": 0.0785, "num_tokens": 40301697.0, "reward": 1.1382991075515747, "reward_std": 0.3700042963027954, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5922585725784302, "rewards/format_reward_step_strict": 0.94140625, "step": 183 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -8.974200826905587e-09, "aux_brier/mean_group_std": 0.0670935194535348, "aux_brier/mean_r": 0.9483376751504038, "aux_brier/n_active_tok": 307.125, "aux_brier/n_groups": 15.09375, "aux_brier/n_step_records": 76.78125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5974809761217528, "calib/avg_num_step_conf": 9.8203125, "calib/ece": 0.3993625498007969, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012492784046182148, "calib/mean_conf": 0.19027888446215135, "calib/mu_c": 0.1954054054054054, "calib/mu_w": 0.18291262135922326, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04007366116384052, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2572.0, "completions/max_terminated_length": 2572.0, "completions/mean_length": 581.9609375, "completions/mean_terminated_length": 586.5433349609375, "completions/min_length": 0.0, "completions/min_terminated_length": 243.0, "epoch": 0.19626666666666667, "grad_norm": 0.07316029071807861, "learning_rate": 4.444444444444445e-07, "loss": 0.0459, "num_tokens": 40555959.0, "reward": 1.2161612510681152, "reward_std": 0.2546941339969635, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.5912078022956848, "rewards/format_reward_step_strict": 0.98046875, "step": 184 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.893121549111925e-07, "aux_brier/mean_group_std": 0.07092038911043529, "aux_brier/mean_r": 0.937299235638441, "aux_brier/n_active_tok": 338.875, "aux_brier/n_groups": 22.5, "aux_brier/n_step_records": 84.71875, "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.5733463598205241, "calib/avg_num_step_conf": 11.4140625, "calib/ece": 0.4256016597510373, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005728759588941951, "calib/mean_conf": 0.19647302904564312, "calib/mu_c": 0.19870748299319727, "calib/mu_w": 0.19297872340425531, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006058091286307054, "calib/std_conf": 0.07369619352868514, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2751.0, "completions/max_terminated_length": 2751.0, "completions/mean_length": 613.64453125, "completions/mean_terminated_length": 630.8955688476562, "completions/min_length": 0.0, "completions/min_terminated_length": 244.0, "epoch": 0.19733333333333333, "grad_norm": 0.04127538576722145, "learning_rate": 4.1666666666666667e-07, "loss": -0.019, "num_tokens": 40819972.0, "reward": 1.1787972450256348, "reward_std": 0.32682564854621887, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.5511261820793152, "rewards/format_reward_step_strict": 0.93359375, "step": 185 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.4500213896262437e-07, "aux_brier/mean_group_std": 0.05271762792050771, "aux_brier/mean_r": 0.9556347104159724, "aux_brier/n_active_tok": 328.5, "aux_brier/n_groups": 16.65625, "aux_brier/n_step_records": 82.125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5622324452111687, "calib/avg_num_step_conf": 10.26953125, "calib/ece": 0.3754761904761904, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.007936507936507936, "calib/gap": -0.0025876940770558354, "calib/mean_conf": 0.20404761904761903, "calib/mu_c": 0.20290780141843973, "calib/mu_w": 0.20549549549549556, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.009999999999999998, "calib/std_conf": 0.08374354816587623, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2870.0, "completions/max_terminated_length": 2870.0, "completions/mean_length": 641.70703125, "completions/mean_terminated_length": 641.70703125, "completions/min_length": 223.0, "completions/min_terminated_length": 223.0, "epoch": 0.1984, "grad_norm": 0.04874926805496216, "learning_rate": 3.8888888888888895e-07, "loss": 0.0724, "num_tokens": 41089289.0, "reward": 1.187770128250122, "reward_std": 0.24495184421539307, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.6026433706283569, "rewards/format_reward_step_strict": 0.97265625, "step": 186 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 5.899584302193617e-07, "aux_brier/mean_group_std": 0.05150954006876684, "aux_brier/mean_r": 0.9477049048969173, "aux_brier/n_active_tok": 402.875, "aux_brier/n_groups": 30.0, "aux_brier/n_step_records": 100.71875, "calib/answer_extract_rate": 0.93359375, "calib/auroc": 0.474323386537127, "calib/avg_num_step_conf": 12.73046875, "calib/ece": 0.3715336099585062, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.01385443442054135, "calib/mean_conf": 0.19601825726141078, "calib/mu_c": 0.18969465648854958, "calib/mu_w": 0.20354909090909093, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011991701244813278, "calib/std_conf": 0.0640742333899886, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3062.0, "completions/max_terminated_length": 3062.0, "completions/mean_length": 724.9140625, "completions/mean_terminated_length": 730.6220703125, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.19946666666666665, "grad_norm": 0.09774050116539001, "learning_rate": 3.611111111111111e-07, "loss": 0.0867, "num_tokens": 41376411.0, "reward": 1.1225202083587646, "reward_std": 0.29422616958618164, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5760183334350586, "rewards/format_reward_step_strict": 0.93359375, "step": 187 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.1324720969874136e-06, "aux_brier/mean_group_std": 0.058332879115439454, "aux_brier/mean_r": 0.9388424391540364, "aux_brier/n_active_tok": 356.0, "aux_brier/n_groups": 22.71875, "aux_brier/n_step_records": 89.0, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.45626342642320084, "calib/avg_num_step_conf": 11.13671875, "calib/ece": 0.42212, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.008, "calib/gap": -0.018939312567132133, "calib/mean_conf": 0.21124, "calib/mu_c": 0.2038157894736842, "calib/mu_w": 0.22275510204081633, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.01268, "calib/std_conf": 0.10904523098237721, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3027.0, "completions/max_terminated_length": 3027.0, "completions/mean_length": 675.4609375, "completions/mean_terminated_length": 678.10986328125, "completions/min_length": 0.0, "completions/min_terminated_length": 203.0, "epoch": 0.20053333333333334, "grad_norm": 0.018280059099197388, "learning_rate": 3.3333333333333335e-07, "loss": 0.0502, "num_tokens": 41653401.0, "reward": 1.2164448499679565, "reward_std": 0.2764585018157959, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.5610917806625366, "rewards/format_reward_step_strict": 0.96484375, "step": 188 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.425059591499746e-08, "aux_brier/mean_group_std": 0.06300626618321063, "aux_brier/mean_r": 0.9338068102297048, "aux_brier/n_active_tok": 313.375, "aux_brier/n_groups": 18.375, "aux_brier/n_step_records": 78.34375, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5577608665843959, "calib/avg_num_step_conf": 10.26953125, "calib/ece": 0.39285714285714285, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009125188536953183, "calib/mean_conf": 0.19081632653061228, "calib/mu_c": 0.1946153846153846, "calib/mu_w": 0.18549019607843142, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04165145633800878, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2559.0, "completions/max_terminated_length": 2559.0, "completions/mean_length": 604.39453125, "completions/mean_terminated_length": 611.561279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 208.0, "epoch": 0.2016, "grad_norm": 0.06164390221238136, "learning_rate": 3.055555555555556e-07, "loss": 0.0641, "num_tokens": 41915894.0, "reward": 1.1790344715118408, "reward_std": 0.2340812385082245, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.5755125284194946, "rewards/format_reward_step_strict": 0.953125, "step": 189 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -4.969700202472893e-07, "aux_brier/mean_group_std": 0.06483934127158777, "aux_brier/mean_r": 0.9460970959418037, "aux_brier/n_active_tok": 371.5, "aux_brier/n_groups": 21.375, "aux_brier/n_step_records": 92.875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5802218974222696, "calib/avg_num_step_conf": 11.765625, "calib/ece": 0.3767741935483871, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014863141110815797, "calib/mean_conf": 0.20266129032258068, "calib/mu_c": 0.20901408450704226, "calib/mu_w": 0.19415094339622646, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0034274193548387093, "calib/std_conf": 0.07059778017811186, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2904.0, "completions/max_terminated_length": 2904.0, "completions/mean_length": 666.2421875, "completions/mean_terminated_length": 674.142333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 227.0, "epoch": 0.20266666666666666, "grad_norm": 0.11814999580383301, "learning_rate": 2.7777777777777776e-07, "loss": 0.0473, "num_tokens": 42192060.0, "reward": 1.181680679321289, "reward_std": 0.3146505057811737, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.5939102172851562, "rewards/format_reward_step_strict": 0.95703125, "step": 190 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.2416882844433772e-07, "aux_brier/mean_group_std": 0.04537874532034743, "aux_brier/mean_r": 0.9516971164881491, "aux_brier/n_active_tok": 377.75, "aux_brier/n_groups": 21.96875, "aux_brier/n_step_records": 94.4375, "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.6035618279569892, "calib/avg_num_step_conf": 12.23046875, "calib/ece": 0.30159836065573764, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.020198924731182788, "calib/mean_conf": 0.2065983606557377, "calib/mu_c": 0.21653225806451615, "calib/mu_w": 0.19633333333333336, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.06420627105831789, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2954.0, "completions/max_terminated_length": 2954.0, "completions/mean_length": 644.0234375, "completions/mean_terminated_length": 651.6600952148438, "completions/min_length": 0.0, "completions/min_terminated_length": 195.0, "epoch": 0.20373333333333332, "grad_norm": 0.046372488141059875, "learning_rate": 2.5000000000000004e-07, "loss": 0.0722, "num_tokens": 42461098.0, "reward": 1.1078917980194092, "reward_std": 0.2727097272872925, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.6190671920776367, "rewards/format_reward_step_strict": 0.9375, "step": 191 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.9225027770317666e-07, "aux_brier/mean_group_std": 0.05678803464707159, "aux_brier/mean_r": 0.9381862612549897, "aux_brier/n_active_tok": 346.125, "aux_brier/n_groups": 21.96875, "aux_brier/n_step_records": 86.53125, "calib/answer_extract_rate": 0.93359375, "calib/auroc": 0.4678402903811252, "calib/avg_num_step_conf": 11.12109375, "calib/ece": 0.4224166666666666, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.008333333333333333, "calib/gap": -0.014548094373865783, "calib/mean_conf": 0.201, "calib/mu_c": 0.19524137931034483, "calib/mu_w": 0.2097894736842106, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.009625, "calib/std_conf": 0.09888039913619551, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2940.0, "completions/max_terminated_length": 2940.0, "completions/mean_length": 661.5234375, "completions/mean_terminated_length": 672.0238647460938, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.2048, "grad_norm": 0.034542765468358994, "learning_rate": 2.2222222222222224e-07, "loss": 0.0716, "num_tokens": 42735424.0, "reward": 1.16123366355896, "reward_std": 0.3240078389644623, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.5355597734451294, "rewards/format_reward_step_strict": 0.921875, "step": 192 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 4.24852955260846e-07, "aux_brier/mean_group_std": 0.0412554376989771, "aux_brier/mean_r": 0.9550871661785169, "aux_brier/n_active_tok": 336.375, "aux_brier/n_groups": 18.59375, "aux_brier/n_step_records": 84.09375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5379738729508197, "calib/avg_num_step_conf": 10.9296875, "calib/ece": 0.31508, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": 0.005493084016393424, "calib/mean_conf": 0.20395999999999997, "calib/mu_c": 0.206640625, "calib/mu_w": 0.20114754098360657, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0035199999999999997, "calib/std_conf": 0.08556587170128054, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2364.0, "completions/max_terminated_length": 2364.0, "completions/mean_length": 600.140625, "completions/mean_terminated_length": 612.0956420898438, "completions/min_length": 0.0, "completions/min_terminated_length": 280.0, "epoch": 0.20586666666666667, "grad_norm": 0.12756557762622833, "learning_rate": 1.9444444444444447e-07, "loss": -0.0083, "num_tokens": 42994772.0, "reward": 1.1384763717651367, "reward_std": 0.34489667415618896, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6242179870605469, "rewards/format_reward_step_strict": 0.96484375, "step": 193 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.371264476192959e-07, "aux_brier/mean_group_std": 0.07162886939300656, "aux_brier/mean_r": 0.9427285563837255, "aux_brier/n_active_tok": 319.25, "aux_brier/n_groups": 17.6875, "aux_brier/n_step_records": 79.8125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.647510593220339, "calib/avg_num_step_conf": 10.54296875, "calib/ece": 0.33292682926829276, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.021532044491525487, "calib/mean_conf": 0.18959349593495936, "calib/mu_c": 0.19992187500000003, "calib/mu_w": 0.17838983050847454, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0010975609756097562, "calib/std_conf": 0.04981099845870625, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2583.0, "completions/max_terminated_length": 2583.0, "completions/mean_length": 580.15234375, "completions/mean_terminated_length": 591.7091674804688, "completions/min_length": 0.0, "completions/min_terminated_length": 221.0, "epoch": 0.20693333333333333, "grad_norm": 0.038445133715867996, "learning_rate": 1.6666666666666668e-07, "loss": -0.0021, "num_tokens": 43249235.0, "reward": 1.133557677268982, "reward_std": 0.27232956886291504, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6201683282852173, "rewards/format_reward_step_strict": 0.95703125, "step": 194 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -4.460890020985264e-07, "aux_brier/mean_group_std": 0.05744066556824241, "aux_brier/mean_r": 0.9387691897144076, "aux_brier/n_active_tok": 331.875, "aux_brier/n_groups": 20.84375, "aux_brier/n_step_records": 82.96875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.47642195767195766, "calib/avg_num_step_conf": 10.64453125, "calib/ece": 0.35194331983805666, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.008097165991902834, "calib/gap": -0.0006633597883598241, "calib/mean_conf": 0.20222672064777322, "calib/mu_c": 0.2019259259259259, "calib/mu_w": 0.20258928571428572, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003805668016194332, "calib/std_conf": 0.08245905139190195, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2957.0, "completions/max_terminated_length": 2957.0, "completions/mean_length": 595.90625, "completions/mean_terminated_length": 602.9723510742188, "completions/min_length": 0.0, "completions/min_terminated_length": 214.0, "epoch": 0.208, "grad_norm": 0.10510091483592987, "learning_rate": 1.3888888888888888e-07, "loss": 0.0362, "num_tokens": 43507771.0, "reward": 1.1608781814575195, "reward_std": 0.2738897204399109, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.6044504046440125, "rewards/format_reward_step_strict": 0.96484375, "step": 195 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 5.751512754947896e-07, "aux_brier/mean_group_std": 0.05371166723777047, "aux_brier/mean_r": 0.9496463667116218, "aux_brier/n_active_tok": 331.5, "aux_brier/n_groups": 20.09375, "aux_brier/n_step_records": 82.875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5146788990825688, "calib/avg_num_step_conf": 10.359375, "calib/ece": 0.36634538152610435, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002650720838794224, "calib/mean_conf": 0.19919678714859435, "calib/mu_c": 0.20035714285714284, "calib/mu_w": 0.19770642201834862, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001646586345381526, "calib/std_conf": 0.04424463561656084, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2733.0, "completions/max_terminated_length": 2733.0, "completions/mean_length": 556.12890625, "completions/mean_terminated_length": 558.309814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 201.0, "epoch": 0.20906666666666668, "grad_norm": 0.050895657390356064, "learning_rate": 1.1111111111111112e-07, "loss": 0.0711, "num_tokens": 43752684.0, "reward": 1.1843090057373047, "reward_std": 0.23462121188640594, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6044234037399292, "rewards/format_reward_step_strict": 0.97265625, "step": 196 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 5.12161457244531e-07, "aux_brier/mean_group_std": 0.08229145749638564, "aux_brier/mean_r": 0.932691426152198, "aux_brier/n_active_tok": 332.25, "aux_brier/n_groups": 18.75, "aux_brier/n_step_records": 83.0625, "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.61247947454844, "calib/avg_num_step_conf": 10.6015625, "calib/ece": 0.3320247933884297, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013883415435139546, "calib/mean_conf": 0.19929752066115705, "calib/mu_c": 0.20595238095238097, "calib/mu_w": 0.19206896551724142, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005330578512396694, "calib/std_conf": 0.06305515683725031, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2876.0, "completions/max_terminated_length": 2876.0, "completions/mean_length": 621.7421875, "completions/mean_terminated_length": 631.6111450195312, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.21013333333333334, "grad_norm": 0.022467566654086113, "learning_rate": 8.333333333333334e-08, "loss": 0.1037, "num_tokens": 44016906.0, "reward": 1.1161017417907715, "reward_std": 0.27242499589920044, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.6128445267677307, "rewards/format_reward_step_strict": 0.94140625, "step": 197 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 7.942384199211006e-07, "aux_brier/mean_group_std": 0.0473581039447935, "aux_brier/mean_r": 0.9577410146574017, "aux_brier/n_active_tok": 338.625, "aux_brier/n_groups": 21.90625, "aux_brier/n_step_records": 84.65625, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5420021470746108, "calib/avg_num_step_conf": 10.890625, "calib/ece": 0.36963414634146347, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0040650406504065045, "calib/gap": 0.015100644122383239, "calib/mean_conf": 0.19134146341463415, "calib/mu_c": 0.19797101449275362, "calib/mu_w": 0.18287037037037038, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.07009451566501977, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2917.0, "completions/max_terminated_length": 2917.0, "completions/mean_length": 603.48828125, "completions/mean_terminated_length": 613.0675048828125, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.2112, "grad_norm": 0.07373571395874023, "learning_rate": 5.555555555555556e-08, "loss": 0.0393, "num_tokens": 44276783.0, "reward": 1.1614110469818115, "reward_std": 0.2225344032049179, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5909566283226013, "rewards/format_reward_step_strict": 0.94921875, "step": 198 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.823768354392019e-08, "aux_brier/mean_group_std": 0.05707974939527955, "aux_brier/mean_r": 0.9469600337955227, "aux_brier/n_active_tok": 377.625, "aux_brier/n_groups": 22.5, "aux_brier/n_step_records": 94.40625, "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.4645641882483988, "calib/avg_num_step_conf": 12.3828125, "calib/ece": 0.3563485477178423, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.008298755186721992, "calib/gap": -0.02200779727095517, "calib/mean_conf": 0.21933609958506223, "calib/mu_c": 0.2094736842105263, "calib/mu_w": 0.23148148148148148, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011908713692946056, "calib/std_conf": 0.08807517008094146, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2848.0, "completions/max_terminated_length": 2848.0, "completions/mean_length": 656.34765625, "completions/mean_terminated_length": 680.26318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 184.0, "epoch": 0.21226666666666666, "grad_norm": 0.13624730706214905, "learning_rate": 2.777777777777778e-08, "loss": 0.0144, "num_tokens": 44549008.0, "reward": 1.1288948059082031, "reward_std": 0.35054564476013184, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5780788660049438, "rewards/format_reward_step_strict": 0.9296875, "step": 199 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.408442279396017e-07, "aux_brier/mean_group_std": 0.05124705039918557, "aux_brier/mean_r": 0.9541607324915391, "aux_brier/n_active_tok": 334.25, "aux_brier/n_groups": 20.625, "aux_brier/n_step_records": 83.5625, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5763718263718264, "calib/avg_num_step_conf": 10.86328125, "calib/ece": 0.41603238866396763, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.008097165991902834, "calib/gap": 0.0011322686322685904, "calib/mean_conf": 0.20582995951417005, "calib/mu_c": 0.20628378378378376, "calib/mu_w": 0.20515151515151517, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011336032388663968, "calib/std_conf": 0.09908089481737702, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2744.0, "completions/max_terminated_length": 2744.0, "completions/mean_length": 611.85546875, "completions/mean_terminated_length": 621.5675048828125, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.21333333333333335, "grad_norm": 0.10868275910615921, "learning_rate": 0.0, "loss": 0.004, "num_tokens": 44813691.0, "reward": 1.199366807937622, "reward_std": 0.2432486116886139, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.5709050893783569, "rewards/format_reward_step_strict": 0.95703125, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.03981497859931551, "train_runtime": 16137.1786, "train_samples_per_second": 3.173, "train_steps_per_second": 0.012 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 44813691, "num_train_epochs": 1, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }