{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.791089203391117e-07, "aux_brier/mean_group_std": 0.06289231620091193, "aux_brier/mean_r": 0.4665906001184907, "aux_brier/n_active_tok": 24.615384615384617, "aux_brier/n_groups": 5.3076923076923075, "aux_brier/n_step_records": 6.153846153846154, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.8019087314605713, "learning_rate": 2.5000000000000004e-07, "loss": 0.0332, "num_tokens": 264685.0, "reward": 0.03929531201720238, "reward_std": 0.08434611558914185, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step_strict": 0.0390625, "step": 1 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.461345396504181e-08, "aux_brier/mean_group_std": 0.046398653263787254, "aux_brier/mean_r": 0.430243897442093, "aux_brier/n_active_tok": 28.42105263157895, "aux_brier/n_groups": 5.894736842105263, "aux_brier/n_step_records": 7.105263157894737, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.0070538897998631, "learning_rate": 5.000000000000001e-07, "loss": 0.0643, "num_tokens": 533467.0, "reward": 0.08358447253704071, "reward_std": 0.15892045199871063, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step_strict": 0.08984375, "step": 2 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.9270137424930454e-09, "aux_brier/mean_group_std": 0.016647923275186365, "aux_brier/mean_r": 0.36642824813769415, "aux_brier/n_active_tok": 22.666666666666668, "aux_brier/n_groups": 5.416666666666667, "aux_brier/n_step_records": 5.666666666666667, "calib/answer_extract_rate": 0.0546875, "calib/auroc": 0.75, "calib/avg_num_step_conf": 0.296875, "calib/ece": 0.8140714285714286, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.9285714285714286, "calib/gap": 0.03858333333333319, "calib/mean_conf": 0.9569285714285716, "calib/mu_c": 0.99, "calib/mu_w": 0.9514166666666668, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.0703125, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.8140714285714286, "calib/std_conf": 0.0642744831459069, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2982.0, "completions/max_terminated_length": 2982.0, "completions/mean_length": 650.859375, "completions/mean_terminated_length": 718.1896362304688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.0033897303510457277, "learning_rate": 7.5e-07, "loss": 0.0223, "num_tokens": 805343.0, "reward": 0.026312783360481262, "reward_std": 0.058088187128305435, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.011501136235892773, "rewards/format_reward_step_strict": 0.03125, "step": 3 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -9.31322515403584e-09, "aux_brier/mean_group_std": 0.037747883123163066, "aux_brier/mean_r": 0.42610642501873497, "aux_brier/n_active_tok": 24.0, "aux_brier/n_groups": 4.583333333333333, "aux_brier/n_step_records": 6.0, "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.22222222222222227, "calib/avg_num_step_conf": 0.30078125, "calib/ece": 0.8589999999999999, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.9, "calib/gap": -0.01000000000000012, "calib/mean_conf": 0.959, "calib/mu_c": 0.95, "calib/mu_w": 0.9600000000000001, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.0625, "calib/pce": 0.8589999999999999, "calib/std_conf": 0.02913760456866693, "calib/step_conf_rate": 0.0625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 3013.0, "completions/max_terminated_length": 3013.0, "completions/mean_length": 660.6953125, "completions/mean_terminated_length": 748.3982543945312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 1.0983048677444458, "learning_rate": 1.0000000000000002e-06, "loss": 0.031, "num_tokens": 1080649.0, "reward": 0.021128516644239426, "reward_std": 0.055777207016944885, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.006389062851667404, "rewards/format_reward_step_strict": 0.03125, "step": 4 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.5762788286319847e-08, "aux_brier/mean_group_std": 0.011597597801238597, "aux_brier/mean_r": 0.5177998114517441, "aux_brier/n_active_tok": 25.5, "aux_brier/n_groups": 5.875, "aux_brier/n_step_records": 6.375, "calib/answer_extract_rate": 0.03515625, "calib/auroc": 0.25, "calib/avg_num_step_conf": 0.203125, "calib/ece": 0.8155555555555556, "calib/final_conf_rate": 0.03515625, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.7777777777777778, "calib/gap": -0.018749999999999933, "calib/mean_conf": 0.9266666666666666, "calib/mu_c": 0.91, "calib/mu_w": 0.92875, "calib/nonempty_final_conf_rate": 0.03515625, "calib/nonempty_reasoning_rate": 0.04296875, "calib/nonempty_step_conf_rate": 0.0390625, "calib/pce": 0.8155555555555556, "calib/std_conf": 0.06765927710061478, "calib/step_conf_rate": 0.0390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3041.0, "completions/max_terminated_length": 3041.0, "completions/mean_length": 804.40234375, "completions/mean_terminated_length": 865.2395629882812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.0030431875493377447, "learning_rate": 1.25e-06, "loss": 0.0143, "num_tokens": 1393264.0, "reward": 0.01749306544661522, "reward_std": 0.03999492526054382, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.007472265511751175, "rewards/format_reward_step_strict": 0.0234375, "step": 5 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.5357464446340503e-08, "aux_brier/mean_group_std": 0.018072298580218516, "aux_brier/mean_r": 0.500327113360118, "aux_brier/n_active_tok": 26.105263157894736, "aux_brier/n_groups": 6.2631578947368425, "aux_brier/n_step_records": 6.526315789473684, "calib/answer_extract_rate": 0.10546875, "calib/auroc": 0.9, "calib/avg_num_step_conf": 0.49609375, "calib/ece": 0.8293750000000001, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.0546875, "calib/frac_conf_gt_0.9": 0.8125, "calib/gap": 0.10466666666666657, "calib/mean_conf": 0.891875, "calib/mu_c": 0.99, "calib/mu_w": 0.8853333333333334, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.1328125, "calib/nonempty_step_conf_rate": 0.0859375, "calib/pce": 0.8293750000000001, "calib/std_conf": 0.19013872402801066, "calib/step_conf_rate": 0.0859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3064.0, "completions/max_terminated_length": 3064.0, "completions/mean_length": 624.328125, "completions/mean_terminated_length": 677.2373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.46461933851242065, "learning_rate": 1.5e-06, "loss": 0.0328, "num_tokens": 1659044.0, "reward": 0.029817283153533936, "reward_std": 0.0761086717247963, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.009894140064716339, "rewards/format_reward_step_strict": 0.046875, "step": 6 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.662441064184046e-09, "aux_brier/mean_group_std": 0.0037897744879094105, "aux_brier/mean_r": 0.4083800068223509, "aux_brier/n_active_tok": 21.866666666666667, "aux_brier/n_groups": 5.4, "aux_brier/n_step_records": 5.466666666666667, "calib/answer_extract_rate": 0.078125, "calib/auroc": 0.25, "calib/avg_num_step_conf": 0.33203125, "calib/ece": 0.6752941176470587, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.8823529411764706, "calib/gap": 0.04500000000000015, "calib/mean_conf": 0.9105882352941176, "calib/mu_c": 0.9450000000000001, "calib/mu_w": 0.8999999999999999, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.1015625, "calib/nonempty_step_conf_rate": 0.06640625, "calib/pce": 0.6752941176470587, "calib/std_conf": 0.20408967429958172, "calib/step_conf_rate": 0.06640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2940.0, "completions/max_terminated_length": 2940.0, "completions/mean_length": 737.65234375, "completions/mean_terminated_length": 807.0043334960938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.2528690993785858, "learning_rate": 1.75e-06, "loss": 0.001, "num_tokens": 1955307.0, "reward": 0.03929208964109421, "reward_std": 0.08367627114057541, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01654335856437683, "rewards/format_reward_step_strict": 0.0390625, "step": 7 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -9.628442180306033e-09, "aux_brier/mean_group_std": 0.008700643628001825, "aux_brier/mean_r": 0.432177396158137, "aux_brier/n_active_tok": 18.46153846153846, "aux_brier/n_groups": 4.384615384615385, "aux_brier/n_step_records": 4.615384615384615, "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.4285714285714286, "calib/avg_num_step_conf": 0.234375, "calib/ece": 0.5554545454545454, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.7272727272727273, "calib/gap": 0.06892857142857134, "calib/mean_conf": 0.8536363636363636, "calib/mu_c": 0.8975, "calib/mu_w": 0.8285714285714286, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.5227272727272727, "calib/std_conf": 0.27634868380602484, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2914.0, "completions/max_terminated_length": 2914.0, "completions/mean_length": 608.0234375, "completions/mean_terminated_length": 662.357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.45698079466819763, "learning_rate": 2.0000000000000003e-06, "loss": -0.0152, "num_tokens": 2217473.0, "reward": 0.03828798606991768, "reward_std": 0.08853976428508759, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.02033945359289646, "rewards/format_reward_step_strict": 0.03515625, "step": 8 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.008694010369464e-08, "aux_brier/mean_group_std": 0.02933807589196565, "aux_brier/mean_r": 0.4913759569149179, "aux_brier/n_active_tok": 18.76923076923077, "aux_brier/n_groups": 4.076923076923077, "aux_brier/n_step_records": 4.6923076923076925, "calib/answer_extract_rate": 0.078125, "calib/auroc": 0.5, "calib/avg_num_step_conf": 0.24609375, "calib/ece": 0.7916666666666665, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.9166666666666666, "calib/gap": -0.010000000000000009, "calib/mean_conf": 0.9583333333333331, "calib/mu_c": 0.95, "calib/mu_w": 0.96, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.1015625, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.7916666666666665, "calib/std_conf": 0.027335365778094534, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 3057.0, "completions/max_terminated_length": 3057.0, "completions/mean_length": 620.76171875, "completions/mean_terminated_length": 687.9437255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0096, "grad_norm": 0.003539730329066515, "learning_rate": 2.25e-06, "loss": 0.0037, "num_tokens": 2483924.0, "reward": 0.02196083962917328, "reward_std": 0.05420968309044838, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.0019058594480156898, "rewards/format_reward_step_strict": 0.02734375, "step": 9 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.4545744887618906e-08, "aux_brier/mean_group_std": 0.06240475362047568, "aux_brier/mean_r": 0.46709847997319487, "aux_brier/n_active_tok": 27.058823529411764, "aux_brier/n_groups": 5.235294117647059, "aux_brier/n_step_records": 6.764705882352941, "calib/answer_extract_rate": 0.0859375, "calib/auroc": 0.625, "calib/avg_num_step_conf": 0.48828125, "calib/ece": 0.726842105263158, "calib/final_conf_rate": 0.07421875, "calib/format_rate": 0.07421875, "calib/frac_conf_gt_0.9": 0.7894736842105263, "calib/gap": 0.10520833333333324, "calib/mean_conf": 0.884736842105263, "calib/mu_c": 0.9733333333333333, "calib/mu_w": 0.868125, "calib/nonempty_final_conf_rate": 0.07421875, "calib/nonempty_reasoning_rate": 0.11328125, "calib/nonempty_step_conf_rate": 0.10546875, "calib/pce": 0.726842105263158, "calib/std_conf": 0.2317296160581309, "calib/step_conf_rate": 0.10546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2868.0, "completions/max_terminated_length": 2868.0, "completions/mean_length": 661.26171875, "completions/mean_terminated_length": 720.3531494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 1.4912821054458618, "learning_rate": 2.5e-06, "loss": 0.0379, "num_tokens": 2760007.0, "reward": 0.05463603138923645, "reward_std": 0.1257432997226715, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.023231640458106995, "rewards/format_reward_step_strict": 0.06640625, "step": 10 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.9474720453555644e-08, "aux_brier/mean_group_std": 0.021085085040818102, "aux_brier/mean_r": 0.3412877070435653, "aux_brier/n_active_tok": 21.77777777777778, "aux_brier/n_groups": 4.333333333333333, "aux_brier/n_step_records": 5.444444444444445, "calib/answer_extract_rate": 0.08984375, "calib/auroc": 0.5476190476190476, "calib/avg_num_step_conf": 0.40234375, "calib/ece": 0.6213157894736843, "calib/final_conf_rate": 0.07421875, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 0.7368421052631579, "calib/gap": -0.08851190476190474, "calib/mean_conf": 0.8844736842105263, "calib/mu_c": 0.8285714285714285, "calib/mu_w": 0.9170833333333333, "calib/nonempty_final_conf_rate": 0.07421875, "calib/nonempty_reasoning_rate": 0.12890625, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.5686842105263158, "calib/std_conf": 0.23487339667440835, "calib/step_conf_rate": 0.09375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2911.0, "completions/max_terminated_length": 2911.0, "completions/mean_length": 666.3671875, "completions/mean_terminated_length": 741.6956176757812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 1.0365098714828491, "learning_rate": 2.7500000000000004e-06, "loss": -0.0021, "num_tokens": 3035077.0, "reward": 0.0539645291864872, "reward_std": 0.11456333100795746, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.020545605570077896, "rewards/format_reward_step_strict": 0.04296875, "step": 11 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.703742298468484e-08, "aux_brier/mean_group_std": 0.0732713368122301, "aux_brier/mean_r": 0.4649826706328534, "aux_brier/n_active_tok": 27.4, "aux_brier/n_groups": 5.0, "aux_brier/n_step_records": 6.85, "calib/answer_extract_rate": 0.1484375, "calib/auroc": 0.5113636363636365, "calib/avg_num_step_conf": 0.5390625, "calib/ece": 0.5473235294117647, "calib/final_conf_rate": 0.1328125, "calib/format_rate": 0.1015625, "calib/frac_conf_gt_0.9": 0.6764705882352942, "calib/gap": 0.04595454545454547, "calib/mean_conf": 0.900264705882353, "calib/mu_c": 0.93, "calib/mu_w": 0.8840454545454546, "calib/nonempty_final_conf_rate": 0.1328125, "calib/nonempty_reasoning_rate": 0.16015625, "calib/nonempty_step_conf_rate": 0.12109375, "calib/pce": 0.5473235294117647, "calib/std_conf": 0.1299206338780197, "calib/step_conf_rate": 0.12109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2937.0, "completions/max_terminated_length": 2937.0, "completions/mean_length": 623.390625, "completions/mean_terminated_length": 679.0978393554688, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0128, "grad_norm": 0.6270592212677002, "learning_rate": 3e-06, "loss": 0.0169, "num_tokens": 3298841.0, "reward": 0.11310829222202301, "reward_std": 0.21758697926998138, "rewards/accuracy_reward_step": 0.05078125, "rewards/final_brier_reward_step": 0.05399569869041443, "rewards/format_reward_step_strict": 0.09765625, "step": 12 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 9.130551629052011e-08, "aux_brier/mean_group_std": 0.0709884432844243, "aux_brier/mean_r": 0.4623331838224613, "aux_brier/n_active_tok": 29.523809523809526, "aux_brier/n_groups": 4.9523809523809526, "aux_brier/n_step_records": 7.380952380952381, "calib/answer_extract_rate": 0.1640625, "calib/auroc": 0.6074380165289256, "calib/avg_num_step_conf": 0.609375, "calib/ece": 0.6156565656565657, "calib/final_conf_rate": 0.12890625, "calib/format_rate": 0.10546875, "calib/frac_conf_gt_0.9": 0.7575757575757576, "calib/gap": -0.025606060606060743, "calib/mean_conf": 0.851010101010101, "calib/mu_c": 0.833939393939394, "calib/mu_w": 0.8595454545454547, "calib/nonempty_final_conf_rate": 0.12890625, "calib/nonempty_reasoning_rate": 0.1875, "calib/nonempty_step_conf_rate": 0.13671875, "calib/pce": 0.5666666666666667, "calib/std_conf": 0.25219105158053523, "calib/step_conf_rate": 0.13671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3053.0, "completions/max_terminated_length": 3053.0, "completions/mean_length": 627.24609375, "completions/mean_terminated_length": 680.4025268554688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.013427832163870335, "learning_rate": 3.2500000000000002e-06, "loss": 0.0577, "num_tokens": 3564008.0, "reward": 0.10244810581207275, "reward_std": 0.18835735321044922, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.05041740834712982, "rewards/format_reward_step_strict": 0.09375, "step": 13 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.1200137757683137e-07, "aux_brier/mean_group_std": 0.05895216164706083, "aux_brier/mean_r": 0.42568322537953823, "aux_brier/n_active_tok": 34.4, "aux_brier/n_groups": 6.68, "aux_brier/n_step_records": 8.6, "calib/answer_extract_rate": 0.1953125, "calib/auroc": 0.5366379310344828, "calib/avg_num_step_conf": 0.85546875, "calib/ece": 0.613117117117117, "calib/final_conf_rate": 0.14453125, "calib/format_rate": 0.12109375, "calib/frac_conf_gt_0.9": 0.7027027027027027, "calib/gap": 0.12524712643678182, "calib/mean_conf": 0.8293333333333333, "calib/mu_c": 0.9275, "calib/mu_w": 0.8022528735632182, "calib/nonempty_final_conf_rate": 0.14453125, "calib/nonempty_reasoning_rate": 0.23046875, "calib/nonempty_step_conf_rate": 0.1640625, "calib/pce": 0.613117117117117, "calib/std_conf": 0.2795984250300407, "calib/step_conf_rate": 0.1640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 3047.0, "completions/max_terminated_length": 3047.0, "completions/mean_length": 643.43359375, "completions/mean_terminated_length": 722.4517822265625, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.014933333333333333, "grad_norm": 0.56694495677948, "learning_rate": 3.5e-06, "loss": 0.0391, "num_tokens": 3834127.0, "reward": 0.10360130667686462, "reward_std": 0.17667700350284576, "rewards/accuracy_reward_step": 0.03515625, "rewards/final_brier_reward_step": 0.047217756509780884, "rewards/format_reward_step_strict": 0.11328125, "step": 14 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.4157165954693922e-08, "aux_brier/mean_group_std": 0.0736477397877375, "aux_brier/mean_r": 0.5192831668822072, "aux_brier/n_active_tok": 33.09090909090909, "aux_brier/n_groups": 4.909090909090909, "aux_brier/n_step_records": 8.272727272727273, "calib/answer_extract_rate": 0.21484375, "calib/auroc": 0.404692082111437, "calib/avg_num_step_conf": 0.73046875, "calib/ece": 0.6854523809523809, "calib/final_conf_rate": 0.1640625, "calib/format_rate": 0.12890625, "calib/frac_conf_gt_0.9": 0.7619047619047619, "calib/gap": -0.07600879765395907, "calib/mean_conf": 0.8507380952380951, "calib/mu_c": 0.7946363636363636, "calib/mu_w": 0.8706451612903227, "calib/nonempty_final_conf_rate": 0.1640625, "calib/nonempty_reasoning_rate": 0.25390625, "calib/nonempty_step_conf_rate": 0.17578125, "calib/pce": 0.6371428571428571, "calib/std_conf": 0.2692581571938341, "calib/step_conf_rate": 0.17578125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 3005.0, "completions/max_terminated_length": 3005.0, "completions/mean_length": 641.0078125, "completions/mean_terminated_length": 680.9046020507812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.34427380561828613, "learning_rate": 3.7500000000000005e-06, "loss": 0.0959, "num_tokens": 4106105.0, "reward": 0.11405427753925323, "reward_std": 0.2070690542459488, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.04996710270643234, "rewards/format_reward_step_strict": 0.1171875, "step": 15 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.01300370545232e-07, "aux_brier/mean_group_std": 0.10674272866305466, "aux_brier/mean_r": 0.5689924458994036, "aux_brier/n_active_tok": 38.24, "aux_brier/n_groups": 5.88, "aux_brier/n_step_records": 9.56, "calib/answer_extract_rate": 0.21484375, "calib/auroc": 0.3703208556149733, "calib/avg_num_step_conf": 0.95703125, "calib/ece": 0.6723777777777779, "calib/final_conf_rate": 0.17578125, "calib/format_rate": 0.12890625, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": -0.0361951871657753, "calib/mean_conf": 0.8937111111111109, "calib/mu_c": 0.8663636363636364, "calib/mu_w": 0.9025588235294117, "calib/nonempty_final_conf_rate": 0.17578125, "calib/nonempty_reasoning_rate": 0.2890625, "calib/nonempty_step_conf_rate": 0.22265625, "calib/pce": 0.6608222222222223, "calib/std_conf": 0.17408996680799813, "calib/step_conf_rate": 0.22265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3031.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 666.52734375, "completions/mean_terminated_length": 729.1923217773438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.017066666666666667, "grad_norm": 0.944327712059021, "learning_rate": 4.000000000000001e-06, "loss": 0.1057, "num_tokens": 4385584.0, "reward": 0.11981875449419022, "reward_std": 0.24961884319782257, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.04958750307559967, "rewards/format_reward_step_strict": 0.12890625, "step": 16 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.3937364507766437e-07, "aux_brier/mean_group_std": 0.12641517603858168, "aux_brier/mean_r": 0.44455731146964983, "aux_brier/n_active_tok": 46.370370370370374, "aux_brier/n_groups": 6.925925925925926, "aux_brier/n_step_records": 11.592592592592593, "calib/answer_extract_rate": 0.26171875, "calib/auroc": 0.6513975155279502, "calib/avg_num_step_conf": 1.2421875, "calib/ece": 0.6728333333333334, "calib/final_conf_rate": 0.234375, "calib/format_rate": 0.1796875, "calib/frac_conf_gt_0.9": 0.6833333333333333, "calib/gap": 0.008291925465838568, "calib/mean_conf": 0.9015000000000001, "calib/mu_c": 0.9078571428571428, "calib/mu_w": 0.8995652173913042, "calib/nonempty_final_conf_rate": 0.234375, "calib/nonempty_reasoning_rate": 0.31640625, "calib/nonempty_step_conf_rate": 0.2421875, "calib/pce": 0.6705000000000001, "calib/std_conf": 0.13488050266810248, "calib/step_conf_rate": 0.2421875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2922.0, "completions/max_terminated_length": 2922.0, "completions/mean_length": 589.24609375, "completions/mean_terminated_length": 644.6453247070312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.018133333333333335, "grad_norm": 2.1329660415649414, "learning_rate": 4.25e-06, "loss": 0.1334, "num_tokens": 4639959.0, "reward": 0.16368788480758667, "reward_std": 0.28559666872024536, "rewards/accuracy_reward_step": 0.05859375, "rewards/final_brier_reward_step": 0.06881406903266907, "rewards/format_reward_step_strict": 0.17578125, "step": 17 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.4014752091667495e-10, "aux_brier/mean_group_std": 0.10244276346951456, "aux_brier/mean_r": 0.5107277369249441, "aux_brier/n_active_tok": 42.42857142857143, "aux_brier/n_groups": 6.25, "aux_brier/n_step_records": 10.607142857142858, "calib/answer_extract_rate": 0.24609375, "calib/auroc": 0.49113475177304966, "calib/avg_num_step_conf": 1.18359375, "calib/ece": 0.7297457627114853, "calib/final_conf_rate": 0.23046875, "calib/format_rate": 0.171875, "calib/frac_conf_gt_0.9": 0.6949152542372882, "calib/gap": -0.01680851064016098, "calib/mean_conf": 0.9083898305080954, "calib/mu_c": 0.8949999999981367, "calib/mu_w": 0.9118085106382977, "calib/nonempty_final_conf_rate": 0.23046875, "calib/nonempty_reasoning_rate": 0.3125, "calib/nonempty_step_conf_rate": 0.24609375, "calib/pce": 0.7173728813555531, "calib/std_conf": 0.1312051075053675, "calib/step_conf_rate": 0.24609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2718.0, "completions/max_terminated_length": 2718.0, "completions/mean_length": 628.92578125, "completions/mean_terminated_length": 670.8541870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0192, "grad_norm": 1.147321343421936, "learning_rate": 4.5e-06, "loss": 0.0787, "num_tokens": 4911684.0, "reward": 0.14179620146751404, "reward_std": 0.23915956914424896, "rewards/accuracy_reward_step": 0.046875, "rewards/final_brier_reward_step": 0.05937226489186287, "rewards/format_reward_step_strict": 0.16015625, "step": 18 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.8516915939746923e-08, "aux_brier/mean_group_std": 0.16628030885569522, "aux_brier/mean_r": 0.49903873928528314, "aux_brier/n_active_tok": 57.5, "aux_brier/n_groups": 7.09375, "aux_brier/n_step_records": 14.375, "calib/answer_extract_rate": 0.41015625, "calib/auroc": 0.5369047619047619, "calib/avg_num_step_conf": 1.89453125, "calib/ece": 0.7307682692307691, "calib/final_conf_rate": 0.40625, "calib/format_rate": 0.31640625, "calib/frac_conf_gt_0.9": 0.7211538461538461, "calib/gap": -0.03471309523809529, "calib/mean_conf": 0.8865375, "calib/mu_c": 0.8584999999999999, "calib/mu_w": 0.8932130952380952, "calib/nonempty_final_conf_rate": 0.40625, "calib/nonempty_reasoning_rate": 0.453125, "calib/nonempty_step_conf_rate": 0.390625, "calib/pce": 0.7124990384615384, "calib/std_conf": 0.19888949298711855, "calib/step_conf_rate": 0.390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 3012.0, "completions/max_terminated_length": 3012.0, "completions/mean_length": 476.7265625, "completions/mean_terminated_length": 500.1720886230469, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.020266666666666665, "grad_norm": 0.4700727164745331, "learning_rate": 4.75e-06, "loss": 0.0943, "num_tokens": 5138486.0, "reward": 0.25758230686187744, "reward_std": 0.36318379640579224, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.10845429450273514, "rewards/format_reward_step_strict": 0.3046875, "step": 19 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 7.143965946587727e-09, "aux_brier/mean_group_std": 0.19972032193623057, "aux_brier/mean_r": 0.5313528417849928, "aux_brier/n_active_tok": 82.0, "aux_brier/n_groups": 7.71875, "aux_brier/n_step_records": 20.5, "calib/answer_extract_rate": 0.5703125, "calib/auroc": 0.5001596424010217, "calib/avg_num_step_conf": 2.625, "calib/ece": 0.6142682926829269, "calib/final_conf_rate": 0.48046875, "calib/format_rate": 0.3984375, "calib/frac_conf_gt_0.9": 0.7235772357723578, "calib/gap": 0.00022509578544083197, "calib/mean_conf": 0.8924796747967481, "calib/mu_c": 0.892638888888889, "calib/mu_w": 0.8924137931034481, "calib/nonempty_final_conf_rate": 0.48046875, "calib/nonempty_reasoning_rate": 0.66015625, "calib/nonempty_step_conf_rate": 0.53515625, "calib/pce": 0.6070325203252033, "calib/std_conf": 0.19001644556955094, "calib/step_conf_rate": 0.53515625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 429.28125, "completions/mean_terminated_length": 439.5840148925781, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.021333333333333333, "grad_norm": 0.4443240165710449, "learning_rate": 5e-06, "loss": 0.0815, "num_tokens": 5353254.0, "reward": 0.38403499126434326, "reward_std": 0.4633830189704895, "rewards/accuracy_reward_step": 0.1484375, "rewards/final_brier_reward_step": 0.17676514387130737, "rewards/format_reward_step_strict": 0.3828125, "step": 20 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.135037942777254e-08, "aux_brier/mean_group_std": 0.18036753840812028, "aux_brier/mean_r": 0.4919380842477488, "aux_brier/n_active_tok": 80.0, "aux_brier/n_groups": 6.78125, "aux_brier/n_step_records": 20.0, "calib/answer_extract_rate": 0.59375, "calib/auroc": 0.4339560799737791, "calib/avg_num_step_conf": 2.54296875, "calib/ece": 0.6953455515957019, "calib/final_conf_rate": 0.546875, "calib/format_rate": 0.4765625, "calib/frac_conf_gt_0.9": 0.6857142857142857, "calib/gap": 0.008877805647903636, "calib/mean_conf": 0.875797305547155, "calib/mu_c": 0.882962962962963, "calib/mu_w": 0.8740851573150593, "calib/nonempty_final_conf_rate": 0.546875, "calib/nonempty_reasoning_rate": 0.66015625, "calib/nonempty_step_conf_rate": 0.578125, "calib/pce": 0.689142857142857, "calib/std_conf": 0.2066548389307303, "calib/step_conf_rate": 0.578125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 3068.0, "completions/max_terminated_length": 3068.0, "completions/mean_length": 373.3359375, "completions/mean_terminated_length": 391.6966857910156, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0224, "grad_norm": 0.7015453577041626, "learning_rate": 4.9722222222222224e-06, "loss": 0.0818, "num_tokens": 5551788.0, "reward": 0.3778233528137207, "reward_std": 0.4428941011428833, "rewards/accuracy_reward_step": 0.11328125, "rewards/final_brier_reward_step": 0.15973085165023804, "rewards/format_reward_step_strict": 0.44921875, "step": 21 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.202951125493627e-09, "aux_brier/mean_group_std": 0.2020469017186471, "aux_brier/mean_r": 0.5072843637279042, "aux_brier/n_active_tok": 103.875, "aux_brier/n_groups": 7.875, "aux_brier/n_step_records": 25.96875, "calib/answer_extract_rate": 0.6796875, "calib/auroc": 0.494388327721661, "calib/avg_num_step_conf": 3.3203125, "calib/ece": 0.6857202380952381, "calib/final_conf_rate": 0.65625, "calib/format_rate": 0.59765625, "calib/frac_conf_gt_0.9": 0.7142857142857143, "calib/gap": 0.035859259259259035, "calib/mean_conf": 0.8678511904761905, "calib/mu_c": 0.8966666666666665, "calib/mu_w": 0.8608074074074075, "calib/nonempty_final_conf_rate": 0.65625, "calib/nonempty_reasoning_rate": 0.76953125, "calib/nonempty_step_conf_rate": 0.72265625, "calib/pce": 0.6785714285714285, "calib/std_conf": 0.21995826551873732, "calib/step_conf_rate": 0.72265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3061.0, "completions/max_terminated_length": 3061.0, "completions/mean_length": 367.03125, "completions/mean_terminated_length": 371.3834228515625, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.023466666666666667, "grad_norm": 0.26140135526657104, "learning_rate": 4.944444444444445e-06, "loss": 0.1531, "num_tokens": 5747564.0, "reward": 0.461717426776886, "reward_std": 0.49233466386795044, "rewards/accuracy_reward_step": 0.1328125, "rewards/final_brier_reward_step": 0.20624473690986633, "rewards/format_reward_step_strict": 0.5546875, "step": 22 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.447986701710093e-09, "aux_brier/mean_group_std": 0.2216020534625502, "aux_brier/mean_r": 0.5362284046828788, "aux_brier/n_active_tok": 108.625, "aux_brier/n_groups": 9.09375, "aux_brier/n_step_records": 27.15625, "calib/answer_extract_rate": 0.73046875, "calib/auroc": 0.4558353317346123, "calib/avg_num_step_conf": 3.40625, "calib/ece": 0.7014285714285713, "calib/final_conf_rate": 0.68359375, "calib/format_rate": 0.60546875, "calib/frac_conf_gt_0.9": 0.7142857142857143, "calib/gap": 0.0043964828137490475, "calib/mean_conf": 0.8842857142857141, "calib/mu_c": 0.8877777777777778, "calib/mu_w": 0.8833812949640287, "calib/nonempty_final_conf_rate": 0.68359375, "calib/nonempty_reasoning_rate": 0.7890625, "calib/nonempty_step_conf_rate": 0.703125, "calib/pce": 0.69, "calib/std_conf": 0.20523276979059257, "calib/step_conf_rate": 0.703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2979.0, "completions/max_terminated_length": 2979.0, "completions/mean_length": 373.30859375, "completions/mean_terminated_length": 377.7351989746094, "completions/min_length": 0.0, "completions/min_terminated_length": 11.0, "epoch": 0.024533333333333334, "grad_norm": 1.195592999458313, "learning_rate": 4.9166666666666665e-06, "loss": 0.1399, "num_tokens": 5947067.0, "reward": 0.4916580021381378, "reward_std": 0.4707548916339874, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.20881952345371246, "rewards/format_reward_step_strict": 0.59765625, "step": 23 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 7.324486682100395e-09, "aux_brier/mean_group_std": 0.21149588199044533, "aux_brier/mean_r": 0.5487196136341741, "aux_brier/n_active_tok": 113.625, "aux_brier/n_groups": 8.21875, "aux_brier/n_step_records": 28.40625, "calib/answer_extract_rate": 0.77734375, "calib/auroc": 0.5435070306038048, "calib/avg_num_step_conf": 3.70703125, "calib/ece": 0.6639690721649485, "calib/final_conf_rate": 0.7578125, "calib/format_rate": 0.67578125, "calib/frac_conf_gt_0.9": 0.6597938144329897, "calib/gap": 0.04734656741108345, "calib/mean_conf": 0.8634536082474227, "calib/mu_c": 0.9012820512820513, "calib/mu_w": 0.8539354838709678, "calib/nonempty_final_conf_rate": 0.7578125, "calib/nonempty_reasoning_rate": 0.859375, "calib/nonempty_step_conf_rate": 0.79296875, "calib/pce": 0.6631958762886598, "calib/std_conf": 0.20837350092812282, "calib/step_conf_rate": 0.79296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2626.0, "completions/max_terminated_length": 2626.0, "completions/mean_length": 332.3359375, "completions/mean_terminated_length": 334.9527587890625, "completions/min_length": 0.0, "completions/min_terminated_length": 23.0, "epoch": 0.0256, "grad_norm": 0.965747058391571, "learning_rate": 4.888888888888889e-06, "loss": 0.0643, "num_tokens": 6136657.0, "reward": 0.5342280268669128, "reward_std": 0.44181251525878906, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.25409960746765137, "rewards/format_reward_step_strict": 0.63671875, "step": 24 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.3584580010883585e-07, "aux_brier/mean_group_std": 0.19688430394395745, "aux_brier/mean_r": 0.5162810116111987, "aux_brier/n_active_tok": 124.25, "aux_brier/n_groups": 8.15625, "aux_brier/n_step_records": 31.0625, "calib/answer_extract_rate": 0.875, "calib/auroc": 0.5254296008869179, "calib/avg_num_step_conf": 4.00390625, "calib/ece": 0.6919815668202767, "calib/final_conf_rate": 0.84765625, "calib/format_rate": 0.76953125, "calib/frac_conf_gt_0.9": 0.7050691244239631, "calib/gap": 0.0659243348115296, "calib/mean_conf": 0.8809216589861752, "calib/mu_c": 0.9343902439024389, "calib/mu_w": 0.8684659090909093, "calib/nonempty_final_conf_rate": 0.84765625, "calib/nonempty_reasoning_rate": 0.9375, "calib/nonempty_step_conf_rate": 0.87890625, "calib/pce": 0.6919815668202767, "calib/std_conf": 0.20345648736224245, "calib/step_conf_rate": 0.87890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2002.0, "completions/max_terminated_length": 2002.0, "completions/mean_length": 284.24609375, "completions/mean_terminated_length": 285.3608093261719, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.02666666666666667, "grad_norm": 0.481380432844162, "learning_rate": 4.861111111111111e-06, "loss": 0.0614, "num_tokens": 6312648.0, "reward": 0.6156750321388245, "reward_std": 0.44895535707473755, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.2673875093460083, "rewards/format_reward_step_strict": 0.75390625, "step": 25 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.948377328895971e-09, "aux_brier/mean_group_std": 0.2135516025891162, "aux_brier/mean_r": 0.5502119437421992, "aux_brier/n_active_tok": 131.625, "aux_brier/n_groups": 7.40625, "aux_brier/n_step_records": 32.90625, "calib/answer_extract_rate": 0.90234375, "calib/auroc": 0.5338899108532617, "calib/avg_num_step_conf": 4.2265625, "calib/ece": 0.7080394736842105, "calib/final_conf_rate": 0.890625, "calib/format_rate": 0.82421875, "calib/frac_conf_gt_0.9": 0.7017543859649122, "calib/gap": 0.03606608178859494, "calib/mean_conf": 0.8700570175438596, "calib/mu_c": 0.9002702702702703, "calib/mu_w": 0.8642041884816754, "calib/nonempty_final_conf_rate": 0.890625, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.92578125, "calib/pce": 0.707907894736842, "calib/std_conf": 0.21965012309024798, "calib/step_conf_rate": 0.92578125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2184.0, "completions/max_terminated_length": 2184.0, "completions/mean_length": 294.57421875, "completions/mean_terminated_length": 296.8937072753906, "completions/min_length": 0.0, "completions/min_terminated_length": 16.0, "epoch": 0.027733333333333332, "grad_norm": 0.09537330269813538, "learning_rate": 4.833333333333333e-06, "loss": 0.0627, "num_tokens": 6493299.0, "reward": 0.6087180972099304, "reward_std": 0.4283045530319214, "rewards/accuracy_reward_step": 0.1484375, "rewards/final_brier_reward_step": 0.27080994844436646, "rewards/format_reward_step_strict": 0.78515625, "step": 26 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 4.891819546659626e-09, "aux_brier/mean_group_std": 0.21629333705944145, "aux_brier/mean_r": 0.5526936772971238, "aux_brier/n_active_tok": 133.125, "aux_brier/n_groups": 7.46875, "aux_brier/n_step_records": 33.28125, "calib/answer_extract_rate": 0.921875, "calib/auroc": 0.5033190160093712, "calib/avg_num_step_conf": 4.203125, "calib/ece": 0.6975080508474576, "calib/final_conf_rate": 0.921875, "calib/format_rate": 0.859375, "calib/frac_conf_gt_0.9": 0.6228813559322034, "calib/gap": 0.008753859169595146, "calib/mean_conf": 0.8508978813559321, "calib/mu_c": 0.8582051282051282, "calib/mu_w": 0.849451269035533, "calib/nonempty_final_conf_rate": 0.921875, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.9375, "calib/pce": 0.6915758474576271, "calib/std_conf": 0.21551845008263174, "calib/step_conf_rate": 0.9375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1233.0, "completions/max_terminated_length": 1233.0, "completions/mean_length": 268.7734375, "completions/mean_terminated_length": 268.7734375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.0288, "grad_norm": 0.08425747603178024, "learning_rate": 4.805555555555556e-06, "loss": 0.0392, "num_tokens": 6667321.0, "reward": 0.6474834680557251, "reward_std": 0.41931381821632385, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.29305893182754517, "rewards/format_reward_step_strict": 0.84375, "step": 27 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.8908671231148153e-08, "aux_brier/mean_group_std": 0.2179180649672654, "aux_brier/mean_r": 0.5355714524093081, "aux_brier/n_active_tok": 129.625, "aux_brier/n_groups": 7.65625, "aux_brier/n_step_records": 32.40625, "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.5541794033941183, "calib/avg_num_step_conf": 4.125, "calib/ece": 0.6512853448275864, "calib/final_conf_rate": 0.90625, "calib/format_rate": 0.8515625, "calib/frac_conf_gt_0.9": 0.6767241379310345, "calib/gap": 0.03645571835142836, "calib/mean_conf": 0.8620612068965517, "calib/mu_c": 0.890188679245283, "calib/mu_w": 0.8537329608938546, "calib/nonempty_final_conf_rate": 0.90625, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.91796875, "calib/pce": 0.6424491379310346, "calib/std_conf": 0.22141740083301234, "calib/step_conf_rate": 0.91796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2241.0, "completions/max_terminated_length": 2241.0, "completions/mean_length": 278.6328125, "completions/mean_terminated_length": 279.7254943847656, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.029866666666666666, "grad_norm": 1.87068772315979, "learning_rate": 4.777777777777778e-06, "loss": 0.0243, "num_tokens": 6845595.0, "reward": 0.697235107421875, "reward_std": 0.46421217918395996, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.3280029296875, "rewards/format_reward_step_strict": 0.81640625, "step": 28 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.332386866450321e-10, "aux_brier/mean_group_std": 0.2190098622058668, "aux_brier/mean_r": 0.5798270102251177, "aux_brier/n_active_tok": 140.25, "aux_brier/n_groups": 8.5, "aux_brier/n_step_records": 35.0625, "calib/answer_extract_rate": 0.90625, "calib/auroc": 0.4625898156825992, "calib/avg_num_step_conf": 4.42578125, "calib/ece": 0.7391189427312777, "calib/final_conf_rate": 0.88671875, "calib/format_rate": 0.83203125, "calib/frac_conf_gt_0.9": 0.6784140969162996, "calib/gap": 0.014039987503905005, "calib/mean_conf": 0.8780616740088105, "calib/mu_c": 0.8900606060606061, "calib/mu_w": 0.8760206185567011, "calib/nonempty_final_conf_rate": 0.88671875, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9453125, "calib/pce": 0.7359030837004406, "calib/std_conf": 0.19273721737857602, "calib/step_conf_rate": 0.9453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2458.0, "completions/max_terminated_length": 2458.0, "completions/mean_length": 305.74609375, "completions/mean_terminated_length": 306.94512939453125, "completions/min_length": 0.0, "completions/min_terminated_length": 25.0, "epoch": 0.030933333333333334, "grad_norm": 5.483370304107666, "learning_rate": 4.75e-06, "loss": 0.1293, "num_tokens": 7030994.0, "reward": 0.5947896838188171, "reward_std": 0.399245023727417, "rewards/accuracy_reward_step": 0.1328125, "rewards/final_brier_reward_step": 0.25415873527526855, "rewards/format_reward_step_strict": 0.796875, "step": 29 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.4154402643018482e-08, "aux_brier/mean_group_std": 0.22314070917171333, "aux_brier/mean_r": 0.5867208662667103, "aux_brier/n_active_tok": 144.75, "aux_brier/n_groups": 8.1875, "aux_brier/n_step_records": 36.1875, "calib/answer_extract_rate": 0.9296875, "calib/auroc": 0.5369423131571469, "calib/avg_num_step_conf": 4.58203125, "calib/ece": 0.7144398340248963, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.875, "calib/frac_conf_gt_0.9": 0.6307053941908713, "calib/gap": 0.05795112247797651, "calib/mean_conf": 0.8555186721991701, "calib/mu_c": 0.9052941176470588, "calib/mu_w": 0.8473429951690823, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.94921875, "calib/pce": 0.7144398340248963, "calib/std_conf": 0.20074922079885882, "calib/step_conf_rate": 0.94921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2062.0, "completions/max_terminated_length": 2062.0, "completions/mean_length": 277.85546875, "completions/mean_terminated_length": 281.15020751953125, "completions/min_length": 0.0, "completions/min_terminated_length": 13.0, "epoch": 0.032, "grad_norm": 1.1227530241012573, "learning_rate": 4.722222222222222e-06, "loss": 0.0154, "num_tokens": 7209109.0, "reward": 0.6433886289596558, "reward_std": 0.3881877660751343, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.3079296946525574, "rewards/format_reward_step_strict": 0.859375, "step": 30 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 9.103466834525786e-09, "aux_brier/mean_group_std": 0.22134998099592884, "aux_brier/mean_r": 0.6055208892957307, "aux_brier/n_active_tok": 143.875, "aux_brier/n_groups": 8.6875, "aux_brier/n_step_records": 35.96875, "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.5408590571907155, "calib/avg_num_step_conf": 4.5234375, "calib/ece": 0.6568340248962655, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 0.5809128630705395, "calib/gap": 0.05719262981574558, "calib/mean_conf": 0.8311078838174275, "calib/mu_c": 0.8783333333333335, "calib/mu_w": 0.8211407035175879, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.6568340248962655, "calib/std_conf": 0.22082825224999478, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1854.0, "completions/max_terminated_length": 1854.0, "completions/mean_length": 245.19921875, "completions/mean_terminated_length": 245.19921875, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.03306666666666667, "grad_norm": 0.18373198807239532, "learning_rate": 4.694444444444445e-06, "loss": 0.0428, "num_tokens": 7377792.0, "reward": 0.6938906908035278, "reward_std": 0.39770248532295227, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.3536878526210785, "rewards/format_reward_step_strict": 0.8828125, "step": 31 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.489524627060426e-09, "aux_brier/mean_group_std": 0.2142543951952405, "aux_brier/mean_r": 0.6805003927594797, "aux_brier/n_active_tok": 139.625, "aux_brier/n_groups": 8.03125, "aux_brier/n_step_records": 34.90625, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5390853658536585, "calib/avg_num_step_conf": 4.41015625, "calib/ece": 0.6308571428571428, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.46938775510204084, "calib/gap": 0.041695121951219494, "calib/mean_conf": 0.7866122448979591, "calib/mu_c": 0.8215, "calib/mu_w": 0.7798048780487805, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.6271020408163266, "calib/std_conf": 0.24054435004767497, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2191.0, "completions/max_terminated_length": 2191.0, "completions/mean_length": 271.12109375, "completions/mean_terminated_length": 271.12109375, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "epoch": 0.034133333333333335, "grad_norm": 0.5932040214538574, "learning_rate": 4.666666666666667e-06, "loss": 0.1114, "num_tokens": 7553903.0, "reward": 0.710291862487793, "reward_std": 0.34065794944763184, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.4036675691604614, "rewards/format_reward_step_strict": 0.90625, "step": 32 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.159146687830127e-08, "aux_brier/mean_group_std": 0.21152325473294545, "aux_brier/mean_r": 0.6525623588887066, "aux_brier/n_active_tok": 142.0, "aux_brier/n_groups": 7.875, "aux_brier/n_step_records": 35.5, "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.4629950495049505, "calib/avg_num_step_conf": 4.4609375, "calib/ece": 0.6564462809917354, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.47520661157024796, "calib/gap": -0.05904207920792082, "calib/mean_conf": 0.7960330578512396, "calib/mu_c": 0.74675, "calib/mu_w": 0.8057920792079208, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.6435950413223139, "calib/std_conf": 0.22625948439585752, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2353.0, "completions/max_terminated_length": 2353.0, "completions/mean_length": 273.57421875, "completions/mean_terminated_length": 274.6470642089844, "completions/min_length": 0.0, "completions/min_terminated_length": 14.0, "epoch": 0.0352, "grad_norm": 0.8261376023292542, "learning_rate": 4.638888888888889e-06, "loss": 0.0665, "num_tokens": 7730810.0, "reward": 0.7009158134460449, "reward_std": 0.3576313257217407, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.36616331338882446, "rewards/format_reward_step_strict": 0.90625, "step": 33 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.266226020898211e-08, "aux_brier/mean_group_std": 0.17297121526688108, "aux_brier/mean_r": 0.755081980971118, "aux_brier/n_active_tok": 136.5, "aux_brier/n_groups": 7.84375, "aux_brier/n_step_records": 34.125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4553470919324578, "calib/avg_num_step_conf": 4.30078125, "calib/ece": 0.6106693989071038, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.4344262295081967, "calib/gap": 0.00845153220762962, "calib/mean_conf": 0.7664890710382515, "calib/mu_c": 0.7735897435897435, "calib/mu_w": 0.7651382113821139, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.6086612021857923, "calib/std_conf": 0.2548164727056724, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2135.0, "completions/max_terminated_length": 2135.0, "completions/mean_length": 237.875, "completions/mean_terminated_length": 237.875, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.03626666666666667, "grad_norm": 0.4891229569911957, "learning_rate": 4.611111111111112e-06, "loss": -0.001, "num_tokens": 7896818.0, "reward": 0.7069133520126343, "reward_std": 0.3561670184135437, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.4057784676551819, "rewards/format_reward_step_strict": 0.90625, "step": 34 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.2876700296315935e-08, "aux_brier/mean_group_std": 0.19110142701219532, "aux_brier/mean_r": 0.7578070904330929, "aux_brier/n_active_tok": 143.0, "aux_brier/n_groups": 7.4375, "aux_brier/n_step_records": 35.75, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4994965767217076, "calib/avg_num_step_conf": 4.484375, "calib/ece": 0.5534279835390946, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.3950617283950617, "calib/gap": 0.005865485300040163, "calib/mean_conf": 0.7473127572016462, "calib/mu_c": 0.7519230769230769, "calib/mu_w": 0.7460575916230368, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.5433744855967078, "calib/std_conf": 0.2638745355196102, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1265.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 245.8203125, "completions/mean_terminated_length": 245.8203125, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "epoch": 0.037333333333333336, "grad_norm": 0.13622772693634033, "learning_rate": 4.583333333333333e-06, "loss": -0.0141, "num_tokens": 8069004.0, "reward": 0.7694624662399292, "reward_std": 0.4192921817302704, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.4372250735759735, "rewards/format_reward_step_strict": 0.90625, "step": 35 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.5424452007306577e-08, "aux_brier/mean_group_std": 0.17299765102865022, "aux_brier/mean_r": 0.8162546674847609, "aux_brier/n_active_tok": 148.25, "aux_brier/n_groups": 7.875, "aux_brier/n_step_records": 37.0625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4595408895265424, "calib/avg_num_step_conf": 4.66796875, "calib/ece": 0.39436507936507936, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.27380952380952384, "calib/gap": -0.03577044476327118, "calib/mean_conf": 0.6473015873015873, "calib/mu_c": 0.6231707317073171, "calib/mu_w": 0.6589411764705883, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3581349206349207, "calib/std_conf": 0.2916657488377741, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 775.0, "completions/max_terminated_length": 775.0, "completions/mean_length": 235.9375, "completions/mean_terminated_length": 235.9375, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.0384, "grad_norm": 0.325826495885849, "learning_rate": 4.555555555555556e-06, "loss": 0.0025, "num_tokens": 8232116.0, "reward": 0.9381535053253174, "reward_std": 0.4128226637840271, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.5573015213012695, "rewards/format_reward_step_strict": 0.95703125, "step": 36 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.690912870306761e-08, "aux_brier/mean_group_std": 0.1140466228276657, "aux_brier/mean_r": 0.8719295000616452, "aux_brier/n_active_tok": 142.875, "aux_brier/n_groups": 7.875, "aux_brier/n_step_records": 35.71875, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4106198034769463, "calib/avg_num_step_conf": 4.5, "calib/ece": 0.4436, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.204, "calib/gap": -0.08550075585789862, "calib/mean_conf": 0.59944, "calib/mu_c": 0.5324074074074074, "calib/mu_w": 0.6179081632653061, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.41352, "calib/std_conf": 0.2927874423536638, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1383.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 252.17578125, "completions/mean_terminated_length": 253.1647186279297, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.039466666666666664, "grad_norm": 1.0115748643875122, "learning_rate": 4.527777777777778e-06, "loss": -0.0142, "num_tokens": 8403769.0, "reward": 0.8041490316390991, "reward_std": 0.37829023599624634, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.5369086265563965, "rewards/format_reward_step_strict": 0.91796875, "step": 37 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.7883934211890846e-08, "aux_brier/mean_group_std": 0.13285248793464477, "aux_brier/mean_r": 0.8842191220307359, "aux_brier/n_active_tok": 141.0, "aux_brier/n_groups": 7.625, "aux_brier/n_step_records": 35.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4564952048823016, "calib/avg_num_step_conf": 4.4765625, "calib/ece": 0.3612145748987855, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.1700404858299595, "calib/gap": -0.040134263295553696, "calib/mean_conf": 0.5548987854251012, "calib/mu_c": 0.5248387096774193, "calib/mu_w": 0.564972972972973, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.33255060728744945, "calib/std_conf": 0.30877564454226697, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2200.0, "completions/max_terminated_length": 2200.0, "completions/mean_length": 242.1484375, "completions/mean_terminated_length": 242.1484375, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.04053333333333333, "grad_norm": 0.6419054269790649, "learning_rate": 4.5e-06, "loss": -0.0197, "num_tokens": 8572647.0, "reward": 0.8677629828453064, "reward_std": 0.37110045552253723, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.5804269313812256, "rewards/format_reward_step_strict": 0.9296875, "step": 38 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.4320344233229605e-08, "aux_brier/mean_group_std": 0.10604813857507032, "aux_brier/mean_r": 0.9041069110310074, "aux_brier/n_active_tok": 136.375, "aux_brier/n_groups": 7.65625, "aux_brier/n_step_records": 34.09375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.45264994948103243, "calib/avg_num_step_conf": 4.28125, "calib/ece": 0.3555645161290323, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.16129032258064516, "calib/gap": -0.059753834848902454, "calib/mean_conf": 0.49637096774193545, "calib/mu_c": 0.45035087719298245, "calib/mu_w": 0.5101047120418849, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3110483870967742, "calib/std_conf": 0.31395971253110144, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1043.0, "completions/max_terminated_length": 1043.0, "completions/mean_length": 246.81640625, "completions/mean_terminated_length": 247.78433227539062, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.0416, "grad_norm": 0.026621559634804726, "learning_rate": 4.472222222222223e-06, "loss": -0.0226, "num_tokens": 8741920.0, "reward": 0.8384658098220825, "reward_std": 0.35034242272377014, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.5960507392883301, "rewards/format_reward_step_strict": 0.93359375, "step": 39 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.717222444686973e-07, "aux_brier/mean_group_std": 0.08273824353456184, "aux_brier/mean_r": 0.923825084156272, "aux_brier/n_active_tok": 159.25, "aux_brier/n_groups": 9.03125, "aux_brier/n_step_records": 39.8125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5053658536585366, "calib/avg_num_step_conf": 4.98828125, "calib/ece": 0.3427584, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.116, "calib/gap": 0.019038699186991803, "calib/mean_conf": 0.4617216, "calib/mu_c": 0.4773333333333333, "calib/mu_w": 0.4582946341463415, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.31224, "calib/std_conf": 0.30503245193493755, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 766.0, "completions/max_terminated_length": 766.0, "completions/mean_length": 260.7109375, "completions/mean_terminated_length": 261.73333740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.042666666666666665, "grad_norm": 0.34497296810150146, "learning_rate": 4.444444444444444e-06, "loss": 0.0293, "num_tokens": 8915422.0, "reward": 0.8194528818130493, "reward_std": 0.2984590530395508, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.6606241464614868, "rewards/format_reward_step_strict": 0.94921875, "step": 40 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.04391604064136e-07, "aux_brier/mean_group_std": 0.07820914098556263, "aux_brier/mean_r": 0.9344050996274995, "aux_brier/n_active_tok": 150.25, "aux_brier/n_groups": 8.34375, "aux_brier/n_step_records": 37.5625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5114382472873039, "calib/avg_num_step_conf": 4.7421875, "calib/ece": 0.21831999999999996, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.068, "calib/gap": 0.0020402239270163602, "calib/mean_conf": 0.38112, "calib/mu_c": 0.38241758241758245, "calib/mu_w": 0.3803773584905661, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.11771999999999999, "calib/std_conf": 0.278381654567969, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 851.0, "completions/max_terminated_length": 851.0, "completions/mean_length": 244.734375, "completions/mean_terminated_length": 245.6941375732422, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.04373333333333333, "grad_norm": 0.36892974376678467, "learning_rate": 4.416666666666667e-06, "loss": 0.0224, "num_tokens": 9085322.0, "reward": 1.0038294792175293, "reward_std": 0.37469637393951416, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.6559433341026306, "rewards/format_reward_step_strict": 0.9453125, "step": 41 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 6.246743733395732e-09, "aux_brier/mean_group_std": 0.06735158377045987, "aux_brier/mean_r": 0.9480342624386815, "aux_brier/n_active_tok": 149.375, "aux_brier/n_groups": 8.0625, "aux_brier/n_step_records": 37.34375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5320121951219513, "calib/avg_num_step_conf": 4.67578125, "calib/ece": 0.21748031496062997, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.05905511811023622, "calib/gap": 0.01180516165626777, "calib/mean_conf": 0.3445669291338583, "calib/mu_c": 0.35256097560975613, "calib/mu_w": 0.34075581395348836, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11960629921259844, "calib/std_conf": 0.2856509513250953, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2133.0, "completions/max_terminated_length": 2133.0, "completions/mean_length": 243.15234375, "completions/mean_terminated_length": 243.15234375, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 0.0448, "grad_norm": 0.6862514615058899, "learning_rate": 4.388888888888889e-06, "loss": -0.025, "num_tokens": 9251937.0, "reward": 0.9844409823417664, "reward_std": 0.32201334834098816, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.695576548576355, "rewards/format_reward_step_strict": 0.98046875, "step": 42 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.117350127537001e-07, "aux_brier/mean_group_std": 0.05748583749375196, "aux_brier/mean_r": 0.9567184962157195, "aux_brier/n_active_tok": 156.75, "aux_brier/n_groups": 9.5, "aux_brier/n_step_records": 39.1875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4571264367816092, "calib/avg_num_step_conf": 4.9140625, "calib/ece": 0.2486746987951807, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.060240963855421686, "calib/gap": -0.04055172413793101, "calib/mean_conf": 0.3110040160642571, "calib/mu_c": 0.2826666666666667, "calib/mu_w": 0.3232183908045977, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12923694779116465, "calib/std_conf": 0.2726505994518655, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1094.0, "completions/max_terminated_length": 1094.0, "completions/mean_length": 263.7890625, "completions/mean_terminated_length": 264.82354736328125, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.04586666666666667, "grad_norm": 0.4393234848976135, "learning_rate": 4.361111111111112e-06, "loss": -0.0137, "num_tokens": 9424691.0, "reward": 0.9548882842063904, "reward_std": 0.3085918426513672, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.6789281368255615, "rewards/format_reward_step_strict": 0.96875, "step": 43 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 4.494938199428589e-09, "aux_brier/mean_group_std": 0.05338743252005995, "aux_brier/mean_r": 0.9590074203253443, "aux_brier/n_active_tok": 171.75, "aux_brier/n_groups": 9.0625, "aux_brier/n_step_records": 42.9375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5048535043191735, "calib/avg_num_step_conf": 5.4140625, "calib/ece": 0.19866141732283465, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.05905511811023622, "calib/gap": 0.003357378217116458, "calib/mean_conf": 0.28283464566929134, "calib/mu_c": 0.2854385964912281, "calib/mu_w": 0.28208121827411164, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12854330708661418, "calib/std_conf": 0.26993407663503965, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1177.0, "completions/max_terminated_length": 1177.0, "completions/mean_length": 290.1796875, "completions/mean_terminated_length": 291.3176574707031, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.046933333333333334, "grad_norm": 0.09494109451770782, "learning_rate": 4.333333333333334e-06, "loss": 0.0134, "num_tokens": 9605297.0, "reward": 0.8937559723854065, "reward_std": 0.25675415992736816, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.7390863299369812, "rewards/format_reward_step_strict": 0.96484375, "step": 44 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 7.827950360139724e-07, "aux_brier/mean_group_std": 0.05122791971037719, "aux_brier/mean_r": 0.9636901903632081, "aux_brier/n_active_tok": 176.625, "aux_brier/n_groups": 10.375, "aux_brier/n_step_records": 44.15625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5433559792534152, "calib/avg_num_step_conf": 5.5625, "calib/ece": 0.191884, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.032, "calib/gap": 0.03134567901234561, "calib/mean_conf": 0.22115600000000002, "calib/mu_c": 0.24234567901234566, "calib/mu_w": 0.21100000000000005, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04451999999999999, "calib/std_conf": 0.2293511448935889, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2795.0, "completions/max_terminated_length": 2795.0, "completions/mean_length": 309.34765625, "completions/mean_terminated_length": 309.34765625, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 0.048, "grad_norm": 0.03964204341173172, "learning_rate": 4.305555555555556e-06, "loss": 0.0625, "num_tokens": 9789538.0, "reward": 0.9754707217216492, "reward_std": 0.3682977855205536, "rewards/accuracy_reward_step": 0.31640625, "rewards/final_brier_reward_step": 0.7143828868865967, "rewards/format_reward_step_strict": 0.9609375, "step": 45 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.2189748136437473e-06, "aux_brier/mean_group_std": 0.07619248927643729, "aux_brier/mean_r": 0.9463890522674315, "aux_brier/n_active_tok": 183.5, "aux_brier/n_groups": 11.78125, "aux_brier/n_step_records": 45.875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5472061657032756, "calib/avg_num_step_conf": 5.7578125, "calib/ece": 0.22392519685039372, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.027559055118110236, "calib/gap": 0.005120887747092018, "calib/mean_conf": 0.19741338582677165, "calib/mu_c": 0.20090123456790124, "calib/mu_w": 0.19578034682080922, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.05122047244094488, "calib/std_conf": 0.21766503915337101, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2082.0, "completions/max_terminated_length": 2082.0, "completions/mean_length": 304.21484375, "completions/mean_terminated_length": 304.21484375, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 0.04906666666666667, "grad_norm": 0.08802807331085205, "learning_rate": 4.277777777777778e-06, "loss": 0.0079, "num_tokens": 9972185.0, "reward": 0.9807976484298706, "reward_std": 0.2961314618587494, "rewards/accuracy_reward_step": 0.31640625, "rewards/final_brier_reward_step": 0.7122529745101929, "rewards/format_reward_step_strict": 0.97265625, "step": 46 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 8.231699693272976e-07, "aux_brier/mean_group_std": 0.05437717597896134, "aux_brier/mean_r": 0.9577333014632524, "aux_brier/n_active_tok": 208.625, "aux_brier/n_groups": 14.84375, "aux_brier/n_step_records": 52.15625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5303409090909091, "calib/avg_num_step_conf": 6.578125, "calib/ece": 0.19926693227091632, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.00796812749003984, "calib/gap": 0.001556818181818187, "calib/mean_conf": 0.15690836653386453, "calib/mu_c": 0.158, "calib/mu_w": 0.15644318181818181, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.028685258964143426, "calib/std_conf": 0.17983189242065695, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2550.0, "completions/max_terminated_length": 2550.0, "completions/mean_length": 340.875, "completions/mean_terminated_length": 340.875, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 0.050133333333333335, "grad_norm": 0.2760876417160034, "learning_rate": 4.25e-06, "loss": 0.1166, "num_tokens": 10165425.0, "reward": 0.9496136903762817, "reward_std": 0.28500351309776306, "rewards/accuracy_reward_step": 0.29296875, "rewards/final_brier_reward_step": 0.712517261505127, "rewards/format_reward_step_strict": 0.95703125, "step": 47 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.8346091193444103e-07, "aux_brier/mean_group_std": 0.06138813673286345, "aux_brier/mean_r": 0.9542347008064548, "aux_brier/n_active_tok": 171.875, "aux_brier/n_groups": 9.71875, "aux_brier/n_step_records": 42.96875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5143207282913165, "calib/avg_num_step_conf": 5.4296875, "calib/ece": 0.2421652173913044, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.011857707509881422, "calib/gap": -0.010842226890756285, "calib/mean_conf": 0.14684664031620553, "calib/mu_c": 0.13964705882352943, "calib/mu_w": 0.15048928571428571, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.02652173913043478, "calib/std_conf": 0.15776694035490982, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 286.203125, "completions/mean_terminated_length": 287.32550048828125, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.0512, "grad_norm": 0.04931079223752022, "learning_rate": 4.222222222222223e-06, "loss": 0.0162, "num_tokens": 10342381.0, "reward": 0.9951045513153076, "reward_std": 0.2820585072040558, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.6991681456565857, "rewards/format_reward_step_strict": 0.9765625, "step": 48 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.0588506880182535e-06, "aux_brier/mean_group_std": 0.056505151484209745, "aux_brier/mean_r": 0.9558889455944084, "aux_brier/n_active_tok": 195.125, "aux_brier/n_groups": 11.6875, "aux_brier/n_step_records": 48.78125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5084617501375894, "calib/avg_num_step_conf": 6.1640625, "calib/ece": 0.26581460000000007, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009991455696202509, "calib/mean_conf": 0.1081854, "calib/mu_c": 0.11449999999999999, "calib/mu_w": 0.10450854430379748, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.003, "calib/std_conf": 0.10756204403431538, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2422.0, "completions/max_terminated_length": 2422.0, "completions/mean_length": 357.16015625, "completions/mean_terminated_length": 357.16015625, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.05226666666666667, "grad_norm": 0.8869736194610596, "learning_rate": 4.194444444444445e-06, "loss": 0.0765, "num_tokens": 10538350.0, "reward": 1.0061066150665283, "reward_std": 0.28344491124153137, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.6728639602661133, "rewards/format_reward_step_strict": 0.95703125, "step": 49 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.108546392681765e-06, "aux_brier/mean_group_std": 0.04090366449781096, "aux_brier/mean_r": 0.9695145255332592, "aux_brier/n_active_tok": 197.5, "aux_brier/n_groups": 13.0625, "aux_brier/n_step_records": 49.375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5207922095012502, "calib/avg_num_step_conf": 6.28125, "calib/ece": 0.3382629482071713, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.01195219123505976, "calib/gap": -0.01959244637452298, "calib/mean_conf": 0.126199203187251, "calib/mu_c": 0.1145686274509804, "calib/mu_w": 0.13416107382550338, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02904382470119522, "calib/std_conf": 0.14819941932346195, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1389.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 349.48828125, "completions/mean_terminated_length": 350.8588562011719, "completions/min_length": 0.0, "completions/min_terminated_length": 26.0, "epoch": 0.05333333333333334, "grad_norm": 0.14290562272071838, "learning_rate": 4.166666666666667e-06, "loss": -0.0249, "num_tokens": 10733179.0, "reward": 1.0418678522109985, "reward_std": 0.2925070524215698, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.6284089088439941, "rewards/format_reward_step_strict": 0.96484375, "step": 50 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.3822614484959583e-06, "aux_brier/mean_group_std": 0.047827093132465026, "aux_brier/mean_r": 0.9717914343826168, "aux_brier/n_active_tok": 195.125, "aux_brier/n_groups": 10.6875, "aux_brier/n_step_records": 48.78125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4762771350578448, "calib/avg_num_step_conf": 6.109375, "calib/ece": 0.31774901960784313, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": -0.01997738203561679, "calib/mean_conf": 0.10805490196078434, "calib/mu_c": 0.09575510204081635, "calib/mu_w": 0.11573248407643313, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.020745098039215683, "calib/std_conf": 0.12316498151686303, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1002.0, "completions/max_terminated_length": 1002.0, "completions/mean_length": 331.3203125, "completions/mean_terminated_length": 332.61962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.0544, "grad_norm": 0.16508427262306213, "learning_rate": 4.138888888888889e-06, "loss": 0.0142, "num_tokens": 10927293.0, "reward": 1.0353152751922607, "reward_std": 0.2406586855649948, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.6490738391876221, "rewards/format_reward_step_strict": 0.98046875, "step": 51 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 7.570171554061034e-07, "aux_brier/mean_group_std": 0.07408634926746249, "aux_brier/mean_r": 0.9532624709956302, "aux_brier/n_active_tok": 166.0, "aux_brier/n_groups": 9.15625, "aux_brier/n_step_records": 41.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5591278640059127, "calib/avg_num_step_conf": 5.265625, "calib/ece": 0.4136469411764705, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.007451731337767942, "calib/mean_conf": 0.07654913725490195, "calib/mu_c": 0.08040650406504068, "calib/mu_w": 0.07295477272727274, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.00392156862745098, "calib/std_conf": 0.09322034923034896, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1027.0, "completions/max_terminated_length": 1027.0, "completions/mean_length": 310.77734375, "completions/mean_terminated_length": 311.99609375, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.055466666666666664, "grad_norm": 0.18967147171497345, "learning_rate": 4.111111111111111e-06, "loss": -0.0302, "num_tokens": 11114804.0, "reward": 1.116457462310791, "reward_std": 0.31173473596572876, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5752047300338745, "rewards/format_reward_step_strict": 0.984375, "step": 52 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.1322278472023513e-06, "aux_brier/mean_group_std": 0.05321139141871668, "aux_brier/mean_r": 0.9492036096971312, "aux_brier/n_active_tok": 202.5, "aux_brier/n_groups": 12.8125, "aux_brier/n_step_records": 50.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5176851851851852, "calib/avg_num_step_conf": 6.3828125, "calib/ece": 0.3905490196078432, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001962962962962972, "calib/mean_conf": 0.08929411764705882, "calib/mu_c": 0.09033333333333333, "calib/mu_w": 0.08837037037037036, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004627450980392158, "calib/std_conf": 0.09922588492882152, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2000.0, "completions/max_terminated_length": 2000.0, "completions/mean_length": 375.09765625, "completions/mean_terminated_length": 375.09765625, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 0.05653333333333333, "grad_norm": 0.22604136168956757, "learning_rate": 4.083333333333334e-06, "loss": 0.0584, "num_tokens": 11316653.0, "reward": 1.1153674125671387, "reward_std": 0.2891712188720703, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5942816734313965, "rewards/format_reward_step_strict": 0.99609375, "step": 53 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.1980869404636394e-06, "aux_brier/mean_group_std": 0.06465943559997471, "aux_brier/mean_r": 0.9513168879103168, "aux_brier/n_active_tok": 188.75, "aux_brier/n_groups": 11.8125, "aux_brier/n_step_records": 47.1875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.47375172068577154, "calib/avg_num_step_conf": 5.97265625, "calib/ece": 0.4015019762845849, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0025910399199098877, "calib/mean_conf": 0.08584980237154151, "calib/mu_c": 0.08450819672131149, "calib/mu_w": 0.08709923664122138, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.002569169960474308, "calib/std_conf": 0.09009749424007524, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1119.0, "completions/max_terminated_length": 1119.0, "completions/mean_length": 322.02734375, "completions/mean_terminated_length": 323.29022216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.0576, "grad_norm": 0.770313560962677, "learning_rate": 4.055555555555556e-06, "loss": 0.0044, "num_tokens": 11505324.0, "reward": 1.113966464996338, "reward_std": 0.2687970697879791, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5730531215667725, "rewards/format_reward_step_strict": 0.97265625, "step": 54 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.4401057884478337e-06, "aux_brier/mean_group_std": 0.07322190067020191, "aux_brier/mean_r": 0.9466186695298467, "aux_brier/n_active_tok": 189.375, "aux_brier/n_groups": 12.28125, "aux_brier/n_step_records": 47.34375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4996967859308672, "calib/avg_num_step_conf": 5.91796875, "calib/ece": 0.31548, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.008, "calib/gap": 0.014952496462502546, "calib/mean_conf": 0.09187999999999999, "calib/mu_c": 0.10103092783505158, "calib/mu_w": 0.08607843137254903, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.009680000000000001, "calib/std_conf": 0.1265703978029618, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2173.0, "completions/max_terminated_length": 2173.0, "completions/mean_length": 358.234375, "completions/mean_terminated_length": 359.6392517089844, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.058666666666666666, "grad_norm": 0.14670225977897644, "learning_rate": 4.027777777777779e-06, "loss": 0.0283, "num_tokens": 11704856.0, "reward": 1.027816891670227, "reward_std": 0.3024364113807678, "rewards/accuracy_reward_step": 0.37890625, "rewards/final_brier_reward_step": 0.6503300666809082, "rewards/format_reward_step_strict": 0.97265625, "step": 55 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.122842637646393e-06, "aux_brier/mean_group_std": 0.05879436083996134, "aux_brier/mean_r": 0.9589549762011267, "aux_brier/n_active_tok": 187.75, "aux_brier/n_groups": 11.1875, "aux_brier/n_step_records": 46.9375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5174732100197939, "calib/avg_num_step_conf": 5.87890625, "calib/ece": 0.2992234126984126, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008867176301958768, "calib/mean_conf": 0.07284007936507936, "calib/mu_c": 0.0734065934065934, "calib/mu_w": 0.07251987577639753, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0054761904761904765, "calib/std_conf": 0.08554317217839719, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2558.0, "completions/max_terminated_length": 2558.0, "completions/mean_length": 367.98828125, "completions/mean_terminated_length": 367.98828125, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.05973333333333333, "grad_norm": 0.16074801981449127, "learning_rate": 4.000000000000001e-06, "loss": 0.0882, "num_tokens": 11905901.0, "reward": 1.0079951286315918, "reward_std": 0.2715536057949066, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.6647930145263672, "rewards/format_reward_step_strict": 0.97265625, "step": 56 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.1918806131226987e-07, "aux_brier/mean_group_std": 0.04542189579562134, "aux_brier/mean_r": 0.9712381790108774, "aux_brier/n_active_tok": 177.75, "aux_brier/n_groups": 9.5, "aux_brier/n_step_records": 44.4375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.38972431077694236, "calib/avg_num_step_conf": 5.5546875, "calib/ece": 0.46893280632411066, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.02461716791979951, "calib/mean_conf": 0.06814229249011859, "calib/mu_c": 0.05646616541353383, "calib/mu_w": 0.08108333333333334, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0056916996047430835, "calib/std_conf": 0.07984240597810757, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1121.0, "completions/max_terminated_length": 1121.0, "completions/mean_length": 338.27734375, "completions/mean_terminated_length": 339.60394287109375, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.0608, "grad_norm": 0.06650305539369583, "learning_rate": 3.972222222222223e-06, "loss": 0.0277, "num_tokens": 12099292.0, "reward": 1.144626259803772, "reward_std": 0.25971370935440063, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5160050988197327, "rewards/format_reward_step_strict": 0.984375, "step": 57 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.9477825325964204e-06, "aux_brier/mean_group_std": 0.037870995762718844, "aux_brier/mean_r": 0.9764260570047406, "aux_brier/n_active_tok": 221.875, "aux_brier/n_groups": 16.0625, "aux_brier/n_step_records": 55.46875, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.49853854585312385, "calib/avg_num_step_conf": 6.93359375, "calib/ece": 0.2776422764227642, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005334307636097879, "calib/mean_conf": 0.06788617886178863, "calib/mu_c": 0.06823529411764707, "calib/mu_w": 0.06770186335403729, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.06247929130117005, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2753.0, "completions/max_terminated_length": 2753.0, "completions/mean_length": 450.12890625, "completions/mean_terminated_length": 451.8941345214844, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.06186666666666667, "grad_norm": 0.2836063802242279, "learning_rate": 3.944444444444445e-06, "loss": 0.1286, "num_tokens": 12320845.0, "reward": 0.9760801792144775, "reward_std": 0.32149428129196167, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.6621332168579102, "rewards/format_reward_step_strict": 0.95703125, "step": 58 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.1172077150642963e-06, "aux_brier/mean_group_std": 0.039330208826736476, "aux_brier/mean_r": 0.9636895815931609, "aux_brier/n_active_tok": 205.875, "aux_brier/n_groups": 12.4375, "aux_brier/n_step_records": 51.46875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.48166023166023164, "calib/avg_num_step_conf": 6.44921875, "calib/ece": 0.37063241106719375, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.008438867438867431, "calib/mean_conf": 0.057984189723320166, "calib/mu_c": 0.05304761904761905, "calib/mu_w": 0.06148648648648648, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.006798418972332016, "calib/std_conf": 0.054789281457994804, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2499.0, "completions/max_terminated_length": 2499.0, "completions/mean_length": 380.55078125, "completions/mean_terminated_length": 382.04315185546875, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.06293333333333333, "grad_norm": 0.3582397699356079, "learning_rate": 3.916666666666667e-06, "loss": -0.0261, "num_tokens": 12524514.0, "reward": 1.0533034801483154, "reward_std": 0.27245858311653137, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6116515398025513, "rewards/format_reward_step_strict": 0.98046875, "step": 59 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 4.900711513822209e-07, "aux_brier/mean_group_std": 0.0503927898602, "aux_brier/mean_r": 0.9692935058392566, "aux_brier/n_active_tok": 183.5, "aux_brier/n_groups": 11.21875, "aux_brier/n_step_records": 45.875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4329994763026971, "calib/avg_num_step_conf": 5.92578125, "calib/ece": 0.41379032258064513, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.017140612725844454, "calib/mean_conf": 0.057419354838709684, "calib/mu_c": 0.04815789473684211, "calib/mu_w": 0.06529850746268656, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.005766129032258064, "calib/std_conf": 0.059809792160593934, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2440.0, "completions/max_terminated_length": 2440.0, "completions/mean_length": 385.80859375, "completions/mean_terminated_length": 387.32159423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.064, "grad_norm": 0.4637250602245331, "learning_rate": 3.88888888888889e-06, "loss": 0.0458, "num_tokens": 12732137.0, "reward": 1.0627686977386475, "reward_std": 0.28386422991752625, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5557624697685242, "rewards/format_reward_step_strict": 0.95703125, "step": 60 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.201558979095466e-06, "aux_brier/mean_group_std": 0.04909083466379876, "aux_brier/mean_r": 0.9660164374378446, "aux_brier/n_active_tok": 188.25, "aux_brier/n_groups": 13.6875, "aux_brier/n_step_records": 47.0625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4598214285714286, "calib/avg_num_step_conf": 5.91015625, "calib/ece": 0.5002390438247012, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": -0.009219552929085308, "calib/mean_conf": 0.06087649402390439, "calib/mu_c": 0.05676258992805756, "calib/mu_w": 0.06598214285714286, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0036653386454183266, "calib/std_conf": 0.0767571627739752, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2202.0, "completions/max_terminated_length": 2202.0, "completions/mean_length": 351.453125, "completions/mean_terminated_length": 352.8313903808594, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.06506666666666666, "grad_norm": 0.34882640838623047, "learning_rate": 3.861111111111112e-06, "loss": 0.0681, "num_tokens": 12926173.0, "reward": 1.1527085304260254, "reward_std": 0.2640078663825989, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.4858339726924896, "rewards/format_reward_step_strict": 0.9765625, "step": 61 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -6.954978353346242e-06, "aux_brier/mean_group_std": 0.04640384300669598, "aux_brier/mean_r": 0.9693615416108637, "aux_brier/n_active_tok": 199.625, "aux_brier/n_groups": 13.28125, "aux_brier/n_step_records": 49.90625, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.4310763888888889, "calib/avg_num_step_conf": 6.2421875, "calib/ece": 0.3576639344262295, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0021555555555555425, "calib/mean_conf": 0.05217213114754098, "calib/mu_c": 0.05090000000000001, "calib/mu_w": 0.05305555555555555, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.05048732881494173, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2842.0, "completions/max_terminated_length": 2842.0, "completions/mean_length": 418.1953125, "completions/mean_terminated_length": 421.4881896972656, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.06613333333333334, "grad_norm": 1.137746810913086, "learning_rate": 3.833333333333334e-06, "loss": 0.0835, "num_tokens": 13140311.0, "reward": 1.0145448446273804, "reward_std": 0.3309886157512665, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.5972417593002319, "rewards/format_reward_step_strict": 0.94921875, "step": 62 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.3098082090046148e-06, "aux_brier/mean_group_std": 0.05808807354168096, "aux_brier/mean_r": 0.9589815650320522, "aux_brier/n_active_tok": 208.25, "aux_brier/n_groups": 14.375, "aux_brier/n_step_records": 52.0625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5473721272645797, "calib/avg_num_step_conf": 6.55859375, "calib/ece": 0.45544, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006080276550796987, "calib/mean_conf": 0.055040000000000006, "calib/mu_c": 0.05803149606299211, "calib/mu_w": 0.051951219512195126, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00124, "calib/std_conf": 0.04809364199143168, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2942.0, "completions/max_terminated_length": 2942.0, "completions/mean_length": 410.234375, "completions/mean_terminated_length": 411.8431701660156, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.0672, "grad_norm": 0.4824405610561371, "learning_rate": 3.8055555555555556e-06, "loss": 0.0679, "num_tokens": 13353971.0, "reward": 1.1175824403762817, "reward_std": 0.32058730721473694, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5328296422958374, "rewards/format_reward_step_strict": 0.9765625, "step": 63 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 6.089682363930038e-07, "aux_brier/mean_group_std": 0.03929437676435583, "aux_brier/mean_r": 0.966472297348743, "aux_brier/n_active_tok": 206.75, "aux_brier/n_groups": 13.53125, "aux_brier/n_step_records": 51.6875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5280862603305785, "calib/avg_num_step_conf": 6.46484375, "calib/ece": 0.46465060240963857, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0057679493801652895, "calib/mean_conf": 0.05149397590361446, "calib/mu_c": 0.05429687500000001, "calib/mu_w": 0.04852892561983472, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001044176706827309, "calib/std_conf": 0.04849329192064222, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2827.0, "completions/max_terminated_length": 2827.0, "completions/mean_length": 415.7890625, "completions/mean_terminated_length": 415.7890625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.06826666666666667, "grad_norm": 0.17579436302185059, "learning_rate": 3.777777777777778e-06, "loss": 0.0318, "num_tokens": 13564189.0, "reward": 1.118802785873413, "reward_std": 0.2620909810066223, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5220866799354553, "rewards/format_reward_step_strict": 0.96875, "step": 64 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.4793348144536154e-06, "aux_brier/mean_group_std": 0.04025712574762179, "aux_brier/mean_r": 0.9747281687179985, "aux_brier/n_active_tok": 162.625, "aux_brier/n_groups": 9.4375, "aux_brier/n_step_records": 40.65625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5354064039408868, "calib/avg_num_step_conf": 5.1875, "calib/ece": 0.40806250000000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0025655172413793156, "calib/mean_conf": 0.045062500000000005, "calib/mu_c": 0.04646551724137932, "calib/mu_w": 0.0439, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.035292029464880595, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 869.0, "completions/max_terminated_length": 869.0, "completions/mean_length": 320.703125, "completions/mean_terminated_length": 321.9608154296875, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.06933333333333333, "grad_norm": 0.18231400847434998, "learning_rate": 3.7500000000000005e-06, "loss": 0.0034, "num_tokens": 13751313.0, "reward": 1.0917396545410156, "reward_std": 0.20031163096427917, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.5778957009315491, "rewards/format_reward_step_strict": 0.98828125, "step": 65 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.3928245592431665e-06, "aux_brier/mean_group_std": 0.03936907017730624, "aux_brier/mean_r": 0.9696451228882946, "aux_brier/n_active_tok": 205.5, "aux_brier/n_groups": 15.1875, "aux_brier/n_step_records": 51.375, "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.4852816901408451, "calib/avg_num_step_conf": 6.65625, "calib/ece": 0.36332644628099175, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0002499999999999933, "calib/mean_conf": 0.04989669421487604, "calib/mu_c": 0.04975, "calib/mu_w": 0.049999999999999996, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.03719628621354623, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2605.0, "completions/max_terminated_length": 2605.0, "completions/mean_length": 439.7109375, "completions/mean_terminated_length": 444.9249267578125, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.0704, "grad_norm": 0.2523045539855957, "learning_rate": 3.7222222222222225e-06, "loss": 0.0963, "num_tokens": 13970231.0, "reward": 0.9941548109054565, "reward_std": 0.27787861227989197, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.5781815648078918, "rewards/format_reward_step_strict": 0.91796875, "step": 66 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.2032974873464894e-06, "aux_brier/mean_group_std": 0.04711280885917892, "aux_brier/mean_r": 0.9700329708544827, "aux_brier/n_active_tok": 169.625, "aux_brier/n_groups": 9.53125, "aux_brier/n_step_records": 42.40625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4241315136476427, "calib/avg_num_step_conf": 5.30859375, "calib/ece": 0.4459448818897637, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.015548387096774197, "calib/mean_conf": 0.04940944881889764, "calib/mu_c": 0.041451612903225805, "calib/mu_w": 0.057, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0035826771653543307, "calib/std_conf": 0.06277730347808565, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2163.0, "completions/max_terminated_length": 2163.0, "completions/mean_length": 375.67578125, "completions/mean_terminated_length": 375.67578125, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.07146666666666666, "grad_norm": 0.12285065650939941, "learning_rate": 3.694444444444445e-06, "loss": 0.0227, "num_tokens": 14171412.0, "reward": 1.1061146259307861, "reward_std": 0.22164198756217957, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5338335633277893, "rewards/format_reward_step_strict": 0.9765625, "step": 67 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.1232037882043e-06, "aux_brier/mean_group_std": 0.030176616952614116, "aux_brier/mean_r": 0.980834637625183, "aux_brier/n_active_tok": 186.25, "aux_brier/n_groups": 12.03125, "aux_brier/n_step_records": 46.5625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.3720454545454545, "calib/avg_num_step_conf": 5.82421875, "calib/ece": 0.3940000000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.015194805194805198, "calib/mean_conf": 0.0476, "calib/mu_c": 0.039090909090909086, "calib/mu_w": 0.054285714285714284, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0008, "calib/std_conf": 0.03278780261011707, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2373.0, "completions/max_terminated_length": 2373.0, "completions/mean_length": 390.41796875, "completions/mean_terminated_length": 390.41796875, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.07253333333333334, "grad_norm": 0.41778209805488586, "learning_rate": 3.6666666666666666e-06, "loss": 0.0667, "num_tokens": 14375447.0, "reward": 1.058363914489746, "reward_std": 0.2913435101509094, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.5772062540054321, "rewards/format_reward_step_strict": 0.96875, "step": 68 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.1240967232605268e-06, "aux_brier/mean_group_std": 0.050432826361544364, "aux_brier/mean_r": 0.9661016659552276, "aux_brier/n_active_tok": 167.375, "aux_brier/n_groups": 10.21875, "aux_brier/n_step_records": 41.84375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4689986282578875, "calib/avg_num_step_conf": 5.23046875, "calib/ece": 0.3199603174603175, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": -0.01727160493827161, "calib/mean_conf": 0.0521031746031746, "calib/mu_c": 0.041, "calib/mu_w": 0.05827160493827161, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.00746031746031746, "calib/std_conf": 0.08617500920884266, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2448.0, "completions/max_terminated_length": 2448.0, "completions/mean_length": 419.3984375, "completions/mean_terminated_length": 419.3984375, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.0736, "grad_norm": 0.0447511188685894, "learning_rate": 3.638888888888889e-06, "loss": 0.0633, "num_tokens": 14587309.0, "reward": 0.9966806173324585, "reward_std": 0.2783878743648529, "rewards/accuracy_reward_step": 0.3515625, "rewards/final_brier_reward_step": 0.6429726481437683, "rewards/format_reward_step_strict": 0.96875, "step": 69 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 7.4102203126980015e-06, "aux_brier/mean_group_std": 0.05047837073747982, "aux_brier/mean_r": 0.9658483138018061, "aux_brier/n_active_tok": 195.0, "aux_brier/n_groups": 14.65625, "aux_brier/n_step_records": 48.75, "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.5441705002875216, "calib/avg_num_step_conf": 6.1015625, "calib/ece": 0.34613636363636363, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0027666762507188025, "calib/mean_conf": 0.043946280991735545, "calib/mu_c": 0.04563829787234042, "calib/mu_w": 0.04287162162162162, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0008264462809917356, "calib/std_conf": 0.03549277086520069, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2830.0, "completions/max_terminated_length": 2830.0, "completions/mean_length": 449.90234375, "completions/mean_terminated_length": 453.44488525390625, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.07466666666666667, "grad_norm": 0.40294215083122253, "learning_rate": 3.6111111111111115e-06, "loss": 0.1819, "num_tokens": 14809476.0, "reward": 0.9851260185241699, "reward_std": 0.2996982932090759, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.6045666933059692, "rewards/format_reward_step_strict": 0.93359375, "step": 70 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.141000971593975e-06, "aux_brier/mean_group_std": 0.049329877547884066, "aux_brier/mean_r": 0.9649408123850951, "aux_brier/n_active_tok": 196.375, "aux_brier/n_groups": 11.875, "aux_brier/n_step_records": 49.09375, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5205055802070727, "calib/avg_num_step_conf": 6.140625, "calib/ece": 0.4036530612244898, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0004803684281296447, "calib/mean_conf": 0.05144897959183674, "calib/mu_c": 0.05171171171171173, "calib/mu_w": 0.05123134328358209, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0010204081632653062, "calib/std_conf": 0.0350457783561932, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2731.0, "completions/max_terminated_length": 2731.0, "completions/mean_length": 437.84765625, "completions/mean_terminated_length": 441.2952880859375, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.07573333333333333, "grad_norm": 0.34749463200569153, "learning_rate": 3.5833333333333335e-06, "loss": 0.0656, "num_tokens": 15025973.0, "reward": 1.0552055835723877, "reward_std": 0.31829553842544556, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.5645725727081299, "rewards/format_reward_step_strict": 0.953125, "step": 71 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.897831544780184e-06, "aux_brier/mean_group_std": 0.02546374854744683, "aux_brier/mean_r": 0.9808279287671078, "aux_brier/n_active_tok": 195.0, "aux_brier/n_groups": 12.34375, "aux_brier/n_step_records": 48.75, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5002283105022831, "calib/avg_num_step_conf": 6.09375, "calib/ece": 0.3670916334661355, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.000158512720156552, "calib/mean_conf": 0.05123505976095617, "calib/mu_c": 0.05114285714285715, "calib/mu_w": 0.0513013698630137, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.03471667518287719, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2620.0, "completions/max_terminated_length": 2620.0, "completions/mean_length": 417.3359375, "completions/mean_terminated_length": 418.9725646972656, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.0768, "grad_norm": 0.2863672971725464, "learning_rate": 3.555555555555556e-06, "loss": 0.0628, "num_tokens": 15237219.0, "reward": 1.0475611686706543, "reward_std": 0.2560883164405823, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6043074131011963, "rewards/format_reward_step_strict": 0.97265625, "step": 72 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 4.610961066298103e-06, "aux_brier/mean_group_std": 0.04337953391220954, "aux_brier/mean_r": 0.9738971359935481, "aux_brier/n_active_tok": 174.5, "aux_brier/n_groups": 9.625, "aux_brier/n_step_records": 43.625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.44381250000000005, "calib/avg_num_step_conf": 5.58203125, "calib/ece": 0.44490118577075094, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004210000000000005, "calib/mean_conf": 0.04916996047430831, "calib/mu_c": 0.04704, "calib/mu_w": 0.051250000000000004, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.034670994113304016, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1412.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 388.73828125, "completions/mean_terminated_length": 390.26275634765625, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.07786666666666667, "grad_norm": 0.3675096035003662, "learning_rate": 3.5277777777777784e-06, "loss": 0.0033, "num_tokens": 15443768.0, "reward": 1.1199455261230469, "reward_std": 0.28744661808013916, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5422824621200562, "rewards/format_reward_step_strict": 0.984375, "step": 73 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.4108433704105323e-06, "aux_brier/mean_group_std": 0.02549180726143498, "aux_brier/mean_r": 0.985227529095446, "aux_brier/n_active_tok": 201.375, "aux_brier/n_groups": 12.21875, "aux_brier/n_step_records": 50.34375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5235201012978791, "calib/avg_num_step_conf": 6.29296875, "calib/ece": 0.4140873015873016, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0018518518518518476, "calib/mean_conf": 0.05321428571428572, "calib/mu_c": 0.052222222222222225, "calib/mu_w": 0.05407407407407407, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0015079365079365078, "calib/std_conf": 0.03993528637064806, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2280.0, "completions/max_terminated_length": 2280.0, "completions/mean_length": 409.79296875, "completions/mean_terminated_length": 411.4000244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.07893333333333333, "grad_norm": 0.23363105952739716, "learning_rate": 3.5e-06, "loss": 0.0495, "num_tokens": 15652603.0, "reward": 1.0918989181518555, "reward_std": 0.28715503215789795, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5707206726074219, "rewards/format_reward_step_strict": 0.984375, "step": 74 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.5379526267244046e-06, "aux_brier/mean_group_std": 0.05564478248335544, "aux_brier/mean_r": 0.9594891161755524, "aux_brier/n_active_tok": 180.125, "aux_brier/n_groups": 9.0625, "aux_brier/n_step_records": 45.03125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5691056910569106, "calib/avg_num_step_conf": 5.64453125, "calib/ece": 0.5977952755905512, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002180216802168025, "calib/mean_conf": 0.05062992125984252, "calib/mu_c": 0.05140243902439024, "calib/mu_w": 0.049222222222222216, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0013779527559055118, "calib/std_conf": 0.03421212271898196, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 874.0, "completions/max_terminated_length": 874.0, "completions/mean_length": 366.98828125, "completions/mean_terminated_length": 368.4274597167969, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.08, "grad_norm": 0.13181133568286896, "learning_rate": 3.4722222222222224e-06, "loss": 0.0055, "num_tokens": 15851304.0, "reward": 1.2383170127868652, "reward_std": 0.26131200790405273, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.4063929617404938, "rewards/format_reward_step_strict": 0.984375, "step": 75 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.3144727633029873e-06, "aux_brier/mean_group_std": 0.04790952991891247, "aux_brier/mean_r": 0.9658543030835265, "aux_brier/n_active_tok": 191.875, "aux_brier/n_groups": 12.28125, "aux_brier/n_step_records": 47.96875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.45326492537313434, "calib/avg_num_step_conf": 5.99609375, "calib/ece": 0.4740551181102362, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.010891791044776139, "calib/mean_conf": 0.053503937007874015, "calib/mu_c": 0.04835820895522387, "calib/mu_w": 0.05925000000000001, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.041430213406705566, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2190.0, "completions/max_terminated_length": 2190.0, "completions/mean_length": 405.296875, "completions/mean_terminated_length": 405.296875, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.08106666666666666, "grad_norm": 0.19137753546237946, "learning_rate": 3.444444444444445e-06, "loss": 0.0287, "num_tokens": 16058116.0, "reward": 1.1433509588241577, "reward_std": 0.27575671672821045, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5109038949012756, "rewards/format_reward_step_strict": 0.984375, "step": 76 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.123670227174081e-06, "aux_brier/mean_group_std": 0.055487238056417, "aux_brier/mean_r": 0.9620829956995886, "aux_brier/n_active_tok": 183.625, "aux_brier/n_groups": 11.375, "aux_brier/n_step_records": 45.90625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5336568371282816, "calib/avg_num_step_conf": 5.77734375, "calib/ece": 0.5063882352941176, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0015235162374020034, "calib/mean_conf": 0.046552941176470596, "calib/mu_c": 0.04723404255319149, "calib/mu_w": 0.045710526315789486, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.02907266321222467, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1458.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 361.859375, "completions/mean_terminated_length": 363.2784423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.08213333333333334, "grad_norm": 0.22153005003929138, "learning_rate": 3.416666666666667e-06, "loss": -0.006, "num_tokens": 16255416.0, "reward": 1.159740686416626, "reward_std": 0.25410184264183044, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.48271265625953674, "rewards/format_reward_step_strict": 0.9765625, "step": 77 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 7.0395815451318455e-06, "aux_brier/mean_group_std": 0.05266590438357986, "aux_brier/mean_r": 0.9663016684179435, "aux_brier/n_active_tok": 189.875, "aux_brier/n_groups": 10.96875, "aux_brier/n_step_records": 47.46875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5655609695152424, "calib/avg_num_step_conf": 5.9609375, "calib/ece": 0.4081141732283464, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008458020989505238, "calib/mean_conf": 0.05023228346456693, "calib/mu_c": 0.054827586206896546, "calib/mu_w": 0.04636956521739131, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.000826771653543307, "calib/std_conf": 0.03471885367199788, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2197.0, "completions/max_terminated_length": 2197.0, "completions/mean_length": 421.3125, "completions/mean_terminated_length": 421.3125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.0832, "grad_norm": 0.2403065264225006, "learning_rate": 3.3888888888888893e-06, "loss": 0.0122, "num_tokens": 16471296.0, "reward": 1.082634687423706, "reward_std": 0.2751431167125702, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.5805391073226929, "rewards/format_reward_step_strict": 0.96875, "step": 78 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -6.425094732087633e-07, "aux_brier/mean_group_std": 0.05248357026304297, "aux_brier/mean_r": 0.9611150219353268, "aux_brier/n_active_tok": 208.75, "aux_brier/n_groups": 12.5625, "aux_brier/n_step_records": 52.1875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.48201392882243943, "calib/avg_num_step_conf": 6.625, "calib/ece": 0.5057539682539682, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006382978723404181, "calib/mean_conf": 0.057023809523809525, "calib/mu_c": 0.05730496453900708, "calib/mu_w": 0.056666666666666664, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0016269841269841267, "calib/std_conf": 0.050992835520065144, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2436.0, "completions/max_terminated_length": 2436.0, "completions/mean_length": 439.96875, "completions/mean_terminated_length": 441.69415283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.08426666666666667, "grad_norm": 0.1738610863685608, "learning_rate": 3.3611111111111117e-06, "loss": 0.0231, "num_tokens": 16690304.0, "reward": 1.1637357473373413, "reward_std": 0.21450775861740112, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.49088048934936523, "rewards/format_reward_step_strict": 0.98046875, "step": 79 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.0338823488195175e-06, "aux_brier/mean_group_std": 0.05436176500071805, "aux_brier/mean_r": 0.9586528520801569, "aux_brier/n_active_tok": 201.0, "aux_brier/n_groups": 11.875, "aux_brier/n_step_records": 50.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4549990585577104, "calib/avg_num_step_conf": 6.2890625, "calib/ece": 0.5021653543307086, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005202410092261363, "calib/mean_conf": 0.052952755905511815, "calib/mu_c": 0.05063829787234042, "calib/mu_w": 0.055840707964601784, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.03126502366868463, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2954.0, "completions/max_terminated_length": 2954.0, "completions/mean_length": 400.28125, "completions/mean_terminated_length": 400.28125, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.08533333333333333, "grad_norm": 0.01653141714632511, "learning_rate": 3.3333333333333333e-06, "loss": 0.0454, "num_tokens": 16894936.0, "reward": 1.1604695320129395, "reward_std": 0.3131512403488159, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.4856281280517578, "rewards/format_reward_step_strict": 0.9765625, "step": 80 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.933250929093578e-06, "aux_brier/mean_group_std": 0.05645298643859676, "aux_brier/mean_r": 0.9614778421297897, "aux_brier/n_active_tok": 215.5, "aux_brier/n_groups": 14.375, "aux_brier/n_step_records": 53.875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4622797175103953, "calib/avg_num_step_conf": 6.8125, "calib/ece": 0.5088306451612903, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005889380238928127, "calib/mean_conf": 0.051653225806451625, "calib/mu_c": 0.04906474820143885, "calib/mu_w": 0.05495412844036698, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.035685802805208645, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2594.0, "completions/max_terminated_length": 2594.0, "completions/mean_length": 450.7734375, "completions/mean_terminated_length": 452.54119873046875, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.0864, "grad_norm": 0.3035520017147064, "learning_rate": 3.3055555555555558e-06, "loss": 0.0802, "num_tokens": 17116582.0, "reward": 1.139133334159851, "reward_std": 0.2880188822746277, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.4705960750579834, "rewards/format_reward_step_strict": 0.95703125, "step": 81 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.6320311266037066e-06, "aux_brier/mean_group_std": 0.0444149556308205, "aux_brier/mean_r": 0.9713469110688953, "aux_brier/n_active_tok": 193.5, "aux_brier/n_groups": 10.40625, "aux_brier/n_step_records": 48.375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5341914191419141, "calib/avg_num_step_conf": 6.1953125, "calib/ece": 0.548406374501992, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011518151815181482, "calib/mean_conf": 0.049203187250996025, "calib/mu_c": 0.049666666666666665, "calib/mu_w": 0.048514851485148516, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.027078213221623286, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2275.0, "completions/max_terminated_length": 2275.0, "completions/mean_length": 382.77734375, "completions/mean_terminated_length": 384.2784423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.08746666666666666, "grad_norm": 0.4123811721801758, "learning_rate": 3.277777777777778e-06, "loss": 0.0258, "num_tokens": 17320125.0, "reward": 1.1856541633605957, "reward_std": 0.23773129284381866, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.44574177265167236, "rewards/format_reward_step_strict": 0.9765625, "step": 82 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -9.098681497815564e-06, "aux_brier/mean_group_std": 0.06456052063731982, "aux_brier/mean_r": 0.950112133064767, "aux_brier/n_active_tok": 222.375, "aux_brier/n_groups": 13.28125, "aux_brier/n_step_records": 55.59375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4394692665289256, "calib/avg_num_step_conf": 7.1328125, "calib/ece": 0.4631726907630522, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0072843491735537155, "calib/mean_conf": 0.05088353413654619, "calib/mu_c": 0.04734375000000001, "calib/mu_w": 0.054628099173553726, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.03790739204226033, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2622.0, "completions/max_terminated_length": 2622.0, "completions/mean_length": 478.51953125, "completions/mean_terminated_length": 482.28741455078125, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.08853333333333334, "grad_norm": 0.2955869436264038, "learning_rate": 3.2500000000000002e-06, "loss": 0.0294, "num_tokens": 17549890.0, "reward": 1.1055645942687988, "reward_std": 0.25129184126853943, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.508195698261261, "rewards/format_reward_step_strict": 0.95703125, "step": 83 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.6381674451680936e-06, "aux_brier/mean_group_std": 0.057917053717292825, "aux_brier/mean_r": 0.9582471606331168, "aux_brier/n_active_tok": 207.625, "aux_brier/n_groups": 13.59375, "aux_brier/n_step_records": 51.90625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5576023391812865, "calib/avg_num_step_conf": 6.640625, "calib/ece": 0.4935341365461848, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0065419103313840085, "calib/mean_conf": 0.048634538152610436, "calib/mu_c": 0.05162962962962962, "calib/mu_w": 0.045087719298245614, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.02946209483413582, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2865.0, "completions/max_terminated_length": 2865.0, "completions/mean_length": 415.23046875, "completions/mean_terminated_length": 416.8588562011719, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.0896, "grad_norm": 0.25714656710624695, "learning_rate": 3.2222222222222227e-06, "loss": 0.0668, "num_tokens": 17762109.0, "reward": 1.1339223384857178, "reward_std": 0.27987125515937805, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.48881447315216064, "rewards/format_reward_step_strict": 0.9609375, "step": 84 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.243233125311718e-06, "aux_brier/mean_group_std": 0.05724537831026044, "aux_brier/mean_r": 0.9563792340685818, "aux_brier/n_active_tok": 217.875, "aux_brier/n_groups": 13.40625, "aux_brier/n_step_records": 54.46875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5025252525252525, "calib/avg_num_step_conf": 6.87890625, "calib/ece": 0.47930522088353417, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0002839937839937967, "calib/mean_conf": 0.05081526104417672, "calib/mu_c": 0.050681818181818175, "calib/mu_w": 0.05096581196581197, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.030760344654199002, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2615.0, "completions/max_terminated_length": 2615.0, "completions/mean_length": 444.10546875, "completions/mean_terminated_length": 445.8470764160156, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.09066666666666667, "grad_norm": 0.1907009482383728, "learning_rate": 3.1944444444444443e-06, "loss": 0.0192, "num_tokens": 17983624.0, "reward": 1.124513030052185, "reward_std": 0.23452921211719513, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5058649778366089, "rewards/format_reward_step_strict": 0.96484375, "step": 85 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.1181750219635411e-06, "aux_brier/mean_group_std": 0.0637853497743649, "aux_brier/mean_r": 0.958083130671536, "aux_brier/n_active_tok": 224.5, "aux_brier/n_groups": 15.90625, "aux_brier/n_step_records": 56.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5510873640794901, "calib/avg_num_step_conf": 7.046875, "calib/ece": 0.45339525691699606, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003339332583427078, "calib/mean_conf": 0.048581027667984195, "calib/mu_c": 0.050244094488188985, "calib/mu_w": 0.04690476190476191, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.02960688340891283, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2249.0, "completions/max_terminated_length": 2249.0, "completions/mean_length": 447.2890625, "completions/mean_terminated_length": 447.2890625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.09173333333333333, "grad_norm": 0.1536872535943985, "learning_rate": 3.1666666666666667e-06, "loss": 0.0038, "num_tokens": 18203642.0, "reward": 1.1151789426803589, "reward_std": 0.28822559118270874, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5310282707214355, "rewards/format_reward_step_strict": 0.97265625, "step": 86 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -8.944583430592346e-07, "aux_brier/mean_group_std": 0.07160175411029325, "aux_brier/mean_r": 0.9462595403132358, "aux_brier/n_active_tok": 191.5, "aux_brier/n_groups": 11.46875, "aux_brier/n_step_records": 47.875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5774426091825308, "calib/avg_num_step_conf": 6.3046875, "calib/ece": 0.5753252032520326, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0032838745800672017, "calib/mean_conf": 0.04256097560975611, "calib/mu_c": 0.04381578947368421, "calib/mu_w": 0.04053191489361701, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.01781642904073745, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2421.0, "completions/max_terminated_length": 2421.0, "completions/mean_length": 360.046875, "completions/mean_terminated_length": 368.6880187988281, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.0928, "grad_norm": 0.2116979956626892, "learning_rate": 3.138888888888889e-06, "loss": -0.0403, "num_tokens": 18401310.0, "reward": 1.1824182271957397, "reward_std": 0.30382513999938965, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.41717302799224854, "rewards/format_reward_step_strict": 0.9609375, "step": 87 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.5890178639565988e-07, "aux_brier/mean_group_std": 0.06549508491704442, "aux_brier/mean_r": 0.9563637966917302, "aux_brier/n_active_tok": 224.125, "aux_brier/n_groups": 13.46875, "aux_brier/n_step_records": 56.03125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4663870885754226, "calib/avg_num_step_conf": 7.00390625, "calib/ece": 0.44258964143426294, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0037139407802770294, "calib/mean_conf": 0.04346613545816733, "calib/mu_c": 0.04155737704918033, "calib/mu_w": 0.04527131782945736, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02214444271668734, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2583.0, "completions/max_terminated_length": 2583.0, "completions/mean_length": 437.359375, "completions/mean_terminated_length": 437.359375, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.09386666666666667, "grad_norm": 0.07773371785879135, "learning_rate": 3.1111111111111116e-06, "loss": 0.0556, "num_tokens": 18623122.0, "reward": 1.1020925045013428, "reward_std": 0.23363706469535828, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5411823987960815, "rewards/format_reward_step_strict": 0.98046875, "step": 88 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -7.280286938929348e-07, "aux_brier/mean_group_std": 0.0518077710895089, "aux_brier/mean_r": 0.9629384917778879, "aux_brier/n_active_tok": 242.375, "aux_brier/n_groups": 16.125, "aux_brier/n_step_records": 60.59375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.49404533230887443, "calib/avg_num_step_conf": 7.578125, "calib/ece": 0.42131474103585664, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": -0.01051286976565502, "calib/mean_conf": 0.04442231075697212, "calib/mu_c": 0.038684210526315786, "calib/mu_w": 0.049197080291970806, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005776892430278884, "calib/std_conf": 0.06732780115972177, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2464.0, "completions/max_terminated_length": 2464.0, "completions/mean_length": 459.70703125, "completions/mean_terminated_length": 459.70703125, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.09493333333333333, "grad_norm": 0.01032259501516819, "learning_rate": 3.0833333333333336e-06, "loss": 0.0385, "num_tokens": 18849695.0, "reward": 1.0753793716430664, "reward_std": 0.2134411334991455, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.5593301057815552, "rewards/format_reward_step_strict": 0.97265625, "step": 89 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 4.459777750764893e-06, "aux_brier/mean_group_std": 0.09075002981173923, "aux_brier/mean_r": 0.9308858571250507, "aux_brier/n_active_tok": 255.875, "aux_brier/n_groups": 18.0625, "aux_brier/n_step_records": 63.96875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.564327485380117, "calib/avg_num_step_conf": 7.99609375, "calib/ece": 0.5107936507936508, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004294431731502672, "calib/mean_conf": 0.03682539682539683, "calib/mu_c": 0.03876811594202899, "calib/mu_w": 0.034473684210526316, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.01571829354054239, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2633.0, "completions/max_terminated_length": 2633.0, "completions/mean_length": 447.8984375, "completions/mean_terminated_length": 447.8984375, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.096, "grad_norm": 0.07545869052410126, "learning_rate": 3.055555555555556e-06, "loss": 0.0133, "num_tokens": 19067677.0, "reward": 1.1536109447479248, "reward_std": 0.18574708700180054, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.48163124918937683, "rewards/format_reward_step_strict": 0.98046875, "step": 90 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.4969813386954698e-06, "aux_brier/mean_group_std": 0.05015517888576402, "aux_brier/mean_r": 0.970973050927131, "aux_brier/n_active_tok": 242.625, "aux_brier/n_groups": 15.28125, "aux_brier/n_step_records": 60.65625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5331041507512095, "calib/avg_num_step_conf": 7.5859375, "calib/ece": 0.4921513944223108, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011261777438248063, "calib/mean_conf": 0.03454183266932271, "calib/mu_c": 0.03507575757575758, "calib/mu_w": 0.03394957983193277, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00039840637450199205, "calib/std_conf": 0.013658077124723993, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2512.0, "completions/max_terminated_length": 2512.0, "completions/mean_length": 460.265625, "completions/mean_terminated_length": 460.265625, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.09706666666666666, "grad_norm": 0.2291233241558075, "learning_rate": 3.0277777777777776e-06, "loss": 0.0773, "num_tokens": 19293217.0, "reward": 1.1307750940322876, "reward_std": 0.2191537320613861, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.4996629059314728, "rewards/format_reward_step_strict": 0.98046875, "step": 91 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.36811778611301e-05, "aux_brier/mean_group_std": 0.04263066455186372, "aux_brier/mean_r": 0.9737503482426485, "aux_brier/n_active_tok": 222.5, "aux_brier/n_groups": 13.5, "aux_brier/n_step_records": 55.625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.50998550998551, "calib/avg_num_step_conf": 6.953125, "calib/ece": 0.5282283464566929, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006205506205506223, "calib/mean_conf": 0.034763779527559054, "calib/mu_c": 0.03503496503496503, "calib/mu_w": 0.03441441441441441, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.013241521841382182, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2014.0, "completions/max_terminated_length": 2014.0, "completions/mean_length": 405.9453125, "completions/mean_terminated_length": 405.9453125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.09813333333333334, "grad_norm": 0.16720464825630188, "learning_rate": 3e-06, "loss": 0.0335, "num_tokens": 19503859.0, "reward": 1.166670322418213, "reward_std": 0.20974203944206238, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.4635566473007202, "rewards/format_reward_step_strict": 0.984375, "step": 92 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -9.93929180403974e-07, "aux_brier/mean_group_std": 0.04500498375369191, "aux_brier/mean_r": 0.9720751749813877, "aux_brier/n_active_tok": 249.0, "aux_brier/n_groups": 15.71875, "aux_brier/n_step_records": 62.25, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4642267124158089, "calib/avg_num_step_conf": 7.78125, "calib/ece": 0.44856573705179287, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0011767696022366297, "calib/mean_conf": 0.03749003984063745, "calib/mu_c": 0.036885245901639344, "calib/mu_w": 0.03806201550387597, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.014243013958146587, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2520.0, "completions/max_terminated_length": 2520.0, "completions/mean_length": 458.41015625, "completions/mean_terminated_length": 458.41015625, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.0992, "grad_norm": 0.20912809669971466, "learning_rate": 2.9722222222222225e-06, "loss": 0.0302, "num_tokens": 19726988.0, "reward": 1.0982401371002197, "reward_std": 0.24127297103405, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5335855484008789, "rewards/format_reward_step_strict": 0.9765625, "step": 93 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.1607752745123054e-06, "aux_brier/mean_group_std": 0.060573612370259655, "aux_brier/mean_r": 0.9575719422265172, "aux_brier/n_active_tok": 226.0, "aux_brier/n_groups": 14.25, "aux_brier/n_step_records": 56.5, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5247115384615385, "calib/avg_num_step_conf": 7.0625, "calib/ece": 0.48696, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006410256410256526, "calib/mean_conf": 0.034, "calib/mu_c": 0.03430769230769231, "calib/mu_w": 0.03366666666666666, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00048, "calib/std_conf": 0.013236313686219437, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2926.0, "completions/max_terminated_length": 2926.0, "completions/mean_length": 423.81640625, "completions/mean_terminated_length": 423.81640625, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.10026666666666667, "grad_norm": 0.016376424580812454, "learning_rate": 2.944444444444445e-06, "loss": 0.0604, "num_tokens": 19944165.0, "reward": 1.1226438283920288, "reward_std": 0.21318775415420532, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.4983879029750824, "rewards/format_reward_step_strict": 0.97265625, "step": 94 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.345612032912282e-06, "aux_brier/mean_group_std": 0.0809558229476364, "aux_brier/mean_r": 0.9460285799274464, "aux_brier/n_active_tok": 249.875, "aux_brier/n_groups": 15.0625, "aux_brier/n_step_records": 62.46875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5558622002283974, "calib/avg_num_step_conf": 7.80859375, "calib/ece": 0.522292490118577, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008254028676563871, "calib/mean_conf": 0.04055335968379447, "calib/mu_c": 0.04091549295774647, "calib/mu_w": 0.04009009009009008, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0007905138339920949, "calib/std_conf": 0.01934935012791396, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2845.0, "completions/max_terminated_length": 2845.0, "completions/mean_length": 456.74609375, "completions/mean_terminated_length": 456.74609375, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.10133333333333333, "grad_norm": 0.048102181404829025, "learning_rate": 2.916666666666667e-06, "loss": 0.0155, "num_tokens": 20167220.0, "reward": 1.1631183624267578, "reward_std": 0.2865407466888428, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.4727863073348999, "rewards/format_reward_step_strict": 0.98046875, "step": 95 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.068151539025287e-06, "aux_brier/mean_group_std": 0.05631804725341152, "aux_brier/mean_r": 0.9589422327445754, "aux_brier/n_active_tok": 242.25, "aux_brier/n_groups": 16.0, "aux_brier/n_step_records": 60.5625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4966334500403986, "calib/avg_num_step_conf": 7.5703125, "calib/ece": 0.586984126984127, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00016967411796390414, "calib/mean_conf": 0.04, "calib/mu_c": 0.03993670886075949, "calib/mu_w": 0.0401063829787234, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.013540064007726602, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2541.0, "completions/max_terminated_length": 2541.0, "completions/mean_length": 412.11328125, "completions/mean_terminated_length": 412.11328125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.1024, "grad_norm": 0.08516115695238113, "learning_rate": 2.888888888888889e-06, "loss": 0.0654, "num_tokens": 20378537.0, "reward": 1.2169634103775024, "reward_std": 0.1943175047636032, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.41472887992858887, "rewards/format_reward_step_strict": 0.984375, "step": 96 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.663974127597001e-06, "aux_brier/mean_group_std": 0.053727748085427046, "aux_brier/mean_r": 0.9690100896784644, "aux_brier/n_active_tok": 240.375, "aux_brier/n_groups": 14.0625, "aux_brier/n_step_records": 60.09375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4723899913718723, "calib/avg_num_step_conf": 7.51171875, "calib/ece": 0.48066666666666674, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001413780352520648, "calib/mean_conf": 0.040901960784313726, "calib/mu_c": 0.040225563909774435, "calib/mu_w": 0.04163934426229508, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.014292813087676144, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2349.0, "completions/max_terminated_length": 2349.0, "completions/mean_length": 420.0703125, "completions/mean_terminated_length": 420.0703125, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.10346666666666667, "grad_norm": 0.1330837458372116, "learning_rate": 2.861111111111111e-06, "loss": 0.0271, "num_tokens": 20591147.0, "reward": 1.1467005014419556, "reward_std": 0.2592535614967346, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5164894461631775, "rewards/format_reward_step_strict": 0.99609375, "step": 97 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 4.722335791129684e-06, "aux_brier/mean_group_std": 0.08985709111939484, "aux_brier/mean_r": 0.9369056604417026, "aux_brier/n_active_tok": 259.75, "aux_brier/n_groups": 18.0625, "aux_brier/n_step_records": 64.9375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4499672346002621, "calib/avg_num_step_conf": 8.3125, "calib/ece": 0.5217269076305221, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0015190039318479717, "calib/mean_conf": 0.04052208835341366, "calib/mu_c": 0.039857142857142855, "calib/mu_w": 0.04137614678899083, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.014118279545522784, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2665.0, "completions/max_terminated_length": 2665.0, "completions/mean_length": 461.078125, "completions/mean_terminated_length": 462.88629150390625, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.10453333333333334, "grad_norm": 0.23408974707126617, "learning_rate": 2.8333333333333335e-06, "loss": 0.0638, "num_tokens": 20815367.0, "reward": 1.1500990390777588, "reward_std": 0.23604661226272583, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.4675839841365814, "rewards/format_reward_step_strict": 0.97265625, "step": 98 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.0724527822747465e-06, "aux_brier/mean_group_std": 0.03908728093991029, "aux_brier/mean_r": 0.9764337692755751, "aux_brier/n_active_tok": 263.875, "aux_brier/n_groups": 16.15625, "aux_brier/n_step_records": 65.96875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5371052631578948, "calib/avg_num_step_conf": 8.24609375, "calib/ece": 0.26079681274900396, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001084962406015029, "calib/mean_conf": 0.043585657370517936, "calib/mu_c": 0.044342105263157884, "calib/mu_w": 0.043257142857142855, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0007968127490039841, "calib/std_conf": 0.018225338996572207, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2575.0, "completions/max_terminated_length": 2575.0, "completions/mean_length": 499.046875, "completions/mean_terminated_length": 499.046875, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.1056, "grad_norm": 0.011097099632024765, "learning_rate": 2.805555555555556e-06, "loss": 0.0774, "num_tokens": 21048923.0, "reward": 0.9679489731788635, "reward_std": 0.22912907600402832, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.7077336311340332, "rewards/format_reward_step_strict": 0.98046875, "step": 99 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.6559374108336975e-06, "aux_brier/mean_group_std": 0.06833585449856798, "aux_brier/mean_r": 0.9478820524765457, "aux_brier/n_active_tok": 245.25, "aux_brier/n_groups": 14.5, "aux_brier/n_step_records": 61.3125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5469354838709678, "calib/avg_num_step_conf": 7.6640625, "calib/ece": 0.4566666666666666, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002018709677419357, "calib/mean_conf": 0.04132530120481928, "calib/mu_c": 0.042338709677419366, "calib/mu_w": 0.04032000000000001, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.012684406928435523, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3035.0, "completions/max_terminated_length": 3035.0, "completions/mean_length": 462.703125, "completions/mean_terminated_length": 462.703125, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.10666666666666667, "grad_norm": 0.026581548154354095, "learning_rate": 2.7777777777777783e-06, "loss": 0.0901, "num_tokens": 21274783.0, "reward": 1.1005620956420898, "reward_std": 0.26199617981910706, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5272483825683594, "rewards/format_reward_step_strict": 0.96875, "step": 100 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -8.104555376253764e-07, "aux_brier/mean_group_std": 0.03822865550783053, "aux_brier/mean_r": 0.9741097669206261, "aux_brier/n_active_tok": 295.25, "aux_brier/n_groups": 19.25, "aux_brier/n_step_records": 73.8125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4828681956341532, "calib/avg_num_step_conf": 9.2265625, "calib/ece": 0.33858870967741933, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00020033158331031475, "calib/mean_conf": 0.04044354838709678, "calib/mu_c": 0.0403191489361702, "calib/mu_w": 0.04051948051948052, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.012222930868734072, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2672.0, "completions/max_terminated_length": 2672.0, "completions/mean_length": 522.08984375, "completions/mean_terminated_length": 524.1372680664062, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.10773333333333333, "grad_norm": 0.0646122470498085, "learning_rate": 2.7500000000000004e-06, "loss": 0.0916, "num_tokens": 21515430.0, "reward": 1.008923053741455, "reward_std": 0.24845989048480988, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.629442572593689, "rewards/format_reward_step_strict": 0.96875, "step": 101 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.4775850829837918e-06, "aux_brier/mean_group_std": 0.050068782589601665, "aux_brier/mean_r": 0.9631128284547863, "aux_brier/n_active_tok": 236.375, "aux_brier/n_groups": 13.59375, "aux_brier/n_step_records": 59.09375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5248051948051948, "calib/avg_num_step_conf": 7.4453125, "calib/ece": 0.5674015748031497, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00016883116883117943, "calib/mean_conf": 0.03889763779527559, "calib/mu_c": 0.03883116883116882, "calib/mu_w": 0.039, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01036562660394584, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1889.0, "completions/max_terminated_length": 1889.0, "completions/mean_length": 399.2734375, "completions/mean_terminated_length": 400.8392333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 67.0, "epoch": 0.1088, "grad_norm": 0.37528353929519653, "learning_rate": 2.7222222222222224e-06, "loss": 0.0008, "num_tokens": 21724340.0, "reward": 1.210496425628662, "reward_std": 0.202207550406456, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.43573594093322754, "rewards/format_reward_step_strict": 0.9921875, "step": 102 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.4638724062931985e-06, "aux_brier/mean_group_std": 0.048043066482962964, "aux_brier/mean_r": 0.9690465192308666, "aux_brier/n_active_tok": 287.125, "aux_brier/n_groups": 19.71875, "aux_brier/n_step_records": 71.78125, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5147196261682244, "calib/avg_num_step_conf": 9.15625, "calib/ece": 0.5265991902834009, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011815754339118828, "calib/mean_conf": 0.040202429149797575, "calib/mu_c": 0.04071428571428571, "calib/mu_w": 0.03953271028037383, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.011919072346336648, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2856.0, "completions/max_terminated_length": 2856.0, "completions/mean_length": 548.0, "completions/mean_terminated_length": 552.31494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.10986666666666667, "grad_norm": 0.5299721956253052, "learning_rate": 2.6944444444444444e-06, "loss": 0.0575, "num_tokens": 21969180.0, "reward": 1.1415690183639526, "reward_std": 0.2539811134338379, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.45690077543258667, "rewards/format_reward_step_strict": 0.9609375, "step": 103 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.285170763702496e-06, "aux_brier/mean_group_std": 0.04751414936699034, "aux_brier/mean_r": 0.9696152340986504, "aux_brier/n_active_tok": 254.125, "aux_brier/n_groups": 15.28125, "aux_brier/n_step_records": 63.53125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5002827698881488, "calib/avg_num_step_conf": 7.94140625, "calib/ece": 0.3869019607843137, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 2.5135101168749507e-06, "calib/mean_conf": 0.040549019607843136, "calib/mu_c": 0.04055045871559633, "calib/mu_w": 0.040547945205479455, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01046434417624111, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2773.0, "completions/max_terminated_length": 2773.0, "completions/mean_length": 455.11328125, "completions/mean_terminated_length": 455.11328125, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.11093333333333333, "grad_norm": 0.3309130370616913, "learning_rate": 2.666666666666667e-06, "loss": 0.028, "num_tokens": 22192369.0, "reward": 1.0746023654937744, "reward_std": 0.20162808895111084, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.6030968427658081, "rewards/format_reward_step_strict": 0.99609375, "step": 104 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.7118542688109315e-06, "aux_brier/mean_group_std": 0.0437891455200206, "aux_brier/mean_r": 0.975865268789005, "aux_brier/n_active_tok": 267.875, "aux_brier/n_groups": 16.46875, "aux_brier/n_step_records": 66.96875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.48979013045944414, "calib/avg_num_step_conf": 8.37109375, "calib/ece": 0.44857142857142857, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0014520703346568298, "calib/mean_conf": 0.03952380952380953, "calib/mu_c": 0.038780487804878055, "calib/mu_w": 0.040232558139534885, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.011293848786315641, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2521.0, "completions/max_terminated_length": 2521.0, "completions/mean_length": 486.1171875, "completions/mean_terminated_length": 486.1171875, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.112, "grad_norm": 1.2331123352050781, "learning_rate": 2.6388888888888893e-06, "loss": 0.0586, "num_tokens": 22422575.0, "reward": 1.1114397048950195, "reward_std": 0.26240772008895874, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.539508581161499, "rewards/format_reward_step_strict": 0.984375, "step": 105 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.8908124227733367e-07, "aux_brier/mean_group_std": 0.0741900223851166, "aux_brier/mean_r": 0.9525063079484763, "aux_brier/n_active_tok": 244.25, "aux_brier/n_groups": 14.75, "aux_brier/n_step_records": 61.0625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5614988558352404, "calib/avg_num_step_conf": 7.6328125, "calib/ece": 0.4142857142857143, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0008848207475209702, "calib/mean_conf": 0.04206349206349207, "calib/mu_c": 0.041578947368421056, "calib/mu_w": 0.042463768115942026, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.001984126984126984, "calib/std_conf": 0.030765819529106585, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2886.0, "completions/max_terminated_length": 2886.0, "completions/mean_length": 457.40625, "completions/mean_terminated_length": 457.40625, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 0.11306666666666666, "grad_norm": 0.730738639831543, "learning_rate": 2.6111111111111113e-06, "loss": 0.0633, "num_tokens": 22644255.0, "reward": 1.0752407312393188, "reward_std": 0.24985721707344055, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5665878653526306, "rewards/format_reward_step_strict": 0.9765625, "step": 106 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -8.50917928452155e-06, "aux_brier/mean_group_std": 0.057776691252402666, "aux_brier/mean_r": 0.960953601038585, "aux_brier/n_active_tok": 244.25, "aux_brier/n_groups": 14.75, "aux_brier/n_step_records": 61.0625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5284919162470183, "calib/avg_num_step_conf": 7.796875, "calib/ece": 0.5722222222222223, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010204081632653114, "calib/mean_conf": 0.03888888888888889, "calib/mu_c": 0.039285714285714285, "calib/mu_w": 0.038265306122448974, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009148008910107181, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2413.0, "completions/max_terminated_length": 2413.0, "completions/mean_length": 437.6015625, "completions/mean_terminated_length": 439.31768798828125, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.11413333333333334, "grad_norm": 0.26983603835105896, "learning_rate": 2.5833333333333337e-06, "loss": 0.0537, "num_tokens": 22860897.0, "reward": 1.2008767127990723, "reward_std": 0.24636337161064148, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.4285070300102234, "rewards/format_reward_step_strict": 0.984375, "step": 107 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.711407311035586e-06, "aux_brier/mean_group_std": 0.06834700084118482, "aux_brier/mean_r": 0.9478834800573411, "aux_brier/n_active_tok": 274.25, "aux_brier/n_groups": 16.40625, "aux_brier/n_step_records": 68.5625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5430125692661171, "calib/avg_num_step_conf": 8.57421875, "calib/ece": 0.5663453815261044, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00215164211379916, "calib/mean_conf": 0.040080321285140566, "calib/mu_c": 0.040927152317880786, "calib/mu_w": 0.038775510204081626, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.009356481672849375, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2937.0, "completions/max_terminated_length": 2937.0, "completions/mean_length": 504.4296875, "completions/mean_terminated_length": 504.4296875, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.1152, "grad_norm": 0.26578643918037415, "learning_rate": 2.5555555555555557e-06, "loss": 0.0743, "num_tokens": 23093263.0, "reward": 1.1815803050994873, "reward_std": 0.25909295678138733, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.42944610118865967, "rewards/format_reward_step_strict": 0.96875, "step": 108 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.3005259867112677e-07, "aux_brier/mean_group_std": 0.034005848151057484, "aux_brier/mean_r": 0.9785348621308146, "aux_brier/n_active_tok": 309.25, "aux_brier/n_groups": 23.28125, "aux_brier/n_step_records": 77.3125, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5537815126050419, "calib/avg_num_step_conf": 9.921875, "calib/ece": 0.44991147540983606, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0030211764705882424, "calib/mean_conf": 0.03779344262295081, "calib/mu_c": 0.039341176470588234, "calib/mu_w": 0.03631999999999999, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00963398814056824, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2995.0, "completions/max_terminated_length": 2995.0, "completions/mean_length": 535.2109375, "completions/mean_terminated_length": 539.4251708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.11626666666666667, "grad_norm": 0.37634411454200745, "learning_rate": 2.5277777777777778e-06, "loss": 0.1403, "num_tokens": 23334877.0, "reward": 1.0663983821868896, "reward_std": 0.20794661343097687, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5234063863754272, "rewards/format_reward_step_strict": 0.94140625, "step": 109 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.382355060144171e-06, "aux_brier/mean_group_std": 0.05514414110685423, "aux_brier/mean_r": 0.9610729802289838, "aux_brier/n_active_tok": 238.125, "aux_brier/n_groups": 14.125, "aux_brier/n_step_records": 59.53125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4605263157894737, "calib/avg_num_step_conf": 7.44140625, "calib/ece": 0.4095669291338583, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0008709273182957306, "calib/mean_conf": 0.039251968503937, "calib/mu_c": 0.0387719298245614, "calib/mu_w": 0.03964285714285713, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.009952223273896953, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2371.0, "completions/max_terminated_length": 2371.0, "completions/mean_length": 466.74609375, "completions/mean_terminated_length": 466.74609375, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.11733333333333333, "grad_norm": 0.0723707377910614, "learning_rate": 2.5e-06, "loss": 0.0417, "num_tokens": 23559284.0, "reward": 1.0834276676177979, "reward_std": 0.2757888436317444, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5758980512619019, "rewards/format_reward_step_strict": 0.98828125, "step": 110 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.8873351762815904e-06, "aux_brier/mean_group_std": 0.06157601710370413, "aux_brier/mean_r": 0.9618023827397723, "aux_brier/n_active_tok": 268.375, "aux_brier/n_groups": 16.75, "aux_brier/n_step_records": 67.09375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5699152542372881, "calib/avg_num_step_conf": 8.5234375, "calib/ece": 0.43191999999999997, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0017745249101181262, "calib/mean_conf": 0.04008, "calib/mu_c": 0.041016949152542365, "calib/mu_w": 0.03924242424242424, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.01226350683939957, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2849.0, "completions/max_terminated_length": 2849.0, "completions/mean_length": 510.5, "completions/mean_terminated_length": 514.5196533203125, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.1184, "grad_norm": 0.2618515193462372, "learning_rate": 2.4722222222222226e-06, "loss": -0.0142, "num_tokens": 23797380.0, "reward": 1.0773262977600098, "reward_std": 0.2668326795101166, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5436800718307495, "rewards/format_reward_step_strict": 0.9609375, "step": 111 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.8973257561727408e-06, "aux_brier/mean_group_std": 0.08752773473943101, "aux_brier/mean_r": 0.94084045562474, "aux_brier/n_active_tok": 256.625, "aux_brier/n_groups": 15.6875, "aux_brier/n_step_records": 64.15625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5182474629195941, "calib/avg_num_step_conf": 8.34375, "calib/ece": 0.46939112903225805, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0018900598490762394, "calib/mean_conf": 0.03867338709677419, "calib/mu_c": 0.039603174603174605, "calib/mu_w": 0.037713114754098366, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.011155886691895637, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2973.0, "completions/max_terminated_length": 2973.0, "completions/mean_length": 520.94921875, "completions/mean_terminated_length": 527.1265258789062, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.11946666666666667, "grad_norm": 1.2074123620986938, "learning_rate": 2.4444444444444447e-06, "loss": 0.0074, "num_tokens": 24038663.0, "reward": 1.098163366317749, "reward_std": 0.25116461515426636, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.509840726852417, "rewards/format_reward_step_strict": 0.95703125, "step": 112 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.4069467026978089e-06, "aux_brier/mean_group_std": 0.07493280707516929, "aux_brier/mean_r": 0.9570411421452618, "aux_brier/n_active_tok": 271.625, "aux_brier/n_groups": 18.28125, "aux_brier/n_step_records": 67.90625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5311181102362205, "calib/avg_num_step_conf": 8.7578125, "calib/ece": 0.4559920634920635, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0013499212598425125, "calib/mean_conf": 0.04003968253968254, "calib/mu_c": 0.04072, "calib/mu_w": 0.03937007874015749, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00940693833232194, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2576.0, "completions/max_terminated_length": 2576.0, "completions/mean_length": 474.94921875, "completions/mean_terminated_length": 476.8117980957031, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.12053333333333334, "grad_norm": 0.43918195366859436, "learning_rate": 2.4166666666666667e-06, "loss": 0.0525, "num_tokens": 24265450.0, "reward": 1.1140172481536865, "reward_std": 0.26322633028030396, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5341941118240356, "rewards/format_reward_step_strict": 0.984375, "step": 113 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 8.792451297034987e-07, "aux_brier/mean_group_std": 0.046124216253668957, "aux_brier/mean_r": 0.9682924531124371, "aux_brier/n_active_tok": 258.75, "aux_brier/n_groups": 15.46875, "aux_brier/n_step_records": 64.6875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5792217042217042, "calib/avg_num_step_conf": 8.12109375, "calib/ece": 0.5732941176470588, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005011655011655028, "calib/mean_conf": 0.041215686274509805, "calib/mu_c": 0.04141025641025641, "calib/mu_w": 0.04090909090909091, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.001372549019607843, "calib/std_conf": 0.02223961641428461, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1891.0, "completions/max_terminated_length": 1891.0, "completions/mean_length": 453.24609375, "completions/mean_terminated_length": 455.0235595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.1216, "grad_norm": 0.5156078934669495, "learning_rate": 2.388888888888889e-06, "loss": 0.003, "num_tokens": 24486505.0, "reward": 1.213362455368042, "reward_std": 0.21056273579597473, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.4315750002861023, "rewards/format_reward_step_strict": 0.9921875, "step": 114 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.0225195213875438e-06, "aux_brier/mean_group_std": 0.06268015443666367, "aux_brier/mean_r": 0.9657730870218638, "aux_brier/n_active_tok": 231.375, "aux_brier/n_groups": 12.71875, "aux_brier/n_step_records": 57.84375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5528420275590551, "calib/avg_num_step_conf": 7.23046875, "calib/ece": 0.45827450980392154, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0017236712598425186, "calib/mean_conf": 0.03976470588235294, "calib/mu_c": 0.04062992125984252, "calib/mu_w": 0.03890625, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.01277035205388487, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2608.0, "completions/max_terminated_length": 2608.0, "completions/mean_length": 440.875, "completions/mean_terminated_length": 440.875, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.12266666666666666, "grad_norm": 0.1856922209262848, "learning_rate": 2.361111111111111e-06, "loss": 0.0393, "num_tokens": 24704633.0, "reward": 1.1238644123077393, "reward_std": 0.2826547920703888, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5345203280448914, "rewards/format_reward_step_strict": 0.98828125, "step": 115 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.0355788804261579e-06, "aux_brier/mean_group_std": 0.02800406959220724, "aux_brier/mean_r": 0.9824136114310306, "aux_brier/n_active_tok": 290.375, "aux_brier/n_groups": 19.4375, "aux_brier/n_step_records": 72.59375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.51829740544727, "calib/avg_num_step_conf": 9.07421875, "calib/ece": 0.4704417670682731, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011410868723376877, "calib/mean_conf": 0.039598393574297196, "calib/mu_c": 0.040157480314960636, "calib/mu_w": 0.03901639344262295, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.010895605274002284, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2580.0, "completions/max_terminated_length": 2580.0, "completions/mean_length": 565.453125, "completions/mean_terminated_length": 565.453125, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.12373333333333333, "grad_norm": 0.6097423434257507, "learning_rate": 2.3333333333333336e-06, "loss": 0.0612, "num_tokens": 24953909.0, "reward": 1.1090649366378784, "reward_std": 0.26453733444213867, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5143847465515137, "rewards/format_reward_step_strict": 0.96875, "step": 116 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.478230049034579e-06, "aux_brier/mean_group_std": 0.04360485184978645, "aux_brier/mean_r": 0.9724314700344239, "aux_brier/n_active_tok": 273.875, "aux_brier/n_groups": 17.1875, "aux_brier/n_step_records": 68.46875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5554187192118226, "calib/avg_num_step_conf": 8.7578125, "calib/ece": 0.4238955823293173, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002448794399792592, "calib/mean_conf": 0.041967871485943775, "calib/mu_c": 0.04327586206896551, "calib/mu_w": 0.04082706766917292, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.010439295351569445, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2090.0, "completions/max_terminated_length": 2090.0, "completions/mean_length": 486.9375, "completions/mean_terminated_length": 488.8470764160156, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.1248, "grad_norm": 0.8644006848335266, "learning_rate": 2.305555555555556e-06, "loss": 0.0284, "num_tokens": 25185165.0, "reward": 1.0777117013931274, "reward_std": 0.2392779290676117, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5530344247817993, "rewards/format_reward_step_strict": 0.96484375, "step": 117 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.743756228580253e-06, "aux_brier/mean_group_std": 0.03310747233374885, "aux_brier/mean_r": 0.9780488771064113, "aux_brier/n_active_tok": 292.0, "aux_brier/n_groups": 20.53125, "aux_brier/n_step_records": 73.0, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5942113821138211, "calib/avg_num_step_conf": 9.125, "calib/ece": 0.45391129032258065, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0025463414634146336, "calib/mean_conf": 0.04286290322580645, "calib/mu_c": 0.04414634146341463, "calib/mu_w": 0.0416, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0004032258064516129, "calib/std_conf": 0.010408460173540713, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2954.0, "completions/max_terminated_length": 2954.0, "completions/mean_length": 559.9375, "completions/mean_terminated_length": 559.9375, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.12586666666666665, "grad_norm": 0.6745241284370422, "learning_rate": 2.277777777777778e-06, "loss": 0.0691, "num_tokens": 25432517.0, "reward": 1.097048282623291, "reward_std": 0.24959829449653625, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5288183689117432, "rewards/format_reward_step_strict": 0.96875, "step": 118 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.710695929052486e-06, "aux_brier/mean_group_std": 0.07189017199223828, "aux_brier/mean_r": 0.9597806939114144, "aux_brier/n_active_tok": 293.125, "aux_brier/n_groups": 20.0, "aux_brier/n_step_records": 73.28125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5438347107438016, "calib/avg_num_step_conf": 9.34375, "calib/ece": 0.4652439024390244, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014512396694214974, "calib/mean_conf": 0.042886178861788624, "calib/mu_c": 0.043600000000000014, "calib/mu_w": 0.042148760330578516, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.00980517803475346, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3065.0, "completions/max_terminated_length": 3065.0, "completions/mean_length": 577.5390625, "completions/mean_terminated_length": 579.803955078125, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.12693333333333334, "grad_norm": 0.27141526341438293, "learning_rate": 2.25e-06, "loss": -0.019, "num_tokens": 25685431.0, "reward": 1.0970935821533203, "reward_std": 0.2445300966501236, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5133745670318604, "rewards/format_reward_step_strict": 0.9609375, "step": 119 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.0665530983409184e-06, "aux_brier/mean_group_std": 0.06747079615723417, "aux_brier/mean_r": 0.9619801587336653, "aux_brier/n_active_tok": 239.875, "aux_brier/n_groups": 13.8125, "aux_brier/n_step_records": 59.96875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.569947014732489, "calib/avg_num_step_conf": 7.5859375, "calib/ece": 0.5368650793650794, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0016141121736883007, "calib/mean_conf": 0.043293650793650794, "calib/mu_c": 0.04397260273972603, "calib/mu_w": 0.04235849056603773, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.0003968253968253968, "calib/std_conf": 0.011049360722955488, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2834.0, "completions/max_terminated_length": 2834.0, "completions/mean_length": 481.7421875, "completions/mean_terminated_length": 483.63140869140625, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.128, "grad_norm": 1.0078459978103638, "learning_rate": 2.222222222222222e-06, "loss": 0.0569, "num_tokens": 25915445.0, "reward": 1.1690651178359985, "reward_std": 0.23911015689373016, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.45751091837882996, "rewards/format_reward_step_strict": 0.96875, "step": 120 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 6.75080295375885e-06, "aux_brier/mean_group_std": 0.040355337648619094, "aux_brier/mean_r": 0.972082247007521, "aux_brier/n_active_tok": 272.125, "aux_brier/n_groups": 16.9375, "aux_brier/n_step_records": 68.03125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5502019437081914, "calib/avg_num_step_conf": 8.50390625, "calib/ece": 0.5065612648221345, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014779755143253803, "calib/mean_conf": 0.043794466403162056, "calib/mu_c": 0.04446043165467626, "calib/mu_w": 0.04298245614035088, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00047430830039525685, "calib/std_conf": 0.011619343326530134, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2665.0, "completions/max_terminated_length": 2665.0, "completions/mean_length": 547.52734375, "completions/mean_terminated_length": 547.52734375, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 0.12906666666666666, "grad_norm": 0.7970133423805237, "learning_rate": 2.1944444444444445e-06, "loss": 0.0269, "num_tokens": 26160668.0, "reward": 1.1600005626678467, "reward_std": 0.27700334787368774, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.4915648102760315, "rewards/format_reward_step_strict": 0.98828125, "step": 121 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.735171854006957e-06, "aux_brier/mean_group_std": 0.05605299531956594, "aux_brier/mean_r": 0.9637033242442844, "aux_brier/n_active_tok": 260.875, "aux_brier/n_groups": 15.96875, "aux_brier/n_step_records": 65.21875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.6272407611693326, "calib/avg_num_step_conf": 8.23046875, "calib/ece": 0.5545121951219513, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008432156646442375, "calib/mean_conf": 0.04711382113821139, "calib/mu_c": 0.05047297297297298, "calib/mu_w": 0.0420408163265306, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.040637741929624244, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2685.0, "completions/max_terminated_length": 2685.0, "completions/mean_length": 513.359375, "completions/mean_terminated_length": 517.4015502929688, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.13013333333333332, "grad_norm": 0.2785504460334778, "learning_rate": 2.166666666666667e-06, "loss": 0.0264, "num_tokens": 26399432.0, "reward": 1.1661373376846313, "reward_std": 0.24181430041790009, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.43017420172691345, "rewards/format_reward_step_strict": 0.953125, "step": 122 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.8249857914598433e-06, "aux_brier/mean_group_std": 0.05829732431195981, "aux_brier/mean_r": 0.9573845217098387, "aux_brier/n_active_tok": 268.625, "aux_brier/n_groups": 16.15625, "aux_brier/n_step_records": 67.15625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4807990189750871, "calib/avg_num_step_conf": 8.640625, "calib/ece": 0.4654216867469879, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0017122757196334146, "calib/mean_conf": 0.044618473895582336, "calib/mu_c": 0.04377952755905511, "calib/mu_w": 0.04549180327868853, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.014505054515800577, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2423.0, "completions/max_terminated_length": 2423.0, "completions/mean_length": 530.5078125, "completions/mean_terminated_length": 536.7984619140625, "completions/min_length": 0.0, "completions/min_terminated_length": 179.0, "epoch": 0.1312, "grad_norm": 0.4846332371234894, "learning_rate": 2.138888888888889e-06, "loss": -0.0207, "num_tokens": 26640530.0, "reward": 1.1089584827423096, "reward_std": 0.31316617131233215, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5139589905738831, "rewards/format_reward_step_strict": 0.96875, "step": 123 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.8679218291860487e-07, "aux_brier/mean_group_std": 0.057472229525174494, "aux_brier/mean_r": 0.9662159645971783, "aux_brier/n_active_tok": 260.625, "aux_brier/n_groups": 15.46875, "aux_brier/n_step_records": 65.15625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5304846274101094, "calib/avg_num_step_conf": 8.453125, "calib/ece": 0.5553754940711463, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0020915841584158365, "calib/mean_conf": 0.04541501976284585, "calib/mu_c": 0.04624999999999999, "calib/mu_w": 0.044158415841584156, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.014835251113226726, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1870.0, "completions/max_terminated_length": 1870.0, "completions/mean_length": 511.12109375, "completions/mean_terminated_length": 515.1456909179688, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.13226666666666667, "grad_norm": 0.48033249378204346, "learning_rate": 2.1111111111111114e-06, "loss": -0.0299, "num_tokens": 26878193.0, "reward": 1.1996898651123047, "reward_std": 0.19524884223937988, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.4471972584724426, "rewards/format_reward_step_strict": 0.98828125, "step": 124 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -9.294555074201849e-07, "aux_brier/mean_group_std": 0.06517470467191165, "aux_brier/mean_r": 0.9624438149609646, "aux_brier/n_active_tok": 281.375, "aux_brier/n_groups": 19.8125, "aux_brier/n_step_records": 70.34375, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5046576618537495, "calib/avg_num_step_conf": 9.18359375, "calib/ece": 0.49658536585365853, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0004537893406081836, "calib/mean_conf": 0.04577235772357724, "calib/mu_c": 0.045563909774436084, "calib/mu_w": 0.04601769911504427, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0008536585365853661, "calib/std_conf": 0.01133439474331281, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2869.0, "completions/max_terminated_length": 2869.0, "completions/mean_length": 545.97265625, "completions/mean_terminated_length": 550.2716674804688, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.13333333333333333, "grad_norm": 0.3349872827529907, "learning_rate": 2.0833333333333334e-06, "loss": 0.054, "num_tokens": 27122770.0, "reward": 1.1255595684051514, "reward_std": 0.27273088693618774, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.4866132438182831, "rewards/format_reward_step_strict": 0.9609375, "step": 125 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -6.586428731011895e-06, "aux_brier/mean_group_std": 0.07592454433965073, "aux_brier/mean_r": 0.9427247285566948, "aux_brier/n_active_tok": 298.0, "aux_brier/n_groups": 19.84375, "aux_brier/n_step_records": 74.5, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.6292750117583819, "calib/avg_num_step_conf": 9.43359375, "calib/ece": 0.4579918032786885, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004736276288382718, "calib/mean_conf": 0.04610655737704919, "calib/mu_c": 0.04845528455284553, "calib/mu_w": 0.04371900826446281, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.012642828186042466, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2808.0, "completions/max_terminated_length": 2808.0, "completions/mean_length": 587.05078125, "completions/mean_terminated_length": 591.6732177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.1344, "grad_norm": 0.5688815712928772, "learning_rate": 2.0555555555555555e-06, "loss": 0.0485, "num_tokens": 27378519.0, "reward": 1.0862913131713867, "reward_std": 0.21779096126556396, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5170402526855469, "rewards/format_reward_step_strict": 0.953125, "step": 126 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.528456068962015e-07, "aux_brier/mean_group_std": 0.04799782578420207, "aux_brier/mean_r": 0.968363675771192, "aux_brier/n_active_tok": 301.125, "aux_brier/n_groups": 20.1875, "aux_brier/n_step_records": 75.28125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5944516129032258, "calib/avg_num_step_conf": 9.52734375, "calib/ece": 0.4515261044176706, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003987741935483874, "calib/mean_conf": 0.046465863453815266, "calib/mu_c": 0.04846774193548388, "calib/mu_w": 0.044480000000000006, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.014629282380530732, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2840.0, "completions/max_terminated_length": 2840.0, "completions/mean_length": 565.55078125, "completions/mean_terminated_length": 567.7686767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.13546666666666668, "grad_norm": 0.1792018562555313, "learning_rate": 2.027777777777778e-06, "loss": 0.0547, "num_tokens": 27626972.0, "reward": 1.101007342338562, "reward_std": 0.21284964680671692, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5290297269821167, "rewards/format_reward_step_strict": 0.96875, "step": 127 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.730827152268688e-06, "aux_brier/mean_group_std": 0.0448499201654236, "aux_brier/mean_r": 0.9670285229414458, "aux_brier/n_active_tok": 243.125, "aux_brier/n_groups": 14.4375, "aux_brier/n_step_records": 60.78125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5500327653997379, "calib/avg_num_step_conf": 7.8515625, "calib/ece": 0.5187349397590362, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011120576671035387, "calib/mean_conf": 0.04544176706827309, "calib/mu_c": 0.04592857142857143, "calib/mu_w": 0.04481651376146789, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0009638554216867469, "calib/std_conf": 0.012491186551156098, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2706.0, "completions/max_terminated_length": 2706.0, "completions/mean_length": 522.80859375, "completions/mean_terminated_length": 531.107177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.13653333333333334, "grad_norm": 0.23012739419937134, "learning_rate": 2.0000000000000003e-06, "loss": -0.0033, "num_tokens": 27867475.0, "reward": 1.146785020828247, "reward_std": 0.24630600214004517, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.46995264291763306, "rewards/format_reward_step_strict": 0.96484375, "step": 128 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.2225584035725525e-06, "aux_brier/mean_group_std": 0.07485034799952374, "aux_brier/mean_r": 0.9532100457506557, "aux_brier/n_active_tok": 255.125, "aux_brier/n_groups": 15.46875, "aux_brier/n_step_records": 63.78125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5524647887323944, "calib/avg_num_step_conf": 8.0546875, "calib/ece": 0.5166269841269842, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014711907810499372, "calib/mean_conf": 0.04773809523809523, "calib/mu_c": 0.04838028169014084, "calib/mu_w": 0.0469090909090909, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0004365079365079365, "calib/std_conf": 0.01185405869240545, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2399.0, "completions/max_terminated_length": 2399.0, "completions/mean_length": 496.1953125, "completions/mean_terminated_length": 498.1412048339844, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.1376, "grad_norm": 0.1529451161623001, "learning_rate": 1.9722222222222224e-06, "loss": 0.015, "num_tokens": 28096885.0, "reward": 1.1651663780212402, "reward_std": 0.17689114809036255, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.48097774386405945, "rewards/format_reward_step_strict": 0.98046875, "step": 129 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.2395922233121581e-06, "aux_brier/mean_group_std": 0.05960076269232059, "aux_brier/mean_r": 0.9625159202485318, "aux_brier/n_active_tok": 248.5, "aux_brier/n_groups": 13.78125, "aux_brier/n_step_records": 62.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6492900213966154, "calib/avg_num_step_conf": 7.8125, "calib/ece": 0.5746484374999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005729754263113521, "calib/mean_conf": 0.04644531250000001, "calib/mu_c": 0.04861635220125785, "calib/mu_w": 0.04288659793814433, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.0, "calib/std_conf": 0.013208844452765114, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1577.0, "completions/max_terminated_length": 1577.0, "completions/mean_length": 479.11328125, "completions/mean_terminated_length": 480.9921875, "completions/min_length": 0.0, "completions/min_terminated_length": 158.0, "epoch": 0.13866666666666666, "grad_norm": 0.16696175932884216, "learning_rate": 1.944444444444445e-06, "loss": 0.012, "num_tokens": 28324826.0, "reward": 1.210479974746704, "reward_std": 0.17544493079185486, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.4200449287891388, "rewards/format_reward_step_strict": 0.96875, "step": 130 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.270774924761909e-06, "aux_brier/mean_group_std": 0.05245029687610163, "aux_brier/mean_r": 0.9671859307003272, "aux_brier/n_active_tok": 243.375, "aux_brier/n_groups": 13.90625, "aux_brier/n_step_records": 60.84375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6170370848946892, "calib/avg_num_step_conf": 7.60546875, "calib/ece": 0.3822310756972111, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01720764956712753, "calib/mean_conf": 0.055219123505976096, "calib/mu_c": 0.06495412844036696, "calib/mu_w": 0.047746478873239434, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0015936254980079682, "calib/std_conf": 0.06435902002821915, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2900.0, "completions/max_terminated_length": 2900.0, "completions/mean_length": 486.2265625, "completions/mean_terminated_length": 486.2265625, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.13973333333333332, "grad_norm": 0.13326288759708405, "learning_rate": 1.916666666666667e-06, "loss": -0.0098, "num_tokens": 28555508.0, "reward": 1.0638514757156372, "reward_std": 0.1941002905368805, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.599155843257904, "rewards/format_reward_step_strict": 0.9765625, "step": 131 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.596242803549643e-08, "aux_brier/mean_group_std": 0.09452182542658515, "aux_brier/mean_r": 0.9286314229311331, "aux_brier/n_active_tok": 267.875, "aux_brier/n_groups": 16.375, "aux_brier/n_step_records": 66.96875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5734089688506981, "calib/avg_num_step_conf": 8.375, "calib/ece": 0.5588799999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002553705692803436, "calib/mean_conf": 0.04992, "calib/mu_c": 0.05092105263157895, "calib/mu_w": 0.048367346938775514, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0004, "calib/std_conf": 0.013798318738165168, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2848.0, "completions/max_terminated_length": 2848.0, "completions/mean_length": 511.48046875, "completions/mean_terminated_length": 511.48046875, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.1408, "grad_norm": 0.3829323649406433, "learning_rate": 1.888888888888889e-06, "loss": 0.0553, "num_tokens": 28792039.0, "reward": 1.1843125820159912, "reward_std": 0.2582811713218689, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.4325625002384186, "rewards/format_reward_step_strict": 0.96484375, "step": 132 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.8079169965833373e-06, "aux_brier/mean_group_std": 0.05679022952356221, "aux_brier/mean_r": 0.9633026394767802, "aux_brier/n_active_tok": 277.625, "aux_brier/n_groups": 15.1875, "aux_brier/n_step_records": 69.40625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5233465608465608, "calib/avg_num_step_conf": 8.875, "calib/ece": 0.37421686746987953, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0008829365079365062, "calib/mean_conf": 0.04955823293172691, "calib/mu_c": 0.04904761904761905, "calib/mu_w": 0.049930555555555554, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.001044176706827309, "calib/std_conf": 0.019599545690014854, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2434.0, "completions/max_terminated_length": 2434.0, "completions/mean_length": 613.3515625, "completions/mean_terminated_length": 620.62451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.14186666666666667, "grad_norm": 0.20712287724018097, "learning_rate": 1.8611111111111113e-06, "loss": 0.0001, "num_tokens": 29055401.0, "reward": 1.039604902267456, "reward_std": 0.2921742796897888, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.5959199070930481, "rewards/format_reward_step_strict": 0.9609375, "step": 133 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.1055995453055374e-07, "aux_brier/mean_group_std": 0.05259560300703106, "aux_brier/mean_r": 0.9646003977140253, "aux_brier/n_active_tok": 301.125, "aux_brier/n_groups": 20.84375, "aux_brier/n_step_records": 75.28125, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5557312252964427, "calib/avg_num_step_conf": 9.48046875, "calib/ece": 0.48465587044534414, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0034505928853754836, "calib/mean_conf": 0.04975708502024292, "calib/mu_c": 0.05136363636363636, "calib/mu_w": 0.04791304347826088, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.015082153582465577, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3006.0, "completions/max_terminated_length": 3006.0, "completions/mean_length": 612.5078125, "completions/mean_terminated_length": 614.9098510742188, "completions/min_length": 0.0, "completions/min_terminated_length": 212.0, "epoch": 0.14293333333333333, "grad_norm": 0.30655884742736816, "learning_rate": 1.8333333333333333e-06, "loss": 0.0458, "num_tokens": 29321155.0, "reward": 1.1200120449066162, "reward_std": 0.29521992802619934, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.49567344784736633, "rewards/format_reward_step_strict": 0.9609375, "step": 134 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -8.457031528685821e-07, "aux_brier/mean_group_std": 0.03996141459596204, "aux_brier/mean_r": 0.9808545793400902, "aux_brier/n_active_tok": 279.625, "aux_brier/n_groups": 18.34375, "aux_brier/n_step_records": 69.90625, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5963361777055398, "calib/avg_num_step_conf": 8.73828125, "calib/ece": 0.511265306122449, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005707706894216444, "calib/mean_conf": 0.05200000000000001, "calib/mu_c": 0.0544927536231884, "calib/mu_w": 0.04878504672897196, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.014613384974336483, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2560.0, "completions/max_terminated_length": 2560.0, "completions/mean_length": 586.828125, "completions/mean_terminated_length": 589.1294555664062, "completions/min_length": 0.0, "completions/min_terminated_length": 177.0, "epoch": 0.144, "grad_norm": 0.18846307694911957, "learning_rate": 1.8055555555555557e-06, "loss": 0.0848, "num_tokens": 29577263.0, "reward": 1.130202293395996, "reward_std": 0.3218243718147278, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.4661218523979187, "rewards/format_reward_step_strict": 0.94921875, "step": 135 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.1063661412014092e-07, "aux_brier/mean_group_std": 0.05412816794943832, "aux_brier/mean_r": 0.9704970737134514, "aux_brier/n_active_tok": 280.25, "aux_brier/n_groups": 17.65625, "aux_brier/n_step_records": 70.0625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5938271604938271, "calib/avg_num_step_conf": 8.76171875, "calib/ece": 0.40879518072289156, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0034697855750487477, "calib/mean_conf": 0.05232931726907631, "calib/mu_c": 0.05421052631578949, "calib/mu_w": 0.05074074074074074, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0016465863453815261, "calib/std_conf": 0.013775158944692768, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2988.0, "completions/max_terminated_length": 2988.0, "completions/mean_length": 544.7734375, "completions/mean_terminated_length": 544.7734375, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.14506666666666668, "grad_norm": 0.1553274393081665, "learning_rate": 1.777777777777778e-06, "loss": 0.0518, "num_tokens": 29825213.0, "reward": 1.0728051662445068, "reward_std": 0.20379653573036194, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5724707245826721, "rewards/format_reward_step_strict": 0.96875, "step": 136 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.474809657928347e-06, "aux_brier/mean_group_std": 0.06528294052165005, "aux_brier/mean_r": 0.9520557914546299, "aux_brier/n_active_tok": 299.375, "aux_brier/n_groups": 20.78125, "aux_brier/n_step_records": 74.84375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4745806451612903, "calib/avg_num_step_conf": 9.421875, "calib/ece": 0.44831325301204816, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": -0.008133548387096783, "calib/mean_conf": 0.05738955823293173, "calib/mu_c": 0.05330645161290322, "calib/mu_w": 0.06144, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0038554216867469873, "calib/std_conf": 0.05891261014368137, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2979.0, "completions/max_terminated_length": 2979.0, "completions/mean_length": 559.30078125, "completions/mean_terminated_length": 561.494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.14613333333333334, "grad_norm": 0.039768896996974945, "learning_rate": 1.75e-06, "loss": 0.0633, "num_tokens": 30075378.0, "reward": 1.0999794006347656, "reward_std": 0.27602440118789673, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5327301025390625, "rewards/format_reward_step_strict": 0.96484375, "step": 137 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.3754860414569876e-06, "aux_brier/mean_group_std": 0.04512002295921746, "aux_brier/mean_r": 0.9701418410569603, "aux_brier/n_active_tok": 267.875, "aux_brier/n_groups": 16.46875, "aux_brier/n_step_records": 66.96875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5121019108280255, "calib/avg_num_step_conf": 8.3984375, "calib/ece": 0.5724603174603174, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0015742541066040908, "calib/mean_conf": 0.05365079365079366, "calib/mu_c": 0.05305732484076432, "calib/mu_w": 0.05463157894736841, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0015476190476190477, "calib/std_conf": 0.02545157788488635, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2772.0, "completions/max_terminated_length": 2772.0, "completions/mean_length": 529.83984375, "completions/mean_terminated_length": 531.9176635742188, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.1472, "grad_norm": 0.17647089064121246, "learning_rate": 1.7222222222222224e-06, "loss": 0.0473, "num_tokens": 30315353.0, "reward": 1.203694224357605, "reward_std": 0.3053821325302124, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.4241519570350647, "rewards/format_reward_step_strict": 0.96875, "step": 138 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.288317745319235e-06, "aux_brier/mean_group_std": 0.06964590410507535, "aux_brier/mean_r": 0.9577885741251095, "aux_brier/n_active_tok": 241.125, "aux_brier/n_groups": 12.59375, "aux_brier/n_step_records": 60.28125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5904228222109017, "calib/avg_num_step_conf": 7.64453125, "calib/ece": 0.5367058823529411, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0035220326031584273, "calib/mean_conf": 0.055450980392156866, "calib/mu_c": 0.05688741721854305, "calib/mu_w": 0.05336538461538462, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.01390769814405972, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2001.0, "completions/max_terminated_length": 2001.0, "completions/mean_length": 467.2734375, "completions/mean_terminated_length": 469.10589599609375, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.14826666666666666, "grad_norm": 0.0982087180018425, "learning_rate": 1.6944444444444446e-06, "loss": 0.0032, "num_tokens": 30538071.0, "reward": 1.2004034519195557, "reward_std": 0.2416152060031891, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.4656761884689331, "rewards/format_reward_step_strict": 0.98828125, "step": 139 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.4861101533278926e-06, "aux_brier/mean_group_std": 0.03429095544848432, "aux_brier/mean_r": 0.9768792414671389, "aux_brier/n_active_tok": 259.75, "aux_brier/n_groups": 15.5625, "aux_brier/n_step_records": 64.9375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6691056910569105, "calib/avg_num_step_conf": 8.1171875, "calib/ece": 0.594015748031496, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.0006815718157181655, "calib/mean_conf": 0.0594488188976378, "calib/mu_c": 0.05920731707317073, "calib/mu_w": 0.059888888888888894, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0038976377952755904, "calib/std_conf": 0.06054616364127345, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1981.0, "completions/max_terminated_length": 1981.0, "completions/mean_length": 517.515625, "completions/mean_terminated_length": 517.515625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.14933333333333335, "grad_norm": 0.028828710317611694, "learning_rate": 1.6666666666666667e-06, "loss": 0.0107, "num_tokens": 30775571.0, "reward": 1.236849308013916, "reward_std": 0.20067928731441498, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.41614726185798645, "rewards/format_reward_step_strict": 0.984375, "step": 140 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -8.505575863626547e-07, "aux_brier/mean_group_std": 0.055646158079814545, "aux_brier/mean_r": 0.9621782349581267, "aux_brier/n_active_tok": 249.875, "aux_brier/n_groups": 14.53125, "aux_brier/n_step_records": 62.46875, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.6119267337807606, "calib/avg_num_step_conf": 7.90234375, "calib/ece": 0.5520816326530612, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005032857941834432, "calib/mean_conf": 0.05608163265306122, "calib/mu_c": 0.058053691275167775, "calib/mu_w": 0.05302083333333334, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.0, "calib/std_conf": 0.01552330273964561, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2753.0, "completions/max_terminated_length": 2753.0, "completions/mean_length": 557.76953125, "completions/mean_terminated_length": 559.9569091796875, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.1504, "grad_norm": 0.35210710763931274, "learning_rate": 1.638888888888889e-06, "loss": 0.024, "num_tokens": 31025456.0, "reward": 1.158670425415039, "reward_std": 0.312752902507782, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.4237445294857025, "rewards/format_reward_step_strict": 0.94140625, "step": 141 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.714073346929638e-06, "aux_brier/mean_group_std": 0.058085549976393896, "aux_brier/mean_r": 0.9624802664772752, "aux_brier/n_active_tok": 273.625, "aux_brier/n_groups": 15.4375, "aux_brier/n_step_records": 68.40625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6088179519595448, "calib/avg_num_step_conf": 8.55078125, "calib/ece": 0.49596837944664024, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004401390644753464, "calib/mean_conf": 0.05739130434782609, "calib/mu_c": 0.05935714285714285, "calib/mu_w": 0.05495575221238939, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0156389931326737, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1721.0, "completions/max_terminated_length": 1721.0, "completions/mean_length": 552.61328125, "completions/mean_terminated_length": 556.9645385742188, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.15146666666666667, "grad_norm": 0.5921297669410706, "learning_rate": 1.6111111111111113e-06, "loss": -0.0131, "num_tokens": 31272085.0, "reward": 1.1667234897613525, "reward_std": 0.18465447425842285, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.5028312802314758, "rewards/format_reward_step_strict": 0.98828125, "step": 142 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -6.382482328870509e-06, "aux_brier/mean_group_std": 0.06155460154384764, "aux_brier/mean_r": 0.9625660744821987, "aux_brier/n_active_tok": 258.875, "aux_brier/n_groups": 14.78125, "aux_brier/n_step_records": 64.71875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5420883016435707, "calib/avg_num_step_conf": 8.08984375, "calib/ece": 0.5163888888888888, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0013619078311311733, "calib/mean_conf": 0.05900793650793651, "calib/mu_c": 0.05958620689655173, "calib/mu_w": 0.05822429906542056, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.015942852322794843, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2866.0, "completions/max_terminated_length": 2866.0, "completions/mean_length": 524.44921875, "completions/mean_terminated_length": 524.44921875, "completions/min_length": 203.0, "completions/min_terminated_length": 203.0, "epoch": 0.15253333333333333, "grad_norm": 0.48159152269363403, "learning_rate": 1.5833333333333333e-06, "loss": 0.0431, "num_tokens": 31513680.0, "reward": 1.1731898784637451, "reward_std": 0.1981586366891861, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.47400975227355957, "rewards/format_reward_step_strict": 0.9765625, "step": 143 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.5024501462956863e-06, "aux_brier/mean_group_std": 0.02612270270458595, "aux_brier/mean_r": 0.9838132009374825, "aux_brier/n_active_tok": 268.125, "aux_brier/n_groups": 15.1875, "aux_brier/n_step_records": 67.03125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5332445036642238, "calib/avg_num_step_conf": 8.37890625, "calib/ece": 0.5641106719367588, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008014656895403011, "calib/mean_conf": 0.06039525691699605, "calib/mu_c": 0.06069620253164556, "calib/mu_w": 0.05989473684210526, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.016389584361016573, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2603.0, "completions/max_terminated_length": 2603.0, "completions/mean_length": 541.16015625, "completions/mean_terminated_length": 541.16015625, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.1536, "grad_norm": 0.38825613260269165, "learning_rate": 1.5555555555555558e-06, "loss": -0.004, "num_tokens": 31756345.0, "reward": 1.2218644618988037, "reward_std": 0.2094503939151764, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.44214531779289246, "rewards/format_reward_step_strict": 0.98828125, "step": 144 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.3559944934969934e-06, "aux_brier/mean_group_std": 0.06517902483204491, "aux_brier/mean_r": 0.9474553158431747, "aux_brier/n_active_tok": 280.125, "aux_brier/n_groups": 16.25, "aux_brier/n_step_records": 70.03125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4458034038697987, "calib/avg_num_step_conf": 8.75390625, "calib/ece": 0.5351574803149606, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005481783472464891, "calib/mean_conf": 0.0691732283464567, "calib/mu_c": 0.06699346405228758, "calib/mu_w": 0.07247524752475247, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.000984251968503937, "calib/std_conf": 0.027011690233356714, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2537.0, "completions/max_terminated_length": 2537.0, "completions/mean_length": 515.55078125, "completions/mean_terminated_length": 515.55078125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.15466666666666667, "grad_norm": 0.9937516450881958, "learning_rate": 1.527777777777778e-06, "loss": 0.0377, "num_tokens": 31991030.0, "reward": 1.2110344171524048, "reward_std": 0.2681943476200104, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.46913787722587585, "rewards/format_reward_step_strict": 0.9921875, "step": 145 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.8197305471367997e-06, "aux_brier/mean_group_std": 0.03062187333883319, "aux_brier/mean_r": 0.983849446933646, "aux_brier/n_active_tok": 271.375, "aux_brier/n_groups": 16.96875, "aux_brier/n_step_records": 67.84375, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5542513316235944, "calib/avg_num_step_conf": 8.6171875, "calib/ece": 0.3800806451612903, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007624120470835785, "calib/mean_conf": 0.06749999999999999, "calib/mu_c": 0.0717117117117117, "calib/mu_w": 0.06408759124087592, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0394544449435859, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2625.0, "completions/max_terminated_length": 2625.0, "completions/mean_length": 536.5390625, "completions/mean_terminated_length": 538.6431884765625, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.15573333333333333, "grad_norm": 0.22396814823150635, "learning_rate": 1.5e-06, "loss": 0.0678, "num_tokens": 32235600.0, "reward": 1.0630507469177246, "reward_std": 0.23441281914710999, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.588140606880188, "rewards/format_reward_step_strict": 0.96484375, "step": 146 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.0655819099602013e-07, "aux_brier/mean_group_std": 0.04439007321310594, "aux_brier/mean_r": 0.967196735743911, "aux_brier/n_active_tok": 261.5, "aux_brier/n_groups": 14.28125, "aux_brier/n_step_records": 65.375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5725832215193917, "calib/avg_num_step_conf": 8.171875, "calib/ece": 0.37253968253968256, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004976039869656904, "calib/mean_conf": 0.06793650793650792, "calib/mu_c": 0.07072072072072072, "calib/mu_w": 0.06574468085106382, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.028488857758374963, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2768.0, "completions/max_terminated_length": 2768.0, "completions/mean_length": 534.3359375, "completions/mean_terminated_length": 534.3359375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.1568, "grad_norm": 0.7205764651298523, "learning_rate": 1.4722222222222225e-06, "loss": 0.0173, "num_tokens": 32476070.0, "reward": 1.0745468139648438, "reward_std": 0.20038121938705444, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.6028749942779541, "rewards/format_reward_step_strict": 0.98046875, "step": 147 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.0828740335949716e-06, "aux_brier/mean_group_std": 0.06532441346014504, "aux_brier/mean_r": 0.9624720179970169, "aux_brier/n_active_tok": 264.25, "aux_brier/n_groups": 14.34375, "aux_brier/n_step_records": 66.0625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.566489723696747, "calib/avg_num_step_conf": 8.265625, "calib/ece": 0.5632270916334662, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003103988022322021, "calib/mean_conf": 0.06625498007968127, "calib/mu_c": 0.06740506329113923, "calib/mu_w": 0.0643010752688172, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0176146988234553, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3000.0, "completions/max_terminated_length": 3000.0, "completions/mean_length": 521.12109375, "completions/mean_terminated_length": 525.2244262695312, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.15786666666666666, "grad_norm": 0.35711437463760376, "learning_rate": 1.4444444444444445e-06, "loss": 0.0267, "num_tokens": 32714589.0, "reward": 1.209111213684082, "reward_std": 0.2663658559322357, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.43019530177116394, "rewards/format_reward_step_strict": 0.96875, "step": 148 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.378460294938378e-06, "aux_brier/mean_group_std": 0.04840870256089264, "aux_brier/mean_r": 0.9751864015836805, "aux_brier/n_active_tok": 283.875, "aux_brier/n_groups": 16.8125, "aux_brier/n_step_records": 70.96875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5727946916471508, "calib/avg_num_step_conf": 9.10546875, "calib/ece": 0.42709677419354836, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.004032258064516129, "calib/gap": -0.000554254488680711, "calib/mean_conf": 0.0728225806451613, "calib/mu_c": 0.07254098360655739, "calib/mu_w": 0.0730952380952381, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.003991935483870968, "calib/std_conf": 0.06446505034451548, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2578.0, "completions/max_terminated_length": 2578.0, "completions/mean_length": 580.8515625, "completions/mean_terminated_length": 587.7391357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.15893333333333334, "grad_norm": 0.6782702803611755, "learning_rate": 1.4166666666666667e-06, "loss": 0.0128, "num_tokens": 32967743.0, "reward": 1.0911545753479004, "reward_std": 0.23866769671440125, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5443058609962463, "rewards/format_reward_step_strict": 0.95703125, "step": 149 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.712223138637507e-06, "aux_brier/mean_group_std": 0.0703186177078078, "aux_brier/mean_r": 0.9530558812833309, "aux_brier/n_active_tok": 263.5, "aux_brier/n_groups": 16.5, "aux_brier/n_step_records": 65.875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6715841584158416, "calib/avg_num_step_conf": 8.234375, "calib/ece": 0.5269322709163347, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009469306930693075, "calib/mean_conf": 0.07258964143426296, "calib/mu_c": 0.07640000000000001, "calib/mu_w": 0.06693069306930693, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0009561752988047808, "calib/std_conf": 0.02238887859478425, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2796.0, "completions/max_terminated_length": 2796.0, "completions/mean_length": 496.140625, "completions/mean_terminated_length": 496.140625, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.16, "grad_norm": 0.04868630692362785, "learning_rate": 1.3888888888888892e-06, "loss": 0.0303, "num_tokens": 33199715.0, "reward": 1.1928496360778809, "reward_std": 0.20058949291706085, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.4745234251022339, "rewards/format_reward_step_strict": 0.9765625, "step": 150 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -8.773511487314423e-07, "aux_brier/mean_group_std": 0.06375056078987522, "aux_brier/mean_r": 0.9548843774785912, "aux_brier/n_active_tok": 259.75, "aux_brier/n_groups": 15.28125, "aux_brier/n_step_records": 64.9375, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5365328563857975, "calib/avg_num_step_conf": 8.328125, "calib/ece": 0.3739271255060729, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0021813725490195945, "calib/mean_conf": 0.07546558704453442, "calib/mu_c": 0.07666666666666666, "calib/mu_w": 0.07448529411764707, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.028506797079224936, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2929.0, "completions/max_terminated_length": 2929.0, "completions/mean_length": 526.30859375, "completions/mean_terminated_length": 530.4527587890625, "completions/min_length": 0.0, "completions/min_terminated_length": 176.0, "epoch": 0.16106666666666666, "grad_norm": 1.04205322265625, "learning_rate": 1.3611111111111112e-06, "loss": 0.0793, "num_tokens": 33441474.0, "reward": 1.0522384643554688, "reward_std": 0.2746904194355011, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.5761418342590332, "rewards/format_reward_step_strict": 0.94921875, "step": 151 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.2677447153164323e-06, "aux_brier/mean_group_std": 0.028382429319569653, "aux_brier/mean_r": 0.9833729546309443, "aux_brier/n_active_tok": 272.75, "aux_brier/n_groups": 16.03125, "aux_brier/n_step_records": 68.1875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5209770475354321, "calib/avg_num_step_conf": 8.5703125, "calib/ece": 0.3976679841897233, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0003148124921610457, "calib/mean_conf": 0.07268774703557312, "calib/mu_c": 0.07252100840336136, "calib/mu_w": 0.0728358208955224, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02199284217153069, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2459.0, "completions/max_terminated_length": 2459.0, "completions/mean_length": 508.5859375, "completions/mean_terminated_length": 512.590576171875, "completions/min_length": 0.0, "completions/min_terminated_length": 208.0, "epoch": 0.16213333333333332, "grad_norm": 1.1266095638275146, "learning_rate": 1.3333333333333334e-06, "loss": 0.0088, "num_tokens": 33677064.0, "reward": 1.1023454666137695, "reward_std": 0.2495948076248169, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5812570452690125, "rewards/format_reward_step_strict": 0.984375, "step": 152 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.792688601622629e-06, "aux_brier/mean_group_std": 0.07092418107172463, "aux_brier/mean_r": 0.9471419582640936, "aux_brier/n_active_tok": 263.125, "aux_brier/n_groups": 15.15625, "aux_brier/n_step_records": 65.78125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5624302134646962, "calib/avg_num_step_conf": 8.39453125, "calib/ece": 0.50472, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004013136288998367, "calib/mean_conf": 0.07528, "calib/mu_c": 0.07696551724137932, "calib/mu_w": 0.07295238095238095, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.023700666657290467, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3059.0, "completions/max_terminated_length": 3059.0, "completions/mean_length": 529.859375, "completions/mean_terminated_length": 531.937255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 202.0, "epoch": 0.1632, "grad_norm": 0.46357324719429016, "learning_rate": 1.3055555555555556e-06, "loss": 0.0795, "num_tokens": 33920028.0, "reward": 1.1745765209197998, "reward_std": 0.18833814561367035, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.48736873269081116, "rewards/format_reward_step_strict": 0.97265625, "step": 153 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 7.3004426348055595e-06, "aux_brier/mean_group_std": 0.06479431974825353, "aux_brier/mean_r": 0.9635108167995876, "aux_brier/n_active_tok": 267.625, "aux_brier/n_groups": 16.09375, "aux_brier/n_step_records": 66.90625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5803173187205303, "calib/avg_num_step_conf": 8.36328125, "calib/ece": 0.39593625498007967, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008738371352109087, "calib/mean_conf": 0.07418326693227092, "calib/mu_c": 0.0788135593220339, "calib/mu_w": 0.07007518796992482, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.027650894938876375, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2674.0, "completions/max_terminated_length": 2674.0, "completions/mean_length": 508.1953125, "completions/mean_terminated_length": 508.1953125, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.16426666666666667, "grad_norm": 0.3397197127342224, "learning_rate": 1.2777777777777779e-06, "loss": 0.1079, "num_tokens": 34154566.0, "reward": 1.0976823568344116, "reward_std": 0.2694038152694702, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5860421657562256, "rewards/format_reward_step_strict": 0.98046875, "step": 154 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.0855995707401718e-06, "aux_brier/mean_group_std": 0.06309926270947872, "aux_brier/mean_r": 0.9577261976476592, "aux_brier/n_active_tok": 265.25, "aux_brier/n_groups": 15.3125, "aux_brier/n_step_records": 66.3125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5155589704187475, "calib/avg_num_step_conf": 8.56640625, "calib/ece": 0.37792828685258967, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0036470738891023202, "calib/mean_conf": 0.07625498007968128, "calib/mu_c": 0.07824561403508772, "calib/mu_w": 0.0745985401459854, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.036208761748919856, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2853.0, "completions/max_terminated_length": 2853.0, "completions/mean_length": 499.01171875, "completions/mean_terminated_length": 502.9409484863281, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.16533333333333333, "grad_norm": 0.38062500953674316, "learning_rate": 1.25e-06, "loss": 0.0251, "num_tokens": 34389529.0, "reward": 1.0800366401672363, "reward_std": 0.2779013216495514, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5935840010643005, "rewards/format_reward_step_strict": 0.97265625, "step": 155 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.697006269203328e-06, "aux_brier/mean_group_std": 0.049255267466241745, "aux_brier/mean_r": 0.9655943838545354, "aux_brier/n_active_tok": 295.0, "aux_brier/n_groups": 19.09375, "aux_brier/n_step_records": 73.75, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5478144746270602, "calib/avg_num_step_conf": 9.70703125, "calib/ece": 0.3997983870967742, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0063846003517686045, "calib/mean_conf": 0.08004032258064517, "calib/mu_c": 0.08336134453781513, "calib/mu_w": 0.07697674418604653, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.03213502239632129, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2590.0, "completions/max_terminated_length": 2590.0, "completions/mean_length": 532.84765625, "completions/mean_terminated_length": 539.166015625, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.1664, "grad_norm": 0.3053121864795685, "learning_rate": 1.2222222222222223e-06, "loss": 0.0197, "num_tokens": 34630698.0, "reward": 1.086916446685791, "reward_std": 0.24515119194984436, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5664156675338745, "rewards/format_reward_step_strict": 0.9609375, "step": 156 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.0148835825906097e-06, "aux_brier/mean_group_std": 0.07082002987039873, "aux_brier/mean_r": 0.9552137337020143, "aux_brier/n_active_tok": 282.375, "aux_brier/n_groups": 18.0, "aux_brier/n_step_records": 70.59375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6178282633808241, "calib/avg_num_step_conf": 8.82421875, "calib/ece": 0.4973517786561264, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009898600949813902, "calib/mean_conf": 0.08367588932806325, "calib/mu_c": 0.08782312925170069, "calib/mu_w": 0.07792452830188679, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.032829745437239044, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2592.0, "completions/max_terminated_length": 2592.0, "completions/mean_length": 491.51171875, "completions/mean_terminated_length": 493.4392395019531, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.16746666666666668, "grad_norm": 0.22145549952983856, "learning_rate": 1.1944444444444446e-06, "loss": 0.0098, "num_tokens": 34860253.0, "reward": 1.1950936317443848, "reward_std": 0.20999005436897278, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.5069370865821838, "rewards/format_reward_step_strict": 0.98828125, "step": 157 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.561560416240339e-06, "aux_brier/mean_group_std": 0.05701335252611965, "aux_brier/mean_r": 0.9607430173743206, "aux_brier/n_active_tok": 269.5, "aux_brier/n_groups": 16.09375, "aux_brier/n_step_records": 67.375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5089937267955447, "calib/avg_num_step_conf": 8.4296875, "calib/ece": 0.4960474308300396, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00031366022276275995, "calib/mean_conf": 0.08355731225296444, "calib/mu_c": 0.08342465753424658, "calib/mu_w": 0.08373831775700934, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0012648221343873518, "calib/std_conf": 0.03333850928688426, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2655.0, "completions/max_terminated_length": 2655.0, "completions/mean_length": 489.6640625, "completions/mean_terminated_length": 491.5843505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.16853333333333334, "grad_norm": 0.2728252112865448, "learning_rate": 1.1666666666666668e-06, "loss": 0.0528, "num_tokens": 35090847.0, "reward": 1.1907347440719604, "reward_std": 0.25884896516799927, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.5051265954971313, "rewards/format_reward_step_strict": 0.98828125, "step": 158 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 4.549838194228828e-06, "aux_brier/mean_group_std": 0.045389470776766, "aux_brier/mean_r": 0.9715923945016651, "aux_brier/n_active_tok": 252.75, "aux_brier/n_groups": 14.625, "aux_brier/n_step_records": 63.1875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5049873737373738, "calib/avg_num_step_conf": 7.9609375, "calib/ece": 0.44936507936507936, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0019621212121212067, "calib/mean_conf": 0.07444444444444444, "calib/mu_c": 0.07537878787878788, "calib/mu_w": 0.07341666666666667, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.026172673121829793, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2500.0, "completions/max_terminated_length": 2500.0, "completions/mean_length": 465.7109375, "completions/mean_terminated_length": 467.53729248046875, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.1696, "grad_norm": 0.16256766021251678, "learning_rate": 1.138888888888889e-06, "loss": 0.0475, "num_tokens": 35314853.0, "reward": 1.142901062965393, "reward_std": 0.2668300271034241, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5403547286987305, "rewards/format_reward_step_strict": 0.984375, "step": 159 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.4189522139496091e-06, "aux_brier/mean_group_std": 0.048682114241358466, "aux_brier/mean_r": 0.9764424962548897, "aux_brier/n_active_tok": 283.0, "aux_brier/n_groups": 19.0625, "aux_brier/n_step_records": 70.75, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5760825982357658, "calib/avg_num_step_conf": 8.87109375, "calib/ece": 0.43897959183673474, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010247928361400677, "calib/mean_conf": 0.08755102040816327, "calib/mu_c": 0.0924031007751938, "calib/mu_w": 0.08215517241379312, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.034601557841825795, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2636.0, "completions/max_terminated_length": 2636.0, "completions/mean_length": 516.75, "completions/mean_terminated_length": 518.7764892578125, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.17066666666666666, "grad_norm": 0.3571442663669586, "learning_rate": 1.111111111111111e-06, "loss": 0.0678, "num_tokens": 35551981.0, "reward": 1.1207700967788696, "reward_std": 0.27606478333473206, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5377683639526367, "rewards/format_reward_step_strict": 0.95703125, "step": 160 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.788575513085201e-06, "aux_brier/mean_group_std": 0.07286658959147208, "aux_brier/mean_r": 0.9612494504089659, "aux_brier/n_active_tok": 236.625, "aux_brier/n_groups": 13.28125, "aux_brier/n_step_records": 59.15625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5555515370705244, "calib/avg_num_step_conf": 7.48828125, "calib/ece": 0.6029921259842521, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0062972875226039865, "calib/mean_conf": 0.08598425196850396, "calib/mu_c": 0.08794285714285714, "calib/mu_w": 0.08164556962025316, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.035221679135631255, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2552.0, "completions/max_terminated_length": 2552.0, "completions/mean_length": 453.97265625, "completions/mean_terminated_length": 455.7529602050781, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.17173333333333332, "grad_norm": 0.2653784155845642, "learning_rate": 1.0833333333333335e-06, "loss": 0.0191, "num_tokens": 35772118.0, "reward": 1.2818372249603271, "reward_std": 0.21012239158153534, "rewards/accuracy_reward_step": 0.68359375, "rewards/final_brier_reward_step": 0.41641169786453247, "rewards/format_reward_step_strict": 0.98828125, "step": 161 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 4.81067918833844e-07, "aux_brier/mean_group_std": 0.05413243927366356, "aux_brier/mean_r": 0.9664557423505251, "aux_brier/n_active_tok": 245.375, "aux_brier/n_groups": 15.4375, "aux_brier/n_step_records": 61.34375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6345530650823656, "calib/avg_num_step_conf": 7.66796875, "calib/ece": 0.5496442687747036, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01363354037267081, "calib/mean_conf": 0.08671936758893282, "calib/mu_c": 0.09167701863354039, "calib/mu_w": 0.07804347826086958, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.041644888516131295, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2862.0, "completions/max_terminated_length": 2862.0, "completions/mean_length": 465.25390625, "completions/mean_terminated_length": 465.25390625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.1728, "grad_norm": 0.7708801031112671, "learning_rate": 1.0555555555555557e-06, "loss": 0.0651, "num_tokens": 35995367.0, "reward": 1.2394322156906128, "reward_std": 0.21498660743236542, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.46554142236709595, "rewards/format_reward_step_strict": 0.98828125, "step": 162 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -8.302051902955854e-06, "aux_brier/mean_group_std": 0.0638352361480643, "aux_brier/mean_r": 0.9578971509819684, "aux_brier/n_active_tok": 275.875, "aux_brier/n_groups": 16.375, "aux_brier/n_step_records": 68.96875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6277521761392729, "calib/avg_num_step_conf": 8.921875, "calib/ece": 0.41291999999999995, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01506144393241167, "calib/mean_conf": 0.09372000000000001, "calib/mu_c": 0.10119047619047618, "calib/mu_w": 0.08612903225806451, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00132, "calib/std_conf": 0.04174400076657722, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2546.0, "completions/max_terminated_length": 2546.0, "completions/mean_length": 516.421875, "completions/mean_terminated_length": 520.4881591796875, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.17386666666666667, "grad_norm": 0.6164954900741577, "learning_rate": 1.0277777777777777e-06, "loss": 0.0461, "num_tokens": 36232403.0, "reward": 1.1238950490951538, "reward_std": 0.21191027760505676, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5737050771713257, "rewards/format_reward_step_strict": 0.9765625, "step": 163 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.3525187265717165e-06, "aux_brier/mean_group_std": 0.03834141689759305, "aux_brier/mean_r": 0.9800117110780533, "aux_brier/n_active_tok": 265.75, "aux_brier/n_groups": 14.625, "aux_brier/n_step_records": 66.4375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.527529761904762, "calib/avg_num_step_conf": 8.421875, "calib/ece": 0.419484251968504, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005918898809523926, "calib/mean_conf": 0.08791732283464566, "calib/mu_c": 0.08821093750000002, "calib/mu_w": 0.08761904761904762, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001732283464566929, "calib/std_conf": 0.03821114483607079, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1960.0, "completions/max_terminated_length": 1960.0, "completions/mean_length": 515.90234375, "completions/mean_terminated_length": 517.925537109375, "completions/min_length": 0.0, "completions/min_terminated_length": 158.0, "epoch": 0.17493333333333333, "grad_norm": 0.14638356864452362, "learning_rate": 1.0000000000000002e-06, "loss": -0.0287, "num_tokens": 36470610.0, "reward": 1.131101369857788, "reward_std": 0.24305739998817444, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5712807178497314, "rewards/format_reward_step_strict": 0.9765625, "step": 164 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.987938269669257e-07, "aux_brier/mean_group_std": 0.04195193778587433, "aux_brier/mean_r": 0.9751945249083633, "aux_brier/n_active_tok": 265.25, "aux_brier/n_groups": 15.0, "aux_brier/n_step_records": 66.3125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5782344844844846, "calib/avg_num_step_conf": 8.2890625, "calib/ece": 0.3345490196078431, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010964714714714718, "calib/mean_conf": 0.10074509803921569, "calib/mu_c": 0.10693693693693694, "calib/mu_w": 0.09597222222222222, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.040678482599022986, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2131.0, "completions/max_terminated_length": 2131.0, "completions/mean_length": 504.55078125, "completions/mean_terminated_length": 504.55078125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.176, "grad_norm": 0.04463425651192665, "learning_rate": 9.722222222222224e-07, "loss": 0.0131, "num_tokens": 36705351.0, "reward": 1.0925097465515137, "reward_std": 0.21812310814857483, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.6434761881828308, "rewards/format_reward_step_strict": 0.99609375, "step": 165 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.738217014768395e-06, "aux_brier/mean_group_std": 0.06286699413879694, "aux_brier/mean_r": 0.9591385617465502, "aux_brier/n_active_tok": 280.5, "aux_brier/n_groups": 16.46875, "aux_brier/n_step_records": 70.125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6489734927234927, "calib/avg_num_step_conf": 8.765625, "calib/ece": 0.4890873015873015, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02069906444906444, "calib/mean_conf": 0.09821428571428571, "calib/mu_c": 0.10675675675675676, "calib/mu_w": 0.08605769230769232, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.042515003354158185, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2663.0, "completions/max_terminated_length": 2663.0, "completions/mean_length": 564.296875, "completions/mean_terminated_length": 564.296875, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 0.17706666666666668, "grad_norm": 0.013891350477933884, "learning_rate": 9.444444444444445e-07, "loss": 0.0391, "num_tokens": 36955995.0, "reward": 1.1921110153198242, "reward_std": 0.23113755881786346, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.5106316208839417, "rewards/format_reward_step_strict": 0.97265625, "step": 166 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.111495625265139e-06, "aux_brier/mean_group_std": 0.05852985912014213, "aux_brier/mean_r": 0.9574375097612414, "aux_brier/n_active_tok": 266.125, "aux_brier/n_groups": 16.3125, "aux_brier/n_step_records": 66.53125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6824582991523106, "calib/avg_num_step_conf": 8.31640625, "calib/ece": 0.5209561752988048, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.029357396773311464, "calib/mean_conf": 0.11250996015936256, "calib/mu_c": 0.12327044025157234, "calib/mu_w": 0.09391304347826088, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.04700253129996288, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2644.0, "completions/max_terminated_length": 2644.0, "completions/mean_length": 498.57421875, "completions/mean_terminated_length": 498.57421875, "completions/min_length": 1.0, "completions/min_terminated_length": 1.0, "epoch": 0.17813333333333334, "grad_norm": 0.16410934925079346, "learning_rate": 9.166666666666666e-07, "loss": 0.0268, "num_tokens": 37189238.0, "reward": 1.2358087301254272, "reward_std": 0.25065454840660095, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.4979226589202881, "rewards/format_reward_step_strict": 0.98046875, "step": 167 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.6832584311954335e-06, "aux_brier/mean_group_std": 0.08202811903094603, "aux_brier/mean_r": 0.9427979547787165, "aux_brier/n_active_tok": 278.625, "aux_brier/n_groups": 16.375, "aux_brier/n_step_records": 69.65625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4515483265561464, "calib/avg_num_step_conf": 8.7109375, "calib/ece": 0.42940944881889764, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.011261182358461075, "calib/mean_conf": 0.12114173228346456, "calib/mu_c": 0.11604316546762589, "calib/mu_w": 0.12730434782608696, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001653543307086614, "calib/std_conf": 0.045444822372452516, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2457.0, "completions/max_terminated_length": 2457.0, "completions/mean_length": 537.90625, "completions/mean_terminated_length": 537.90625, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.1792, "grad_norm": 0.3969951570034027, "learning_rate": 8.88888888888889e-07, "loss": 0.021, "num_tokens": 37431614.0, "reward": 1.1758363246917725, "reward_std": 0.2561456263065338, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5549074411392212, "rewards/format_reward_step_strict": 0.98828125, "step": 168 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.90437492831164e-06, "aux_brier/mean_group_std": 0.048136441288030074, "aux_brier/mean_r": 0.9676011564396277, "aux_brier/n_active_tok": 256.125, "aux_brier/n_groups": 14.34375, "aux_brier/n_step_records": 64.03125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5763746145940392, "calib/avg_num_step_conf": 8.27734375, "calib/ece": 0.44027888446215147, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011812692702980451, "calib/mean_conf": 0.11350597609561754, "calib/mu_c": 0.11877697841726617, "calib/mu_w": 0.10696428571428572, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04619871943499832, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2677.0, "completions/max_terminated_length": 2677.0, "completions/mean_length": 500.6953125, "completions/mean_terminated_length": 506.6324462890625, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.18026666666666666, "grad_norm": 0.18512670695781708, "learning_rate": 8.611111111111112e-07, "loss": -0.0053, "num_tokens": 37663976.0, "reward": 1.1750493049621582, "reward_std": 0.21191146969795227, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.5517597794532776, "rewards/format_reward_step_strict": 0.98046875, "step": 169 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.415873636585353e-06, "aux_brier/mean_group_std": 0.05310493198191623, "aux_brier/mean_r": 0.9692374323947502, "aux_brier/n_active_tok": 278.625, "aux_brier/n_groups": 14.25, "aux_brier/n_step_records": 69.65625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5742113862008296, "calib/avg_num_step_conf": 8.75, "calib/ece": 0.45572549019607844, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012558753298982026, "calib/mean_conf": 0.1168235294117647, "calib/mu_c": 0.12219178082191778, "calib/mu_w": 0.10963302752293576, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.04897886934633356, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1378.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 494.00390625, "completions/mean_terminated_length": 495.9411926269531, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.18133333333333335, "grad_norm": 0.13570146262645721, "learning_rate": 8.333333333333333e-07, "loss": 0.0058, "num_tokens": 37894593.0, "reward": 1.2027275562286377, "reward_std": 0.20788493752479553, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.5452852249145508, "rewards/format_reward_step_strict": 0.9921875, "step": 170 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.163024858464982e-06, "aux_brier/mean_group_std": 0.06589841047410823, "aux_brier/mean_r": 0.9508623940262273, "aux_brier/n_active_tok": 260.125, "aux_brier/n_groups": 13.25, "aux_brier/n_step_records": 65.03125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5535850495804729, "calib/avg_num_step_conf": 8.2265625, "calib/ece": 0.33789682539682536, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00531273836765829, "calib/mean_conf": 0.11972222222222223, "calib/mu_c": 0.12263157894736842, "calib/mu_w": 0.11731884057971013, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0026190476190476185, "calib/std_conf": 0.04683136564056339, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1329.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 480.30078125, "completions/mean_terminated_length": 484.0826721191406, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.1824, "grad_norm": 0.20341052114963531, "learning_rate": 8.055555555555557e-07, "loss": 0.0151, "num_tokens": 38124446.0, "reward": 1.0994094610214233, "reward_std": 0.2636204957962036, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6320128440856934, "rewards/format_reward_step_strict": 0.984375, "step": 171 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.624893337774495e-06, "aux_brier/mean_group_std": 0.05225646806897729, "aux_brier/mean_r": 0.971827501106451, "aux_brier/n_active_tok": 247.25, "aux_brier/n_groups": 13.8125, "aux_brier/n_step_records": 61.8125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.569299808166621, "calib/avg_num_step_conf": 7.81640625, "calib/ece": 0.5239525691699605, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00609619073718827, "calib/mean_conf": 0.1282213438735178, "calib/mu_c": 0.1303658536585366, "calib/mu_w": 0.12426966292134832, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.001976284584980237, "calib/std_conf": 0.047477875219391966, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1934.0, "completions/max_terminated_length": 1934.0, "completions/mean_length": 460.02734375, "completions/mean_terminated_length": 461.8313903808594, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.18346666666666667, "grad_norm": 0.2539040446281433, "learning_rate": 7.777777777777779e-07, "loss": -0.0053, "num_tokens": 38345565.0, "reward": 1.260039210319519, "reward_std": 0.26275694370269775, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.4932820200920105, "rewards/format_reward_step_strict": 0.984375, "step": 172 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.9148429181447e-07, "aux_brier/mean_group_std": 0.07979615823018552, "aux_brier/mean_r": 0.9540008730685733, "aux_brier/n_active_tok": 269.125, "aux_brier/n_groups": 17.71875, "aux_brier/n_step_records": 67.28125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5855387523629489, "calib/avg_num_step_conf": 8.41015625, "calib/ece": 0.5124110671936759, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012872670807453385, "calib/mean_conf": 0.12395256916996049, "calib/mu_c": 0.1286335403726708, "calib/mu_w": 0.11576086956521742, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.04886245237698008, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2956.0, "completions/max_terminated_length": 2956.0, "completions/mean_length": 510.25390625, "completions/mean_terminated_length": 510.25390625, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.18453333333333333, "grad_norm": 0.09980254620313644, "learning_rate": 7.5e-07, "loss": 0.0408, "num_tokens": 38579350.0, "reward": 1.2439329624176025, "reward_std": 0.27457743883132935, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.49916914105415344, "rewards/format_reward_step_strict": 0.98046875, "step": 173 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.077650419567533e-06, "aux_brier/mean_group_std": 0.06339299942105313, "aux_brier/mean_r": 0.9601915076758774, "aux_brier/n_active_tok": 260.625, "aux_brier/n_groups": 14.6875, "aux_brier/n_step_records": 65.15625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6350469066937119, "calib/avg_num_step_conf": 8.1484375, "calib/ece": 0.3348015873015873, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02428245436105478, "calib/mean_conf": 0.12551587301587303, "calib/mu_c": 0.13862068965517244, "calib/mu_w": 0.11433823529411766, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.04648062134638241, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2485.0, "completions/max_terminated_length": 2485.0, "completions/mean_length": 526.45703125, "completions/mean_terminated_length": 526.45703125, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.1856, "grad_norm": 0.0846116691827774, "learning_rate": 7.222222222222222e-07, "loss": 0.0003, "num_tokens": 38818355.0, "reward": 1.1021990776062012, "reward_std": 0.2749863564968109, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.635358989238739, "rewards/format_reward_step_strict": 0.98046875, "step": 174 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 8.318068470680728e-07, "aux_brier/mean_group_std": 0.05940279317404167, "aux_brier/mean_r": 0.9610518133940902, "aux_brier/n_active_tok": 256.25, "aux_brier/n_groups": 15.59375, "aux_brier/n_step_records": 64.0625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5991245791245792, "calib/avg_num_step_conf": 8.53125, "calib/ece": 0.28347791164658626, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": 0.012502626262626257, "calib/mean_conf": 0.12206425702811245, "calib/mu_c": 0.12959595959595957, "calib/mu_w": 0.11709333333333331, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.003975903614457831, "calib/std_conf": 0.07521788890146662, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2937.0, "completions/max_terminated_length": 2937.0, "completions/mean_length": 480.47265625, "completions/mean_terminated_length": 490.0438537597656, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.18666666666666668, "grad_norm": 0.49778616428375244, "learning_rate": 6.944444444444446e-07, "loss": -0.0333, "num_tokens": 39047180.0, "reward": 1.0327746868133545, "reward_std": 0.2279902696609497, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.6623488664627075, "rewards/format_reward_step_strict": 0.9609375, "step": 175 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.015040904661646e-07, "aux_brier/mean_group_std": 0.07720815796335934, "aux_brier/mean_r": 0.9545459530610562, "aux_brier/n_active_tok": 264.0, "aux_brier/n_groups": 15.6875, "aux_brier/n_step_records": 66.0, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.680995043842928, "calib/avg_num_step_conf": 8.25, "calib/ece": 0.3602390438247012, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.028872791968483927, "calib/mean_conf": 0.1258167330677291, "calib/mu_c": 0.14065573770491804, "calib/mu_w": 0.11178294573643412, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.045512030269889224, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2823.0, "completions/max_terminated_length": 2823.0, "completions/mean_length": 475.484375, "completions/mean_terminated_length": 477.34906005859375, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.18773333333333334, "grad_norm": 0.08645545691251755, "learning_rate": 6.666666666666667e-07, "loss": 0.0035, "num_tokens": 39272968.0, "reward": 1.1219011545181274, "reward_std": 0.2135622501373291, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6204172372817993, "rewards/format_reward_step_strict": 0.98046875, "step": 176 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.432011142558757e-07, "aux_brier/mean_group_std": 0.05504437234161081, "aux_brier/mean_r": 0.9677546085845754, "aux_brier/n_active_tok": 276.125, "aux_brier/n_groups": 18.28125, "aux_brier/n_step_records": 69.03125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4958063286313381, "calib/avg_num_step_conf": 8.7578125, "calib/ece": 0.36645418326693224, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00045622061253018653, "calib/mean_conf": 0.11960159362549802, "calib/mu_c": 0.11983606557377051, "calib/mu_w": 0.11937984496124032, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.04477300692219342, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3016.0, "completions/max_terminated_length": 3016.0, "completions/mean_length": 542.4453125, "completions/mean_terminated_length": 544.5725708007812, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.1888, "grad_norm": 0.0409964844584465, "learning_rate": 6.388888888888889e-07, "loss": 0.0733, "num_tokens": 39515666.0, "reward": 1.112450122833252, "reward_std": 0.2231827825307846, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.598237931728363, "rewards/format_reward_step_strict": 0.97265625, "step": 177 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.233472673836891e-06, "aux_brier/mean_group_std": 0.07240147272896671, "aux_brier/mean_r": 0.957393248241993, "aux_brier/n_active_tok": 249.0, "aux_brier/n_groups": 12.5625, "aux_brier/n_step_records": 62.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5090874668686104, "calib/avg_num_step_conf": 7.921875, "calib/ece": 0.4302766798418972, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0027899785434810087, "calib/mean_conf": 0.12837944664031622, "calib/mu_c": 0.1271223021582734, "calib/mu_w": 0.1299122807017544, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.004624505928853755, "calib/std_conf": 0.05458559558659771, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2501.0, "completions/max_terminated_length": 2501.0, "completions/mean_length": 476.640625, "completions/mean_terminated_length": 478.50982666015625, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.18986666666666666, "grad_norm": 0.28055137395858765, "learning_rate": 6.111111111111112e-07, "loss": 0.029, "num_tokens": 39743758.0, "reward": 1.1723394393920898, "reward_std": 0.29497209191322327, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5565453171730042, "rewards/format_reward_step_strict": 0.98046875, "step": 178 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.454093463308496e-06, "aux_brier/mean_group_std": 0.042397692019474235, "aux_brier/mean_r": 0.9736497694609709, "aux_brier/n_active_tok": 266.75, "aux_brier/n_groups": 15.9375, "aux_brier/n_step_records": 66.6875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5346933386149072, "calib/avg_num_step_conf": 8.3359375, "calib/ece": 0.4836111111111111, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006316102198455145, "calib/mean_conf": 0.12353174603174603, "calib/mu_c": 0.12601307189542485, "calib/mu_w": 0.1196969696969697, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.043450242753799106, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3018.0, "completions/max_terminated_length": 3018.0, "completions/mean_length": 503.9609375, "completions/mean_terminated_length": 505.9372863769531, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.19093333333333334, "grad_norm": 0.162491112947464, "learning_rate": 5.833333333333334e-07, "loss": 0.027, "num_tokens": 39979036.0, "reward": 1.2147315740585327, "reward_std": 0.2703174948692322, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.5151761770248413, "rewards/format_reward_step_strict": 0.9765625, "step": 179 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.7154782352979403e-07, "aux_brier/mean_group_std": 0.07559057480052574, "aux_brier/mean_r": 0.9453568509669251, "aux_brier/n_active_tok": 293.75, "aux_brier/n_groups": 17.09375, "aux_brier/n_step_records": 73.4375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5771254500138466, "calib/avg_num_step_conf": 9.3125, "calib/ece": 0.500281124497992, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012794239822763773, "calib/mean_conf": 0.1302409638554217, "calib/mu_c": 0.13496815286624206, "calib/mu_w": 0.12217391304347829, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.046671781511234696, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2580.0, "completions/max_terminated_length": 2580.0, "completions/mean_length": 592.5859375, "completions/mean_terminated_length": 599.6126708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 177.0, "epoch": 0.192, "grad_norm": 0.15977561473846436, "learning_rate": 5.555555555555555e-07, "loss": 0.0119, "num_tokens": 40234594.0, "reward": 1.2240822315216064, "reward_std": 0.2369544804096222, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.5057042837142944, "rewards/format_reward_step_strict": 0.96875, "step": 180 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.2233169919015925e-06, "aux_brier/mean_group_std": 0.04418487868754871, "aux_brier/mean_r": 0.9695073453059229, "aux_brier/n_active_tok": 246.375, "aux_brier/n_groups": 14.25, "aux_brier/n_step_records": 61.59375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.595789124668435, "calib/avg_num_step_conf": 7.76171875, "calib/ece": 0.2915261044176707, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01340782493368703, "calib/mean_conf": 0.12863453815261044, "calib/mu_c": 0.1364423076923077, "calib/mu_w": 0.12303448275862067, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0012449799196787147, "calib/std_conf": 0.045440094416825214, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2936.0, "completions/max_terminated_length": 2936.0, "completions/mean_length": 456.140625, "completions/mean_terminated_length": 459.7322692871094, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.19306666666666666, "grad_norm": 0.12891103327274323, "learning_rate": 5.277777777777779e-07, "loss": 0.0454, "num_tokens": 40457630.0, "reward": 1.0583572387695312, "reward_std": 0.26521506905555725, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.655303955078125, "rewards/format_reward_step_strict": 0.96875, "step": 181 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.926116698969249e-06, "aux_brier/mean_group_std": 0.06110215558689792, "aux_brier/mean_r": 0.9663731998498992, "aux_brier/n_active_tok": 283.0, "aux_brier/n_groups": 17.0625, "aux_brier/n_step_records": 70.75, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.47959572845156373, "calib/avg_num_step_conf": 8.84375, "calib/ece": 0.41738095238095235, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002128146453089258, "calib/mean_conf": 0.13023809523809524, "calib/mu_c": 0.1292753623188406, "calib/mu_w": 0.13140350877192986, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.04473846904225599, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2840.0, "completions/max_terminated_length": 2840.0, "completions/mean_length": 531.60546875, "completions/mean_terminated_length": 531.60546875, "completions/min_length": 211.0, "completions/min_terminated_length": 211.0, "epoch": 0.19413333333333332, "grad_norm": 0.29006049036979675, "learning_rate": 5.000000000000001e-07, "loss": 0.0404, "num_tokens": 40699881.0, "reward": 1.1646442413330078, "reward_std": 0.24191956222057343, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5570140480995178, "rewards/format_reward_step_strict": 0.97265625, "step": 182 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.3976111822587853e-06, "aux_brier/mean_group_std": 0.04886749718470261, "aux_brier/mean_r": 0.9676434180165906, "aux_brier/n_active_tok": 283.625, "aux_brier/n_groups": 18.5625, "aux_brier/n_step_records": 70.90625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5388257575757576, "calib/avg_num_step_conf": 8.984375, "calib/ece": 0.35023809523809524, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005977272727272748, "calib/mean_conf": 0.12595238095238095, "calib/mu_c": 0.12908333333333336, "calib/mu_w": 0.12310606060606061, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0481429513446396, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2446.0, "completions/max_terminated_length": 2446.0, "completions/mean_length": 542.4921875, "completions/mean_terminated_length": 544.61962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.1952, "grad_norm": 0.07587724924087524, "learning_rate": 4.7222222222222226e-07, "loss": 0.016, "num_tokens": 40945439.0, "reward": 1.1098310947418213, "reward_std": 0.287031352519989, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6111996173858643, "rewards/format_reward_step_strict": 0.9765625, "step": 183 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.3190019493292127e-06, "aux_brier/mean_group_std": 0.06916217819761528, "aux_brier/mean_r": 0.9544377919865814, "aux_brier/n_active_tok": 269.625, "aux_brier/n_groups": 15.40625, "aux_brier/n_step_records": 67.40625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5821722399150744, "calib/avg_num_step_conf": 8.734375, "calib/ece": 0.49090909090909085, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013527070063694233, "calib/mean_conf": 0.12964426877470356, "calib/mu_c": 0.13477707006369424, "calib/mu_w": 0.12125000000000001, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.050891964320219044, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2136.0, "completions/max_terminated_length": 2136.0, "completions/mean_length": 486.44140625, "completions/mean_terminated_length": 490.2716369628906, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.19626666666666667, "grad_norm": 0.2684814929962158, "learning_rate": 4.444444444444445e-07, "loss": -0.0159, "num_tokens": 41175248.0, "reward": 1.237707495689392, "reward_std": 0.23681330680847168, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.5211422443389893, "rewards/format_reward_step_strict": 0.98828125, "step": 184 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -6.602204194461336e-07, "aux_brier/mean_group_std": 0.08506167947464682, "aux_brier/mean_r": 0.9477614360696893, "aux_brier/n_active_tok": 311.75, "aux_brier/n_groups": 22.6875, "aux_brier/n_step_records": 77.9375, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5431345353675451, "calib/avg_num_step_conf": 10.2578125, "calib/ece": 0.4515637860082305, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002054785020804445, "calib/mean_conf": 0.1320576131687243, "calib/mu_c": 0.13292857142857142, "calib/mu_w": 0.13087378640776698, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0037448559670781893, "calib/std_conf": 0.05286722219655959, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2793.0, "completions/max_terminated_length": 2793.0, "completions/mean_length": 557.3046875, "completions/mean_terminated_length": 563.9130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.19733333333333333, "grad_norm": 0.10312332212924957, "learning_rate": 4.1666666666666667e-07, "loss": 0.0405, "num_tokens": 41424838.0, "reward": 1.1510381698608398, "reward_std": 0.24840062856674194, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.5260277390480042, "rewards/format_reward_step_strict": 0.9453125, "step": 185 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 7.78480908625312e-08, "aux_brier/mean_group_std": 0.06988757628812112, "aux_brier/mean_r": 0.9571196268251531, "aux_brier/n_active_tok": 277.375, "aux_brier/n_groups": 15.875, "aux_brier/n_step_records": 69.34375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5519071837253656, "calib/avg_num_step_conf": 8.69921875, "calib/ece": 0.43569169960474313, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006237762237762234, "calib/mean_conf": 0.12952569169960476, "calib/mu_c": 0.13223776223776224, "calib/mu_w": 0.126, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04043943810398236, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2679.0, "completions/max_terminated_length": 2679.0, "completions/mean_length": 528.5546875, "completions/mean_terminated_length": 528.5546875, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.1984, "grad_norm": 0.06944001466035843, "learning_rate": 3.8888888888888895e-07, "loss": 0.053, "num_tokens": 41665188.0, "reward": 1.1886343955993652, "reward_std": 0.26504650712013245, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.5592253804206848, "rewards/format_reward_step_strict": 0.98046875, "step": 186 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -9.031887712174225e-07, "aux_brier/mean_group_std": 0.052017654428443166, "aux_brier/mean_r": 0.9697135044805943, "aux_brier/n_active_tok": 345.0, "aux_brier/n_groups": 28.90625, "aux_brier/n_step_records": 86.25, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4750064499484004, "calib/avg_num_step_conf": 10.78125, "calib/ece": 0.4188, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": -0.011123581011351857, "calib/mean_conf": 0.13544, "calib/mu_c": 0.13036764705882356, "calib/mu_w": 0.14149122807017542, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0051199999999999996, "calib/std_conf": 0.06947810014673689, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2546.0, "completions/max_terminated_length": 2546.0, "completions/mean_length": 608.14453125, "completions/mean_terminated_length": 608.14453125, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 0.19946666666666665, "grad_norm": 0.06862202286720276, "learning_rate": 3.611111111111111e-07, "loss": 0.0999, "num_tokens": 41922417.0, "reward": 1.1598312854766846, "reward_std": 0.27732712030410767, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5612000226974487, "rewards/format_reward_step_strict": 0.9765625, "step": 187 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.082003190797323e-06, "aux_brier/mean_group_std": 0.08251930780726537, "aux_brier/mean_r": 0.9502170981486534, "aux_brier/n_active_tok": 281.5, "aux_brier/n_groups": 17.09375, "aux_brier/n_step_records": 70.375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4928007135575942, "calib/avg_num_step_conf": 8.796875, "calib/ece": 0.4426877470355732, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006285677879714596, "calib/mean_conf": 0.13256916996047433, "calib/mu_c": 0.1298611111111111, "calib/mu_w": 0.1361467889908257, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003043478260869565, "calib/std_conf": 0.06225124534924887, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3032.0, "completions/max_terminated_length": 3032.0, "completions/mean_length": 556.95703125, "completions/mean_terminated_length": 556.95703125, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 0.20053333333333334, "grad_norm": 0.05265188589692116, "learning_rate": 3.3333333333333335e-07, "loss": 0.0109, "num_tokens": 42169070.0, "reward": 1.1943097114562988, "reward_std": 0.2656799554824829, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5506765842437744, "rewards/format_reward_step_strict": 0.98828125, "step": 188 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 9.26180673568866e-06, "aux_brier/mean_group_std": 0.07424568267242235, "aux_brier/mean_r": 0.9418080546756995, "aux_brier/n_active_tok": 265.5, "aux_brier/n_groups": 17.1875, "aux_brier/n_step_records": 66.375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.639369975818574, "calib/avg_num_step_conf": 8.296875, "calib/ece": 0.4448399999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.018399451016273424, "calib/mean_conf": 0.12716000000000002, "calib/mu_c": 0.13503496503496504, "calib/mu_w": 0.11663551401869161, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03911437587383953, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2932.0, "completions/max_terminated_length": 2932.0, "completions/mean_length": 487.89453125, "completions/mean_terminated_length": 489.807861328125, "completions/min_length": 0.0, "completions/min_terminated_length": 179.0, "epoch": 0.2016, "grad_norm": 0.0532534196972847, "learning_rate": 3.055555555555556e-07, "loss": 0.029, "num_tokens": 42401739.0, "reward": 1.1886670589447021, "reward_std": 0.23333409428596497, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5515433549880981, "rewards/format_reward_step_strict": 0.9765625, "step": 189 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.796264378934765e-06, "aux_brier/mean_group_std": 0.06688086772080293, "aux_brier/mean_r": 0.962069712552071, "aux_brier/n_active_tok": 300.625, "aux_brier/n_groups": 19.21875, "aux_brier/n_step_records": 75.15625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.7096249521622655, "calib/avg_num_step_conf": 9.39453125, "calib/ece": 0.4048605577689243, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.030473912488837906, "calib/mean_conf": 0.129003984063745, "calib/mu_c": 0.14320895522388064, "calib/mu_w": 0.11273504273504273, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.041565529122325265, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2844.0, "completions/max_terminated_length": 2844.0, "completions/mean_length": 574.0078125, "completions/mean_terminated_length": 574.0078125, "completions/min_length": 191.0, "completions/min_terminated_length": 191.0, "epoch": 0.20266666666666666, "grad_norm": 0.09842872619628906, "learning_rate": 2.7777777777777776e-07, "loss": 0.0628, "num_tokens": 42654293.0, "reward": 1.157977819442749, "reward_std": 0.2416093796491623, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5850362777709961, "rewards/format_reward_step_strict": 0.9765625, "step": 190 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.1005596038968797e-06, "aux_brier/mean_group_std": 0.05125306216489163, "aux_brier/mean_r": 0.9721076662297099, "aux_brier/n_active_tok": 279.125, "aux_brier/n_groups": 16.03125, "aux_brier/n_step_records": 69.78125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5840792838874681, "calib/avg_num_step_conf": 8.734375, "calib/ece": 0.32641434262948205, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012413682864450132, "calib/mean_conf": 0.13414342629482073, "calib/mu_c": 0.1408695652173913, "calib/mu_w": 0.12845588235294117, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001195219123505976, "calib/std_conf": 0.04556379165711359, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2560.0, "completions/max_terminated_length": 2560.0, "completions/mean_length": 505.16015625, "completions/mean_terminated_length": 507.1412048339844, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.20373333333333332, "grad_norm": 0.05959112197160721, "learning_rate": 2.5000000000000004e-07, "loss": 0.0554, "num_tokens": 42887782.0, "reward": 1.0989865064620972, "reward_std": 0.23148463666439056, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6381340026855469, "rewards/format_reward_step_strict": 0.98046875, "step": 191 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 4.396978059928447e-06, "aux_brier/mean_group_std": 0.06359662057148917, "aux_brier/mean_r": 0.9631366202545875, "aux_brier/n_active_tok": 254.0, "aux_brier/n_groups": 15.625, "aux_brier/n_step_records": 63.5, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5367821390937829, "calib/avg_num_step_conf": 8.18359375, "calib/ece": 0.46372, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0038751317175974853, "calib/mean_conf": 0.12284, "calib/mu_c": 0.12445205479452055, "calib/mu_w": 0.12057692307692307, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.00128, "calib/std_conf": 0.044221424671758366, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2674.0, "completions/max_terminated_length": 2674.0, "completions/mean_length": 521.80078125, "completions/mean_terminated_length": 525.909423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.2048, "grad_norm": 0.13064619898796082, "learning_rate": 2.2222222222222224e-07, "loss": 0.0396, "num_tokens": 43126339.0, "reward": 1.1853406429290771, "reward_std": 0.2816307544708252, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.530424952507019, "rewards/format_reward_step_strict": 0.96484375, "step": 192 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.011821052138151e-06, "aux_brier/mean_group_std": 0.036070646126805155, "aux_brier/mean_r": 0.9748769477094339, "aux_brier/n_active_tok": 291.75, "aux_brier/n_groups": 19.34375, "aux_brier/n_step_records": 72.9375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5001913509376196, "calib/avg_num_step_conf": 9.39453125, "calib/ece": 0.4004780876494024, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004742314070672299, "calib/mean_conf": 0.13338645418326692, "calib/mu_c": 0.13559701492537315, "calib/mu_w": 0.13085470085470086, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0513491879135335, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2482.0, "completions/max_terminated_length": 2482.0, "completions/mean_length": 527.77734375, "completions/mean_terminated_length": 531.9330444335938, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.20586666666666667, "grad_norm": 0.05980648100376129, "learning_rate": 1.9444444444444447e-07, "loss": 0.0526, "num_tokens": 43367162.0, "reward": 1.153533935546875, "reward_std": 0.3433189392089844, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5750734806060791, "rewards/format_reward_step_strict": 0.97265625, "step": 193 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.1731222970823296e-06, "aux_brier/mean_group_std": 0.07410394084552661, "aux_brier/mean_r": 0.9527812351996452, "aux_brier/n_active_tok": 257.75, "aux_brier/n_groups": 13.96875, "aux_brier/n_step_records": 64.4375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5557051282051282, "calib/avg_num_step_conf": 8.28515625, "calib/ece": 0.39520000000000005, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009551282051282042, "calib/mean_conf": 0.1248, "calib/mu_c": 0.12938461538461538, "calib/mu_w": 0.11983333333333333, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.048192945541852904, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2199.0, "completions/max_terminated_length": 2199.0, "completions/mean_length": 472.33203125, "completions/mean_terminated_length": 476.0511779785156, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.20693333333333333, "grad_norm": 0.1151818186044693, "learning_rate": 1.6666666666666668e-07, "loss": -0.006, "num_tokens": 43594023.0, "reward": 1.138839840888977, "reward_std": 0.3054915964603424, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5787968635559082, "rewards/format_reward_step_strict": 0.97265625, "step": 194 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.9951615692080935e-06, "aux_brier/mean_group_std": 0.06479303479010276, "aux_brier/mean_r": 0.9559623480273672, "aux_brier/n_active_tok": 267.125, "aux_brier/n_groups": 15.5625, "aux_brier/n_step_records": 66.78125, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.4836433188233742, "calib/avg_num_step_conf": 8.4296875, "calib/ece": 0.4182591093117408, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004160401002506281, "calib/mean_conf": 0.12530364372469638, "calib/mu_c": 0.12338345864661654, "calib/mu_w": 0.12754385964912282, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0025506072874493927, "calib/std_conf": 0.04489088104836181, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2844.0, "completions/max_terminated_length": 2844.0, "completions/mean_length": 514.15234375, "completions/mean_terminated_length": 520.2490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.208, "grad_norm": 0.34284332394599915, "learning_rate": 1.3888888888888888e-07, "loss": -0.0164, "num_tokens": 43831630.0, "reward": 1.1381409168243408, "reward_std": 0.2621338367462158, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5525633096694946, "rewards/format_reward_step_strict": 0.9609375, "step": 195 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.7311345216208807e-06, "aux_brier/mean_group_std": 0.05536466923083148, "aux_brier/mean_r": 0.9579037857900614, "aux_brier/n_active_tok": 244.25, "aux_brier/n_groups": 12.875, "aux_brier/n_step_records": 61.0625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4931811072880825, "calib/avg_num_step_conf": 7.63671875, "calib/ece": 0.42236220472440944, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002194557397560165, "calib/mean_conf": 0.12984251968503938, "calib/mu_c": 0.12884892086330937, "calib/mu_w": 0.13104347826086954, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002480314960629921, "calib/std_conf": 0.046201957417859894, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3002.0, "completions/max_terminated_length": 3002.0, "completions/mean_length": 431.05859375, "completions/mean_terminated_length": 432.7490539550781, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.20906666666666668, "grad_norm": 0.03306657820940018, "learning_rate": 1.1111111111111112e-07, "loss": 0.0166, "num_tokens": 44044525.0, "reward": 1.1816363334655762, "reward_std": 0.16497869789600372, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5702953338623047, "rewards/format_reward_step_strict": 0.9921875, "step": 196 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.614795514028657e-06, "aux_brier/mean_group_std": 0.10405568824929422, "aux_brier/mean_r": 0.9350196740294592, "aux_brier/n_active_tok": 285.875, "aux_brier/n_groups": 18.09375, "aux_brier/n_step_records": 71.46875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5083735909822866, "calib/avg_num_step_conf": 9.14453125, "calib/ece": 0.32848000000000005, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0004959742351046681, "calib/mean_conf": 0.13392, "calib/mu_c": 0.1336521739130435, "calib/mu_w": 0.13414814814814816, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0012, "calib/std_conf": 0.048454448712166774, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3021.0, "completions/max_terminated_length": 3021.0, "completions/mean_length": 522.1875, "completions/mean_terminated_length": 526.2991943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.21013333333333334, "grad_norm": 0.103756844997406, "learning_rate": 8.333333333333334e-08, "loss": 0.0285, "num_tokens": 44283261.0, "reward": 1.0892724990844727, "reward_std": 0.2750256061553955, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6227148771286011, "rewards/format_reward_step_strict": 0.96875, "step": 197 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.3878599466352632e-06, "aux_brier/mean_group_std": 0.054694135177370894, "aux_brier/mean_r": 0.968603385805794, "aux_brier/n_active_tok": 265.0, "aux_brier/n_groups": 15.53125, "aux_brier/n_step_records": 66.25, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.54859477124183, "calib/avg_num_step_conf": 8.28125, "calib/ece": 0.47019841269841267, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009125490196078431, "calib/mean_conf": 0.12503968253968253, "calib/mu_c": 0.12873333333333334, "calib/mu_w": 0.11960784313725491, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.048415349263881836, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3042.0, "completions/max_terminated_length": 3042.0, "completions/mean_length": 498.203125, "completions/mean_terminated_length": 498.203125, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.2112, "grad_norm": 0.13945958018302917, "learning_rate": 5.555555555555556e-08, "loss": 0.1007, "num_tokens": 44516185.0, "reward": 1.2089036703109741, "reward_std": 0.22615382075309753, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.530927300453186, "rewards/format_reward_step_strict": 0.98046875, "step": 198 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.530694379529688e-06, "aux_brier/mean_group_std": 0.06728849122525137, "aux_brier/mean_r": 0.9580130139995875, "aux_brier/n_active_tok": 299.5, "aux_brier/n_groups": 20.0, "aux_brier/n_step_records": 74.875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5274936061381074, "calib/avg_num_step_conf": 9.359375, "calib/ece": 0.41394422310756973, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005650255754475725, "calib/mean_conf": 0.1302788844621514, "calib/mu_c": 0.13286764705882353, "calib/mu_w": 0.1272173913043478, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.001195219123505976, "calib/std_conf": 0.04652380096387325, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2985.0, "completions/max_terminated_length": 2985.0, "completions/mean_length": 574.96875, "completions/mean_terminated_length": 577.2235717773438, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.21226666666666666, "grad_norm": 0.10852678120136261, "learning_rate": 2.777777777777778e-08, "loss": 0.0178, "num_tokens": 44767577.0, "reward": 1.1592490673065186, "reward_std": 0.2842707633972168, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5666840076446533, "rewards/format_reward_step_strict": 0.97265625, "step": 199 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 6.376076377989648e-07, "aux_brier/mean_group_std": 0.055945407588163985, "aux_brier/mean_r": 0.9684017162047385, "aux_brier/n_active_tok": 275.625, "aux_brier/n_groups": 18.78125, "aux_brier/n_step_records": 68.90625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5670890410958905, "calib/avg_num_step_conf": 8.61328125, "calib/ece": 0.4642682926829268, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010998630136986307, "calib/mean_conf": 0.12922764227642275, "calib/mu_c": 0.1336986301369863, "calib/mu_w": 0.12269999999999999, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.04538692068468501, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2933.0, "completions/max_terminated_length": 2933.0, "completions/mean_length": 561.66015625, "completions/mean_terminated_length": 561.66015625, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.21333333333333335, "grad_norm": 0.1206001341342926, "learning_rate": 0.0, "loss": 0.0931, "num_tokens": 45019410.0, "reward": 1.1839895248413086, "reward_std": 0.2561890482902527, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.5250203013420105, "rewards/format_reward_step_strict": 0.95703125, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.03789105351956096, "train_runtime": 16013.4832, "train_samples_per_second": 3.197, "train_steps_per_second": 0.012 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 45019410, "num_train_epochs": 1, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }