{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_distill/final_loss": 0.6349183150700161, "aux_distill/lambda": 0.1, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.33182350758995327, "aux_distill/mean_u": 0.31677682190706, "aux_distill/n_active_final_tok": 3.7142857142857144, "aux_distill/n_active_tok": 24.571428571428573, "aux_distill/step_loss": 1.4134800136089325, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "calib/step_q_c": 0.8921052631578947, "calib/step_q_c_n": 19.0, "calib/step_q_gap": 0.19807541241162607, "calib/step_q_w": 0.6940298507462687, "calib/step_q_w_n": 67.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.06599244475364685, "learning_rate": 2.5000000000000004e-07, "loss": 0.1766, "num_tokens": 264685.0, "reward": 0.037574999034404755, "reward_std": 0.07449960708618164, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step": 0.04296875, "step": 1 }, { "aux_distill/final_loss": 0.5865676277562192, "aux_distill/lambda": 0.10000000000000003, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.28689150669072805, "aux_distill/mean_u": 0.2935626227740425, "aux_distill/n_active_final_tok": 4.631578947368421, "aux_distill/n_active_tok": 28.63157894736842, "aux_distill/step_loss": 1.1092121005058289, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "calib/step_q_c": 0.781, "calib/step_q_c_n": 20.0, "calib/step_q_gap": -0.042553719008264435, "calib/step_q_w": 0.8235537190082645, "calib/step_q_w_n": 121.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.11038576066493988, "learning_rate": 5.000000000000001e-07, "loss": 0.2358, "num_tokens": 533467.0, "reward": 0.07537207007408142, "reward_std": 0.14035090804100037, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step": 0.08984375, "step": 2 }, { "aux_distill/final_loss": 0.6699267900907077, "aux_distill/lambda": 0.1, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.3295047856294192, "aux_distill/mean_u": 0.3362008123569135, "aux_distill/n_active_final_tok": 4.0, "aux_distill/n_active_tok": 19.384615384615383, "aux_distill/step_loss": 1.285267398907588, "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.2272727272727273, "calib/avg_num_step_conf": 0.24609375, "calib/ece": 0.727857142857143, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 0.6428571428571429, "calib/gap": -0.13848484848484843, "calib/mean_conf": 0.8821428571428571, "calib/mu_c": 0.7733333333333333, "calib/mu_w": 0.9118181818181817, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.0859375, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.697857142857143, "calib/std_conf": 0.1185133248342367, "calib/step_conf_rate": 0.0703125, "calib/step_q_c": 0.6426666666666667, "calib/step_q_c_n": 15.0, "calib/step_q_gap": -0.04337500000000005, "calib/step_q_w": 0.6860416666666668, "calib/step_q_w_n": 48.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3059.0, "completions/max_terminated_length": 3059.0, "completions/mean_length": 675.6953125, "completions/mean_terminated_length": 742.3948364257812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.06638622283935547, "learning_rate": 7.5e-07, "loss": 0.1354, "num_tokens": 811701.0, "reward": 0.0374503917992115, "reward_std": 0.08624368160963058, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.012400781735777855, "rewards/format_reward_step": 0.046875, "step": 3 }, { "aux_distill/final_loss": 0.4266670833934437, "aux_distill/lambda": 0.09999999999999999, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.2545812319625508, "aux_distill/mean_u": 0.24295508469849525, "aux_distill/n_active_final_tok": 2.5454545454545454, "aux_distill/n_active_tok": 27.272727272727273, "aux_distill/step_loss": 1.2658109773289075, "calib/answer_extract_rate": 0.05859375, "calib/auroc": 0.36111111111111116, "calib/avg_num_step_conf": 0.29296875, "calib/ece": 0.3433333333333333, "calib/final_conf_rate": 0.03515625, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.7777777777777778, "calib/gap": -0.03666666666666685, "calib/mean_conf": 0.9455555555555555, "calib/mu_c": 0.9333333333333332, "calib/mu_w": 0.9700000000000001, "calib/nonempty_final_conf_rate": 0.03515625, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.3111111111111111, "calib/std_conf": 0.04991350543381378, "calib/step_conf_rate": 0.0546875, "calib/step_q_c": 0.8508000000000001, "calib/step_q_c_n": 25.0, "calib/step_q_gap": 0.04540000000000022, "calib/step_q_w": 0.8053999999999999, "calib/step_q_w_n": 50.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 3059.0, "completions/max_terminated_length": 3059.0, "completions/mean_length": 638.25, "completions/mean_terminated_length": 707.3246459960938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.042896121740341187, "learning_rate": 1.0000000000000002e-06, "loss": 0.1057, "num_tokens": 1081261.0, "reward": 0.03545898199081421, "reward_std": 0.07372425496578217, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.016230467706918716, "rewards/format_reward_step": 0.02734375, "step": 4 }, { "aux_distill/final_loss": 0.4871633052825928, "aux_distill/lambda": 0.09999999999999999, "aux_distill/lambda_final": 0.3, "aux_distill/loss": 0.2575274249538779, "aux_distill/mean_u": 0.2221291438053116, "aux_distill/n_active_final_tok": 3.0, "aux_distill/n_active_tok": 28.5, "aux_distill/step_loss": 1.1137842237949371, "calib/answer_extract_rate": 0.046875, "calib/avg_num_step_conf": 0.2265625, "calib/ece": 0.8971428571428571, "calib/final_conf_rate": 0.02734375, "calib/format_rate": 0.01953125, "calib/frac_conf_gt_0.9": 0.5714285714285714, "calib/mean_conf": 0.8971428571428574, "calib/mu_c": NaN, "calib/mu_w": 0.8971428571428574, "calib/nonempty_final_conf_rate": 0.02734375, "calib/nonempty_reasoning_rate": 0.0625, "calib/nonempty_step_conf_rate": 0.0390625, "calib/pce": 0.8971428571428571, "calib/std_conf": 0.08955673153518558, "calib/step_conf_rate": 0.0390625, "calib/step_q_w": 0.7539655172413793, "calib/step_q_w_n": 58.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 2935.0, "completions/max_terminated_length": 2935.0, "completions/mean_length": 676.33984375, "completions/mean_terminated_length": 772.9598388671875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.03589190915226936, "learning_rate": 1.25e-06, "loss": 0.0637, "num_tokens": 1361092.0, "reward": 0.010898242704570293, "reward_std": 0.0218950267881155, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0022652344778180122, "rewards/format_reward_step": 0.01953125, "step": 5 }, { "aux_distill/final_loss": 0.5564870668782128, "aux_distill/lambda": 0.10000000000000003, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.293852342499627, "aux_distill/mean_u": 0.32411118324568666, "aux_distill/n_active_final_tok": 3.5555555555555554, "aux_distill/n_active_tok": 28.0, "aux_distill/step_loss": 1.269062125020557, "calib/answer_extract_rate": 0.11328125, "calib/auroc": 0.46078431372549017, "calib/avg_num_step_conf": 0.51171875, "calib/ece": 0.6397499999999999, "calib/final_conf_rate": 0.078125, "calib/format_rate": 0.0625, "calib/frac_conf_gt_0.9": 0.65, "calib/gap": 0.16107843137254907, "calib/mean_conf": 0.7897500000000001, "calib/mu_c": 0.9266666666666666, "calib/mu_w": 0.7655882352941176, "calib/nonempty_final_conf_rate": 0.078125, "calib/nonempty_reasoning_rate": 0.13671875, "calib/nonempty_step_conf_rate": 0.09765625, "calib/pce": 0.6397499999999999, "calib/std_conf": 0.32012683033447853, "calib/step_conf_rate": 0.09765625, "calib/step_q_c": 0.8450000000000001, "calib/step_q_c_n": 6.0, "calib/step_q_gap": 0.08832000000000018, "calib/step_q_w": 0.7566799999999999, "calib/step_q_w_n": 125.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2939.0, "completions/max_terminated_length": 2939.0, "completions/mean_length": 679.51171875, "completions/mean_terminated_length": 749.8060302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.09765376895666122, "learning_rate": 1.5e-06, "loss": 0.1967, "num_tokens": 1640999.0, "reward": 0.04811655357480049, "reward_std": 0.09226097911596298, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.022014355286955833, "rewards/format_reward_step": 0.0625, "step": 6 }, { "aux_distill/final_loss": 0.6406730846925215, "aux_distill/lambda": 0.09999999999999999, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.3194320486350493, "aux_distill/mean_u": 0.38078354723265195, "aux_distill/n_active_final_tok": 4.7272727272727275, "aux_distill/n_active_tok": 28.0, "aux_distill/step_loss": 1.272301191633398, "calib/answer_extract_rate": 0.10546875, "calib/auroc": 0.7159090909090908, "calib/avg_num_step_conf": 0.30078125, "calib/ece": 0.578, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7333333333333333, "calib/gap": 0.16749999999999998, "calib/mean_conf": 0.8446666666666668, "calib/mu_c": 0.9675, "calib/mu_w": 0.8, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.12109375, "calib/nonempty_step_conf_rate": 0.06640625, "calib/pce": 0.578, "calib/std_conf": 0.26045771676458773, "calib/step_conf_rate": 0.06640625, "calib/step_q_c": 0.853529411764706, "calib/step_q_c_n": 17.0, "calib/step_q_gap": 0.048362745098039395, "calib/step_q_w": 0.8051666666666666, "calib/step_q_w_n": 60.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2989.0, "completions/max_terminated_length": 2989.0, "completions/mean_length": 712.31640625, "completions/mean_terminated_length": 779.286376953125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.060178689658641815, "learning_rate": 1.75e-06, "loss": 0.1328, "num_tokens": 1930776.0, "reward": 0.04149726778268814, "reward_std": 0.08895318955183029, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.024400781840085983, "rewards/format_reward_step": 0.04296875, "step": 7 }, { "aux_distill/final_loss": 0.3895903135600843, "aux_distill/lambda": 0.10000000000000003, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.26361509020391266, "aux_distill/mean_u": 0.3847196649220619, "aux_distill/n_active_final_tok": 2.526315789473684, "aux_distill/n_active_tok": 26.526315789473685, "aux_distill/step_loss": 1.4673798743047213, "calib/answer_extract_rate": 0.08984375, "calib/auroc": 0.5454545454545454, "calib/avg_num_step_conf": 0.4921875, "calib/ece": 0.7092857142857141, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.6428571428571429, "calib/gap": 0.020909090909090877, "calib/mean_conf": 0.9235714285714284, "calib/mu_c": 0.9400000000000001, "calib/mu_w": 0.9190909090909092, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.1328125, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.7092857142857141, "calib/std_conf": 0.07450859421734449, "calib/step_conf_rate": 0.09375, "calib/step_q_c": 0.7776923076923078, "calib/step_q_c_n": 13.0, "calib/step_q_gap": -0.047422736555479794, "calib/step_q_w": 0.8251150442477876, "calib/step_q_w_n": 113.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2901.0, "completions/max_terminated_length": 2901.0, "completions/mean_length": 697.80078125, "completions/mean_terminated_length": 760.1574096679688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.07126282900571823, "learning_rate": 2.0000000000000003e-06, "loss": 0.1552, "num_tokens": 2215925.0, "reward": 0.03193320333957672, "reward_std": 0.05160636454820633, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.013085155747830868, "rewards/format_reward_step": 0.0390625, "step": 8 }, { "aux_distill/final_loss": 0.6693086624145508, "aux_distill/lambda": 0.1, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.33453326443066966, "aux_distill/mean_u": 0.32446420034839313, "aux_distill/n_active_final_tok": 3.3846153846153846, "aux_distill/n_active_tok": 22.76923076923077, "aux_distill/step_loss": 1.337406561924861, "calib/answer_extract_rate": 0.078125, "calib/auroc": 0.49999999999999994, "calib/avg_num_step_conf": 0.2890625, "calib/ece": 0.7138461538461541, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.9230769230769231, "calib/gap": 0.02866666666666673, "calib/mean_conf": 0.9446153846153847, "calib/mu_c": 0.9666666666666667, "calib/mu_w": 0.938, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.08984375, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.7138461538461541, "calib/std_conf": 0.0731295357163085, "calib/step_conf_rate": 0.05859375, "calib/step_q_c": 0.8666666666666666, "calib/step_q_c_n": 6.0, "calib/step_q_gap": 0.062254901960784204, "calib/step_q_w": 0.8044117647058824, "calib/step_q_w_n": 68.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 2976.0, "completions/max_terminated_length": 2976.0, "completions/mean_length": 692.4453125, "completions/mean_terminated_length": 791.3660888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.0522342287003994, "learning_rate": 2.25e-06, "loss": 0.168, "num_tokens": 2500727.0, "reward": 0.02944277413189411, "reward_std": 0.06517241150140762, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.008104296401143074, "rewards/format_reward_step": 0.0390625, "step": 9 }, { "aux_distill/final_loss": 0.39707126617431643, "aux_distill/lambda": 0.09999999999999999, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.24192016646265985, "aux_distill/mean_u": 0.2629905919192007, "aux_distill/n_active_final_tok": 3.2, "aux_distill/n_active_tok": 31.2, "aux_distill/step_loss": 1.2279877960681915, "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.9583333333333335, "calib/avg_num_step_conf": 0.32421875, "calib/ece": 0.6361538461538462, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.5384615384615384, "calib/gap": 0.2783333333333332, "calib/mean_conf": 0.713076923076923, "calib/mu_c": 0.97, "calib/mu_w": 0.6916666666666668, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.07421875, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.6361538461538462, "calib/std_conf": 0.3008803650669818, "calib/step_conf_rate": 0.0546875, "calib/step_q_c": 0.83, "calib/step_q_c_n": 4.0, "calib/step_q_gap": 0.11149367088607587, "calib/step_q_w": 0.7185063291139241, "calib/step_q_w_n": 79.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3053.0, "completions/max_terminated_length": 3053.0, "completions/mean_length": 687.68359375, "completions/mean_terminated_length": 755.5665283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.010666666666666666, "grad_norm": 0.039525002241134644, "learning_rate": 2.5e-06, "loss": 0.09, "num_tokens": 2783574.0, "reward": 0.024364061653614044, "reward_std": 0.06144634634256363, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.017478125169873238, "rewards/format_reward_step": 0.02734375, "step": 10 }, { "aux_distill/final_loss": 0.3394656909836663, "aux_distill/lambda": 0.10000000000000003, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.2320902186135451, "aux_distill/mean_u": 0.30481761098029164, "aux_distill/n_active_final_tok": 4.444444444444445, "aux_distill/n_active_tok": 28.22222222222222, "aux_distill/step_loss": 1.3025050461292267, "calib/answer_extract_rate": 0.12109375, "calib/auroc": 0.48958333333333337, "calib/avg_num_step_conf": 0.49609375, "calib/ece": 0.5818181818181818, "calib/final_conf_rate": 0.0859375, "calib/format_rate": 0.078125, "calib/frac_conf_gt_0.9": 0.7727272727272727, "calib/gap": 0.13124999999999987, "calib/mean_conf": 0.8545454545454547, "calib/mu_c": 0.9499999999999998, "calib/mu_w": 0.81875, "calib/nonempty_final_conf_rate": 0.0859375, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.5818181818181818, "calib/std_conf": 0.27113779242174785, "calib/step_conf_rate": 0.109375, "calib/step_q_c": 0.8314535714285715, "calib/step_q_c_n": 28.0, "calib/step_q_gap": 0.10493841991342001, "calib/step_q_w": 0.7265151515151514, "calib/step_q_w_n": 99.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3038.0, "completions/max_terminated_length": 3038.0, "completions/mean_length": 677.2578125, "completions/mean_terminated_length": 744.111572265625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.08062788844108582, "learning_rate": 2.7500000000000004e-06, "loss": 0.1336, "num_tokens": 3061432.0, "reward": 0.06968769431114197, "reward_std": 0.1363307684659958, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.033906638622283936, "rewards/format_reward_step": 0.078125, "step": 11 }, { "aux_distill/final_loss": 0.4832667452948434, "aux_distill/lambda": 0.1, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.2690164814037936, "aux_distill/mean_u": 0.31713087130615536, "aux_distill/n_active_final_tok": 5.142857142857143, "aux_distill/n_active_tok": 29.428571428571427, "aux_distill/step_loss": 1.2403645345142909, "calib/answer_extract_rate": 0.12109375, "calib/auroc": 0.9385964912280702, "calib/avg_num_step_conf": 0.40234375, "calib/ece": 0.7997727272727273, "calib/final_conf_rate": 0.0859375, "calib/format_rate": 0.0703125, "calib/frac_conf_gt_0.9": 0.8636363636363636, "calib/gap": 0.062368421052631406, "calib/mean_conf": 0.9361363636363635, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9276315789473685, "calib/nonempty_final_conf_rate": 0.0859375, "calib/nonempty_reasoning_rate": 0.13671875, "calib/nonempty_step_conf_rate": 0.0859375, "calib/pce": 0.7997727272727273, "calib/std_conf": 0.08033922140441596, "calib/step_conf_rate": 0.0859375, "calib/step_q_c": 0.8309090909090909, "calib/step_q_c_n": 11.0, "calib/step_q_gap": 0.05264822134387359, "calib/step_q_w": 0.7782608695652173, "calib/step_q_w_n": 92.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 2911.0, "completions/max_terminated_length": 2911.0, "completions/mean_length": 593.21484375, "completions/mean_terminated_length": 677.9598388671875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0128, "grad_norm": 0.056407034397125244, "learning_rate": 3e-06, "loss": 0.1305, "num_tokens": 3317471.0, "reward": 0.05259453132748604, "reward_std": 0.08637464046478271, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.019251562654972076, "rewards/format_reward_step": 0.0703125, "step": 12 }, { "aux_distill/final_loss": 0.4467930793762207, "aux_distill/lambda": 0.10000000000000002, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.26120452769100666, "aux_distill/mean_u": 0.2673173649834991, "aux_distill/n_active_final_tok": 3.6, "aux_distill/n_active_tok": 25.8, "aux_distill/step_loss": 1.2716659754514694, "calib/answer_extract_rate": 0.08984375, "calib/auroc": 0.4852941176470588, "calib/avg_num_step_conf": 0.5078125, "calib/ece": 0.824736842105263, "calib/final_conf_rate": 0.07421875, "calib/format_rate": 0.05859375, "calib/frac_conf_gt_0.9": 0.8421052631578947, "calib/gap": 0.022352941176470575, "calib/mean_conf": 0.93, "calib/mu_c": 0.95, "calib/mu_w": 0.9276470588235294, "calib/nonempty_final_conf_rate": 0.07421875, "calib/nonempty_reasoning_rate": 0.12109375, "calib/nonempty_step_conf_rate": 0.09765625, "calib/pce": 0.824736842105263, "calib/std_conf": 0.08039376776054748, "calib/step_conf_rate": 0.09765625, "calib/step_q_c": 0.8225, "calib/step_q_c_n": 8.0, "calib/step_q_gap": 0.135860655737705, "calib/step_q_w": 0.686639344262295, "calib/step_q_w_n": 122.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 2967.0, "completions/max_terminated_length": 2967.0, "completions/mean_length": 666.85546875, "completions/mean_terminated_length": 714.2886962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.013866666666666666, "grad_norm": 0.07662098854780197, "learning_rate": 3.2500000000000002e-06, "loss": 0.1975, "num_tokens": 3592778.0, "reward": 0.042661331593990326, "reward_std": 0.09532182663679123, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.01501015666872263, "rewards/format_reward_step": 0.05859375, "step": 13 }, { "aux_distill/final_loss": 0.2638796418905258, "aux_distill/lambda": 0.10000000000000002, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.20986821176484227, "aux_distill/mean_u": 0.2926287374603647, "aux_distill/n_active_final_tok": 3.5, "aux_distill/n_active_tok": 22.5, "aux_distill/step_loss": 1.3070431612432003, "calib/answer_extract_rate": 0.1015625, "calib/auroc": 0.7916666666666667, "calib/avg_num_step_conf": 0.3515625, "calib/ece": 0.6257894736842105, "calib/final_conf_rate": 0.07421875, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.5789473684210527, "calib/gap": 0.15983333333333338, "calib/mean_conf": 0.8363157894736841, "calib/mu_c": 0.9625, "calib/mu_w": 0.8026666666666666, "calib/nonempty_final_conf_rate": 0.07421875, "calib/nonempty_reasoning_rate": 0.125, "calib/nonempty_step_conf_rate": 0.07421875, "calib/pce": 0.6257894736842105, "calib/std_conf": 0.21096795781439592, "calib/step_conf_rate": 0.07421875, "calib/step_q_c": 0.92, "calib/step_q_c_n": 15.0, "calib/step_q_gap": 0.278, "calib/step_q_w": 0.642, "calib/step_q_w_n": 75.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3048.0, "completions/max_terminated_length": 3048.0, "completions/mean_length": 632.11328125, "completions/mean_terminated_length": 679.9202270507812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.014933333333333333, "grad_norm": 0.03792736679315567, "learning_rate": 3.5e-06, "loss": 0.1444, "num_tokens": 3859999.0, "reward": 0.03805605694651604, "reward_std": 0.08979158848524094, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.021424610167741776, "rewards/format_reward_step": 0.0390625, "step": 14 }, { "aux_distill/final_loss": 0.04684555530548096, "aux_distill/lambda": 0.09999999999999999, "aux_distill/lambda_final": 0.3, "aux_distill/loss": 0.13827819004654884, "aux_distill/mean_u": 0.2540449814228472, "aux_distill/n_active_final_tok": 2.0, "aux_distill/n_active_tok": 16.5, "aux_distill/step_loss": 1.2422452196478844, "calib/answer_extract_rate": 0.0703125, "calib/auroc": 0.1875, "calib/avg_num_step_conf": 0.12890625, "calib/ece": 0.7016666666666665, "calib/final_conf_rate": 0.0234375, "calib/format_rate": 0.01171875, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": -0.20750000000000002, "calib/mean_conf": 0.8683333333333333, "calib/mu_c": 0.73, "calib/mu_w": 0.9375, "calib/nonempty_final_conf_rate": 0.0234375, "calib/nonempty_reasoning_rate": 0.0859375, "calib/nonempty_step_conf_rate": 0.03125, "calib/pce": 0.6183333333333332, "calib/std_conf": 0.17439578231393355, "calib/step_conf_rate": 0.03125, "calib/step_q_c": 0.858, "calib/step_q_c_n": 5.0, "calib/step_q_gap": 0.1637142857142858, "calib/step_q_w": 0.6942857142857142, "calib/step_q_w_n": 28.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3037.0, "completions/max_terminated_length": 3037.0, "completions/mean_length": 654.9296875, "completions/mean_terminated_length": 719.5794067382812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.014578004367649555, "learning_rate": 3.7500000000000005e-06, "loss": 0.0364, "num_tokens": 4135541.0, "reward": 0.014424024149775505, "reward_std": 0.034029521048069, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.005410546902567148, "rewards/format_reward_step": 0.01171875, "step": 15 }, { "aux_distill/final_loss": 0.09308773279190063, "aux_distill/lambda": 0.09999999999999999, "aux_distill/lambda_final": 0.3, "aux_distill/loss": 0.1476732436567545, "aux_distill/mean_u": 0.43455425767035105, "aux_distill/n_active_final_tok": 2.0, "aux_distill/n_active_tok": 17.5, "aux_distill/step_loss": 1.1974692195653915, "calib/answer_extract_rate": 0.05859375, "calib/avg_num_step_conf": 0.13671875, "calib/ece": 0.7699999999999999, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.015625, "calib/frac_conf_gt_0.9": 0.6363636363636364, "calib/mean_conf": 0.7699999999999999, "calib/mu_c": NaN, "calib/mu_w": 0.7699999999999999, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.078125, "calib/nonempty_step_conf_rate": 0.03515625, "calib/pce": 0.7699999999999999, "calib/std_conf": 0.3036145882229939, "calib/step_conf_rate": 0.03515625, "calib/step_q_w": 0.6568571428571429, "calib/step_q_w_n": 35.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 2968.0, "completions/max_terminated_length": 2968.0, "completions/mean_length": 676.265625, "completions/mean_terminated_length": 762.6607666015625, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.017066666666666667, "grad_norm": 0.017469491809606552, "learning_rate": 4.000000000000001e-06, "loss": 0.0395, "num_tokens": 4417513.0, "reward": 0.010917773470282555, "reward_std": 0.026841146871447563, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.006210546940565109, "rewards/format_reward_step": 0.015625, "step": 16 }, { "aux_distill/final_loss": 0.10428959092027262, "aux_distill/lambda": 0.10000000000000003, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.14978973685126556, "aux_distill/mean_u": 0.2753278144632056, "aux_distill/n_active_final_tok": 3.789473684210526, "aux_distill/n_active_tok": 24.210526315789473, "aux_distill/step_loss": 1.1850285655573796, "calib/answer_extract_rate": 0.109375, "calib/auroc": 0.4733333333333334, "calib/avg_num_step_conf": 0.44921875, "calib/ece": 0.5865000000000001, "calib/final_conf_rate": 0.078125, "calib/format_rate": 0.0625, "calib/frac_conf_gt_0.9": 0.65, "calib/gap": 0.017999999999999794, "calib/mean_conf": 0.8365, "calib/mu_c": 0.85, "calib/mu_w": 0.8320000000000002, "calib/nonempty_final_conf_rate": 0.078125, "calib/nonempty_reasoning_rate": 0.13671875, "calib/nonempty_step_conf_rate": 0.09765625, "calib/pce": 0.5865000000000001, "calib/std_conf": 0.22812880133819138, "calib/step_conf_rate": 0.09765625, "calib/step_q_c": 0.7936842105263158, "calib/step_q_c_n": 19.0, "calib/step_q_gap": 0.15733004385964922, "calib/step_q_w": 0.6363541666666666, "calib/step_q_w_n": 96.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3057.0, "completions/max_terminated_length": 3057.0, "completions/mean_length": 618.8203125, "completions/mean_terminated_length": 682.836181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.018133333333333335, "grad_norm": 0.030092423781752586, "learning_rate": 4.25e-06, "loss": 0.1002, "num_tokens": 4679459.0, "reward": 0.053166598081588745, "reward_std": 0.09677232056856155, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.024301951751112938, "rewards/format_reward_step": 0.0625, "step": 17 }, { "aux_distill/final_loss": 0.10552374521891277, "aux_distill/lambda": 0.09999999999999999, "aux_distill/lambda_final": 0.3, "aux_distill/loss": 0.1427289280626509, "aux_distill/mean_u": 0.2952710393442671, "aux_distill/n_active_final_tok": 4.0, "aux_distill/n_active_tok": 24.0, "aux_distill/step_loss": 1.110718011856079, "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.45, "calib/avg_num_step_conf": 0.21484375, "calib/ece": 0.7866666666666666, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": -0.3560000000000001, "calib/mean_conf": 0.7866666666666666, "calib/mu_c": 0.49, "calib/mu_w": 0.8460000000000001, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.07421875, "calib/nonempty_step_conf_rate": 0.04296875, "calib/pce": 0.7033333333333333, "calib/std_conf": 0.3038731460476376, "calib/step_conf_rate": 0.04296875, "calib/step_q_c": 0.7327272727272728, "calib/step_q_c_n": 11.0, "calib/step_q_gap": 0.0679545454545456, "calib/step_q_w": 0.6647727272727272, "calib/step_q_w_n": 44.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12890625, "completions/max_length": 3038.0, "completions/max_terminated_length": 3038.0, "completions/mean_length": 648.7890625, "completions/mean_terminated_length": 744.7982177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0192, "grad_norm": 0.016668932512402534, "learning_rate": 4.5e-06, "loss": 0.0671, "num_tokens": 4956269.0, "reward": 0.024888280779123306, "reward_std": 0.06491275131702423, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.010714062489569187, "rewards/format_reward_step": 0.03125, "step": 18 }, { "aux_distill/final_loss": 0.1327239183279184, "aux_distill/lambda": 0.1, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.16295114446144837, "aux_distill/mean_u": 0.3919296979074976, "aux_distill/n_active_final_tok": 3.3846153846153846, "aux_distill/n_active_tok": 19.384615384615383, "aux_distill/step_loss": 1.231339651804704, "calib/answer_extract_rate": 0.10546875, "calib/auroc": 0.36666666666666664, "calib/avg_num_step_conf": 0.24609375, "calib/ece": 0.6404615384615385, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.46153846153846156, "calib/gap": -0.13759999999999994, "calib/mean_conf": 0.7358461538461538, "calib/mu_c": 0.63, "calib/mu_w": 0.7676, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.125, "calib/nonempty_step_conf_rate": 0.0625, "calib/pce": 0.5727692307692309, "calib/std_conf": 0.31278054389468224, "calib/step_conf_rate": 0.0625, "calib/step_q_c": 0.7255555555555556, "calib/step_q_c_n": 9.0, "calib/step_q_gap": 0.06324814814814816, "calib/step_q_w": 0.6623074074074075, "calib/step_q_w_n": 54.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3046.0, "completions/max_terminated_length": 3046.0, "completions/mean_length": 566.43359375, "completions/mean_terminated_length": 614.4364624023438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.020266666666666665, "grad_norm": 0.052968818694353104, "learning_rate": 4.75e-06, "loss": 0.0701, "num_tokens": 5206036.0, "reward": 0.037115007638931274, "reward_std": 0.0925213098526001, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.01954251527786255, "rewards/format_reward_step": 0.04296875, "step": 19 }, { "aux_distill/final_loss": 0.06726464900103482, "aux_distill/lambda": 0.10000000000000003, "aux_distill/lambda_final": 0.2999999999999999, "aux_distill/loss": 0.13842008127407593, "aux_distill/mean_u": 0.27594235949049245, "aux_distill/n_active_final_tok": 4.7272727272727275, "aux_distill/n_active_tok": 30.90909090909091, "aux_distill/step_loss": 1.1824068318713794, "calib/answer_extract_rate": 0.1875, "calib/auroc": 0.4444444444444444, "calib/avg_num_step_conf": 0.6640625, "calib/ece": 0.44977777777777783, "calib/final_conf_rate": 0.12890625, "calib/format_rate": 0.09375, "calib/frac_conf_gt_0.9": 0.30303030303030304, "calib/gap": -0.03252777777777782, "calib/mean_conf": 0.5447676767676768, "calib/mu_c": 0.5211111111111111, "calib/mu_w": 0.5536388888888889, "calib/nonempty_final_conf_rate": 0.12890625, "calib/nonempty_reasoning_rate": 0.2265625, "calib/nonempty_step_conf_rate": 0.14453125, "calib/pce": 0.36090909090909096, "calib/std_conf": 0.38567127612942986, "calib/step_conf_rate": 0.14453125, "calib/step_q_c": 0.5023809523809524, "calib/step_q_c_n": 42.0, "calib/step_q_gap": -0.08721117360329966, "calib/step_q_w": 0.589592125984252, "calib/step_q_w_n": 127.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2982.0, "completions/max_terminated_length": 2982.0, "completions/mean_length": 615.90625, "completions/mean_terminated_length": 656.9666748046875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.021333333333333333, "grad_norm": 0.032176196575164795, "learning_rate": 5e-06, "loss": 0.1347, "num_tokens": 5468580.0, "reward": 0.0915047749876976, "reward_std": 0.18190191686153412, "rewards/accuracy_reward_step": 0.03515625, "rewards/final_brier_reward_step": 0.054103296250104904, "rewards/format_reward_step": 0.09375, "step": 20 }, { "aux_distill/final_loss": 0.014147556745089017, "aux_distill/lambda": 0.1, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.1200677938759327, "aux_distill/mean_u": 0.3006582224847178, "aux_distill/n_active_final_tok": 4.0, "aux_distill/n_active_tok": 45.84615384615385, "aux_distill/step_loss": 1.1582352404411023, "calib/answer_extract_rate": 0.10546875, "calib/auroc": 0.3557692307692307, "calib/avg_num_step_conf": 0.80078125, "calib/ece": 0.5235294117647058, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 0.29411764705882354, "calib/gap": -0.1503846153846154, "calib/mean_conf": 0.54, "calib/mu_c": 0.425, "calib/mu_w": 0.5753846153846154, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.41411764705882353, "calib/std_conf": 0.37446432328295937, "calib/step_conf_rate": 0.0703125, "calib/step_q_c": 0.5422222222222222, "calib/step_q_c_n": 9.0, "calib/step_q_gap": 0.3785997732426304, "calib/step_q_w": 0.1636224489795918, "calib/step_q_w_n": 196.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 3042.0, "completions/max_terminated_length": 3042.0, "completions/mean_length": 545.2578125, "completions/mean_terminated_length": 617.6371459960938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0224, "grad_norm": 0.010674743913114071, "learning_rate": 4.9722222222222224e-06, "loss": 0.0771, "num_tokens": 5711126.0, "reward": 0.047288283705711365, "reward_std": 0.08628718554973602, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.02817031368613243, "rewards/format_reward_step": 0.05078125, "step": 21 }, { "aux_distill/final_loss": 0.14511122873851232, "aux_distill/lambda": 0.1, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.1796576284936496, "aux_distill/mean_u": 0.3055337093556676, "aux_distill/n_active_final_tok": 4.0, "aux_distill/n_active_tok": 46.285714285714285, "aux_distill/step_loss": 1.3612425753048487, "calib/answer_extract_rate": 0.09375, "calib/auroc": 0.6923076923076923, "calib/avg_num_step_conf": 0.640625, "calib/ece": 0.44812500000000005, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 0.1875, "calib/gap": 0.22948717948717945, "calib/mean_conf": 0.566875, "calib/mu_c": 0.7533333333333333, "calib/mu_w": 0.5238461538461539, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.11328125, "calib/nonempty_step_conf_rate": 0.078125, "calib/pce": 0.41375, "calib/std_conf": 0.3434151778460003, "calib/step_conf_rate": 0.078125, "calib/step_q_c": 0.45249999999999996, "calib/step_q_c_n": 12.0, "calib/step_q_gap": -0.31631578947368416, "calib/step_q_w": 0.7688157894736841, "calib/step_q_w_n": 152.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 3003.0, "completions/max_terminated_length": 3003.0, "completions/mean_length": 608.57421875, "completions/mean_terminated_length": 651.8619384765625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.023466666666666667, "grad_norm": 0.024366727098822594, "learning_rate": 4.944444444444445e-06, "loss": 0.1007, "num_tokens": 5968737.0, "reward": 0.04406660050153732, "reward_std": 0.10519158095121384, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.029539454728364944, "rewards/format_reward_step": 0.046875, "step": 22 }, { "aux_distill/final_loss": 0.08696343104044596, "aux_distill/lambda": 0.10000000000000002, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.1517236649990082, "aux_distill/mean_u": 0.23918013649526626, "aux_distill/n_active_final_tok": 3.2, "aux_distill/n_active_tok": 18.666666666666668, "aux_distill/step_loss": 1.256346340974172, "calib/answer_extract_rate": 0.10546875, "calib/auroc": 0.2564102564102564, "calib/avg_num_step_conf": 0.2734375, "calib/ece": 0.625, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.3125, "calib/gap": -0.30256410256410254, "calib/mean_conf": 0.5525, "calib/mu_c": 0.3066666666666667, "calib/mu_w": 0.6092307692307692, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.125, "calib/nonempty_step_conf_rate": 0.0625, "calib/pce": 0.495, "calib/std_conf": 0.3650428057091388, "calib/step_conf_rate": 0.0625, "calib/step_q_c": 0.5022222222222221, "calib/step_q_c_n": 9.0, "calib/step_q_gap": -0.07897449908925336, "calib/step_q_w": 0.5811967213114755, "calib/step_q_w_n": 61.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 3025.0, "completions/max_terminated_length": 3025.0, "completions/mean_length": 747.35546875, "completions/mean_terminated_length": 787.33740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.024533333333333334, "grad_norm": 0.022905919700860977, "learning_rate": 4.9166666666666665e-06, "loss": 0.0941, "num_tokens": 6263996.0, "reward": 0.03834843635559082, "reward_std": 0.09829157590866089, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.02200937643647194, "rewards/format_reward_step": 0.04296875, "step": 23 }, { "aux_distill/final_loss": 0.0334828474248449, "aux_distill/lambda": 0.10000000000000003, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.13792851318915686, "aux_distill/mean_u": 0.4641779747125021, "aux_distill/n_active_final_tok": 4.666666666666667, "aux_distill/n_active_tok": 31.555555555555557, "aux_distill/step_loss": 1.2788365748193529, "calib/answer_extract_rate": 0.1328125, "calib/auroc": 0.5324074074074074, "calib/avg_num_step_conf": 0.5703125, "calib/ece": 0.26087499999999997, "calib/final_conf_rate": 0.09375, "calib/format_rate": 0.07421875, "calib/frac_conf_gt_0.9": 0.16666666666666666, "calib/gap": 0.11105555555555557, "calib/mean_conf": 0.3317083333333333, "calib/mu_c": 0.41500000000000004, "calib/mu_w": 0.30394444444444446, "calib/nonempty_final_conf_rate": 0.09375, "calib/nonempty_reasoning_rate": 0.17578125, "calib/nonempty_step_conf_rate": 0.1171875, "calib/pce": 0.17129166666666665, "calib/std_conf": 0.33765064282068563, "calib/step_conf_rate": 0.1171875, "calib/step_q_c": 0.45787878787878794, "calib/step_q_c_n": 33.0, "calib/step_q_gap": 0.18671241619737206, "calib/step_q_w": 0.2711663716814159, "calib/step_q_w_n": 113.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 3018.0, "completions/max_terminated_length": 3018.0, "completions/mean_length": 548.2890625, "completions/mean_terminated_length": 612.9345092773438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0256, "grad_norm": 0.014457042329013348, "learning_rate": 4.888888888888889e-06, "loss": 0.1061, "num_tokens": 6508870.0, "reward": 0.07793824374675751, "reward_std": 0.15149441361427307, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.05822023004293442, "rewards/format_reward_step": 0.07421875, "step": 24 }, { "aux_distill/final_loss": 0.03038620292985191, "aux_distill/lambda": 0.10000000000000003, "aux_distill/lambda_final": 0.2999999999999999, "aux_distill/loss": 0.12594087794423103, "aux_distill/mean_u": 0.2984196592403016, "aux_distill/n_active_final_tok": 4.0, "aux_distill/n_active_tok": 19.333333333333332, "aux_distill/step_loss": 1.1682501484950383, "calib/answer_extract_rate": 0.171875, "calib/auroc": 0.5392857142857144, "calib/avg_num_step_conf": 0.45703125, "calib/ece": 0.33065656565656565, "calib/final_conf_rate": 0.12890625, "calib/format_rate": 0.08203125, "calib/frac_conf_gt_0.9": 0.12121212121212122, "calib/gap": 0.05558333333333321, "calib/mean_conf": 0.3621717171717172, "calib/mu_c": 0.40933333333333327, "calib/mu_w": 0.35375000000000006, "calib/nonempty_final_conf_rate": 0.12890625, "calib/nonempty_reasoning_rate": 0.2109375, "calib/nonempty_step_conf_rate": 0.1328125, "calib/pce": 0.27065656565656565, "calib/std_conf": 0.3598869268851573, "calib/step_conf_rate": 0.1328125, "calib/step_q_c": 0.21714285714285714, "calib/step_q_c_n": 7.0, "calib/step_q_gap": -0.23299350649350642, "calib/step_q_w": 0.45013636363636356, "calib/step_q_w_n": 110.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2964.0, "completions/max_terminated_length": 2964.0, "completions/mean_length": 616.29296875, "completions/mean_terminated_length": 657.3792114257812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.02666666666666667, "grad_norm": 0.017868278548121452, "learning_rate": 4.861111111111111e-06, "loss": 0.1017, "num_tokens": 6769865.0, "reward": 0.0797935277223587, "reward_std": 0.12491872906684875, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.058024562895298004, "rewards/format_reward_step": 0.08203125, "step": 25 }, { "aux_distill/final_loss": 0.10417546166314019, "aux_distill/lambda": 0.10000000000000003, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.14635848143586405, "aux_distill/mean_u": 0.3878459739490986, "aux_distill/n_active_final_tok": 3.8518518518518516, "aux_distill/n_active_tok": 38.074074074074076, "aux_distill/step_loss": 1.151058386873316, "calib/answer_extract_rate": 0.18359375, "calib/auroc": 0.3146551724137931, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.4688108108108108, "calib/final_conf_rate": 0.14453125, "calib/format_rate": 0.0859375, "calib/frac_conf_gt_0.9": 0.24324324324324326, "calib/gap": -0.16362931034482767, "calib/mean_conf": 0.37200000000000005, "calib/mu_c": 0.24375, "calib/mu_w": 0.40737931034482766, "calib/nonempty_final_conf_rate": 0.14453125, "calib/nonempty_reasoning_rate": 0.23828125, "calib/nonempty_step_conf_rate": 0.15234375, "calib/pce": 0.3122972972972973, "calib/std_conf": 0.42094481887836616, "calib/step_conf_rate": 0.15234375, "calib/step_q_c": 0.09161714285714286, "calib/step_q_c_n": 35.0, "calib/step_q_gap": -0.1744870012870013, "calib/step_q_w": 0.26610414414414413, "calib/step_q_w_n": 222.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2974.0, "completions/max_terminated_length": 2974.0, "completions/mean_length": 580.80859375, "completions/mean_terminated_length": 619.5291748046875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.027733333333333332, "grad_norm": 0.029179969802498817, "learning_rate": 4.833333333333333e-06, "loss": 0.1487, "num_tokens": 7023792.0, "reward": 0.08295506238937378, "reward_std": 0.14621737599372864, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.048722632229328156, "rewards/format_reward_step": 0.0859375, "step": 26 }, { "aux_distill/final_loss": 0.17474313102662564, "aux_distill/lambda": 0.10000000000000003, "aux_distill/lambda_final": 0.2999999999999999, "aux_distill/loss": 0.17030360639095307, "aux_distill/mean_u": 0.3229898320624979, "aux_distill/n_active_final_tok": 4.96, "aux_distill/n_active_tok": 31.2, "aux_distill/step_loss": 1.1788066244125366, "calib/answer_extract_rate": 0.1953125, "calib/auroc": 0.7027027027027026, "calib/avg_num_step_conf": 0.796875, "calib/ece": 0.3922894736842105, "calib/final_conf_rate": 0.1484375, "calib/format_rate": 0.109375, "calib/frac_conf_gt_0.9": 0.13157894736842105, "calib/gap": 0.30543243243243245, "calib/mean_conf": 0.41860526315789476, "calib/mu_c": 0.716, "calib/mu_w": 0.4105675675675675, "calib/nonempty_final_conf_rate": 0.1484375, "calib/nonempty_reasoning_rate": 0.27734375, "calib/nonempty_step_conf_rate": 0.203125, "calib/pce": 0.3922894736842105, "calib/std_conf": 0.3575583529464426, "calib/step_conf_rate": 0.203125, "calib/step_q_c": 0.32939999999999997, "calib/step_q_c_n": 5.0, "calib/step_q_gap": -0.03769748743718587, "calib/step_q_w": 0.36709748743718584, "calib/step_q_w_n": 199.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 3012.0, "completions/max_terminated_length": 3012.0, "completions/mean_length": 603.1640625, "completions/mean_terminated_length": 643.3750610351562, "completions/min_length": 0.0, "completions/min_terminated_length": 13.0, "epoch": 0.0288, "grad_norm": 0.16049453616142273, "learning_rate": 4.805555555555556e-06, "loss": 0.1824, "num_tokens": 7283418.0, "reward": 0.09525414556264877, "reward_std": 0.1646938920021057, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.07722704112529755, "rewards/format_reward_step": 0.109375, "step": 27 }, { "aux_distill/final_loss": 0.14594669759805715, "aux_distill/lambda": 0.10000000000000002, "aux_distill/lambda_final": 0.29999999999999993, "aux_distill/loss": 0.1631264030223801, "aux_distill/mean_u": 0.33193691985367646, "aux_distill/n_active_final_tok": 3.8095238095238093, "aux_distill/n_active_tok": 20.761904761904763, "aux_distill/step_loss": 1.1934239126387096, "calib/answer_extract_rate": 0.16015625, "calib/auroc": 0.6851851851851852, "calib/avg_num_step_conf": 0.42578125, "calib/ece": 0.3027239583333333, "calib/final_conf_rate": 0.125, "calib/format_rate": 0.0703125, "calib/frac_conf_gt_0.9": 0.125, "calib/gap": 0.2347222222222221, "calib/mean_conf": 0.3632864583333334, "calib/mu_c": 0.5613333333333332, "calib/mu_w": 0.32661111111111113, "calib/nonempty_final_conf_rate": 0.125, "calib/nonempty_reasoning_rate": 0.19921875, "calib/nonempty_step_conf_rate": 0.12109375, "calib/pce": 0.2548802083333333, "calib/std_conf": 0.3476928003180568, "calib/step_conf_rate": 0.12109375, "calib/step_q_c": 0.37076923076923074, "calib/step_q_c_n": 13.0, "calib/step_q_gap": -0.04311618589743599, "calib/step_q_w": 0.41388541666666673, "calib/step_q_w_n": 96.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 3030.0, "completions/max_terminated_length": 3030.0, "completions/mean_length": 591.30078125, "completions/mean_terminated_length": 633.3598022460938, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.029866666666666666, "grad_norm": 0.028771203011274338, "learning_rate": 4.777777777777778e-06, "loss": 0.198, "num_tokens": 7541735.0, "reward": 0.07218018174171448, "reward_std": 0.15935426950454712, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.046704113483428955, "rewards/format_reward_step": 0.0703125, "step": 28 }, { "aux_distill/final_loss": 0.15687499716877937, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.3, "aux_distill/loss": 0.16252976506948472, "aux_distill/mean_u": 0.3291023339014737, "aux_distill/n_active_final_tok": 5.466666666666667, "aux_distill/n_active_tok": 27.733333333333334, "aux_distill/step_loss": 1.1546726147333781, "calib/answer_extract_rate": 0.3046875, "calib/auroc": 0.5494505494505495, "calib/avg_num_step_conf": 0.81640625, "calib/ece": 0.26250612052730504, "calib/final_conf_rate": 0.23046875, "calib/format_rate": 0.140625, "calib/frac_conf_gt_0.9": 0.13559322033898305, "calib/gap": -0.027903235653233494, "calib/mean_conf": 0.2990569679849322, "calib/mu_c": 0.2744642857142857, "calib/mu_w": 0.3023675213675192, "calib/nonempty_final_conf_rate": 0.23046875, "calib/nonempty_reasoning_rate": 0.37109375, "calib/nonempty_step_conf_rate": 0.23046875, "calib/pce": 0.22145951035781355, "calib/std_conf": 0.3447452290839414, "calib/step_conf_rate": 0.23046875, "calib/step_q_c": 0.3628214285714286, "calib/step_q_c_n": 21.0, "calib/step_q_gap": 0.008101603405987978, "calib/step_q_w": 0.3547198251654406, "calib/step_q_w_n": 188.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3062.0, "completions/max_terminated_length": 3062.0, "completions/mean_length": 540.4140625, "completions/mean_terminated_length": 571.6776733398438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.030933333333333334, "grad_norm": 0.030148524791002274, "learning_rate": 4.75e-06, "loss": 0.1925, "num_tokens": 7787209.0, "reward": 0.13473469018936157, "reward_std": 0.24897629022598267, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.10150061547756195, "rewards/format_reward_step": 0.140625, "step": 29 }, { "aux_distill/final_loss": 0.05160414253671964, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.3, "aux_distill/loss": 0.13213126137852668, "aux_distill/mean_u": 0.30946222488835323, "aux_distill/n_active_final_tok": 6.266666666666667, "aux_distill/n_active_tok": 38.0, "aux_distill/step_loss": 1.1665001511573792, "calib/answer_extract_rate": 0.32421875, "calib/auroc": 0.5532407407407407, "calib/avg_num_step_conf": 1.12890625, "calib/ece": 0.3050984674327586, "calib/final_conf_rate": 0.2265625, "calib/format_rate": 0.171875, "calib/frac_conf_gt_0.9": 0.15517241379310345, "calib/gap": 0.12028683127592588, "calib/mean_conf": 0.3455088122603448, "calib/mu_c": 0.45749999999999996, "calib/mu_w": 0.3372131687240741, "calib/nonempty_final_conf_rate": 0.2265625, "calib/nonempty_reasoning_rate": 0.359375, "calib/nonempty_step_conf_rate": 0.2421875, "calib/pce": 0.290820881225862, "calib/std_conf": 0.33588368206084973, "calib/step_conf_rate": 0.2421875, "calib/step_q_c": 0.4221052631578947, "calib/step_q_c_n": 19.0, "calib/step_q_gap": 0.060821297829174004, "calib/step_q_w": 0.3612839653287207, "calib/step_q_w_n": 270.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 3023.0, "completions/max_terminated_length": 3023.0, "completions/mean_length": 629.92578125, "completions/mean_terminated_length": 652.8785400390625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.032, "grad_norm": 0.02333277463912964, "learning_rate": 4.722222222222222e-06, "loss": 0.1978, "num_tokens": 8055454.0, "reward": 0.15784242749214172, "reward_std": 0.2448986917734146, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.12818485498428345, "rewards/format_reward_step": 0.171875, "step": 30 }, { "aux_distill/final_loss": 0.08237240115801493, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.3, "aux_distill/loss": 0.1363669661184152, "aux_distill/mean_u": 0.28017132346633566, "aux_distill/n_active_final_tok": 5.733333333333333, "aux_distill/n_active_tok": 32.53333333333333, "aux_distill/step_loss": 1.1165524204572042, "calib/answer_extract_rate": 0.32421875, "calib/auroc": 0.544, "calib/avg_num_step_conf": 0.97265625, "calib/ece": 0.31914243203508774, "calib/final_conf_rate": 0.234375, "calib/format_rate": 0.16015625, "calib/frac_conf_gt_0.9": 0.16666666666666666, "calib/gap": 0.05475744475789479, "calib/mean_conf": 0.3493487960350876, "calib/mu_c": 0.39498, "calib/mu_w": 0.3402225552421052, "calib/nonempty_final_conf_rate": 0.234375, "calib/nonempty_reasoning_rate": 0.40234375, "calib/nonempty_step_conf_rate": 0.265625, "calib/pce": 0.2509122807017544, "calib/std_conf": 0.3535710425722944, "calib/step_conf_rate": 0.265625, "calib/step_q_c": 0.39952666666666664, "calib/step_q_c_n": 30.0, "calib/step_q_gap": 0.06695623390212319, "calib/step_q_w": 0.33257043276454346, "calib/step_q_w_n": 219.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2600.0, "completions/max_terminated_length": 2600.0, "completions/mean_length": 473.08984375, "completions/mean_terminated_length": 494.3305969238281, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.03306666666666667, "grad_norm": 0.02187373861670494, "learning_rate": 4.694444444444445e-06, "loss": 0.2168, "num_tokens": 8282477.0, "reward": 0.15648676455020905, "reward_std": 0.27723076939582825, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.11375479400157928, "rewards/format_reward_step": 0.16015625, "step": 31 }, { "aux_distill/final_loss": 0.09190550932544284, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.13442917657084763, "aux_distill/mean_u": 0.3017262707979109, "aux_distill/n_active_final_tok": 7.25, "aux_distill/n_active_tok": 41.625, "aux_distill/step_loss": 1.068575194105506, "calib/answer_extract_rate": 0.38671875, "calib/auroc": 0.2723684210526316, "calib/avg_num_step_conf": 1.30078125, "calib/ece": 0.3715484914301188, "calib/final_conf_rate": 0.31640625, "calib/format_rate": 0.1953125, "calib/frac_conf_gt_0.9": 0.07407407407407407, "calib/gap": -0.14871173939684698, "calib/mean_conf": 0.34783200239704165, "calib/mu_c": 0.2083, "calib/mu_w": 0.357011739396847, "calib/nonempty_final_conf_rate": 0.31640625, "calib/nonempty_reasoning_rate": 0.4765625, "calib/nonempty_step_conf_rate": 0.32421875, "calib/pce": 0.328826049382716, "calib/std_conf": 0.3051820241673833, "calib/step_conf_rate": 0.32421875, "calib/step_q_c": 0.21890000000000004, "calib/step_q_c_n": 5.0, "calib/step_q_gap": -0.09966748154307947, "calib/step_q_w": 0.3185674815430795, "calib/step_q_w_n": 328.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 2957.0, "completions/max_terminated_length": 2957.0, "completions/mean_length": 487.4765625, "completions/mean_terminated_length": 513.5555419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.034133333333333335, "grad_norm": 0.021919820457696915, "learning_rate": 4.666666666666667e-06, "loss": 0.1687, "num_tokens": 8513975.0, "reward": 0.18255311250686646, "reward_std": 0.2816970944404602, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.1502624899148941, "rewards/format_reward_step": 0.1953125, "step": 32 }, { "aux_distill/final_loss": 0.06824627239257097, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.12928524776361883, "aux_distill/mean_u": 0.3491194235858012, "aux_distill/n_active_final_tok": 10.875, "aux_distill/n_active_tok": 70.25, "aux_distill/step_loss": 1.0881136264652014, "calib/answer_extract_rate": 0.54296875, "calib/auroc": 0.5186084142394822, "calib/avg_num_step_conf": 2.203125, "calib/ece": 0.24606372253916614, "calib/final_conf_rate": 0.44921875, "calib/format_rate": 0.30859375, "calib/frac_conf_gt_0.9": 0.10434782608695652, "calib/gap": 0.04465793286085118, "calib/mean_conf": 0.2825020253507157, "calib/mu_c": 0.32249999999999995, "calib/mu_w": 0.2778420671391488, "calib/nonempty_final_conf_rate": 0.44921875, "calib/nonempty_reasoning_rate": 0.65234375, "calib/nonempty_step_conf_rate": 0.484375, "calib/pce": 0.2121089609014627, "calib/std_conf": 0.30670906421511474, "calib/step_conf_rate": 0.484375, "calib/step_q_c": 0.29117647058823526, "calib/step_q_c_n": 34.0, "calib/step_q_gap": -0.011777160787864838, "calib/step_q_w": 0.3029536313761001, "calib/step_q_w_n": 528.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 3011.0, "completions/max_terminated_length": 3011.0, "completions/mean_length": 468.45703125, "completions/mean_terminated_length": 493.51849365234375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0352, "grad_norm": 0.018631640821695328, "learning_rate": 4.638888888888889e-06, "loss": 0.2603, "num_tokens": 8740772.0, "reward": 0.2996920347213745, "reward_std": 0.41037723422050476, "rewards/accuracy_reward_step": 0.046875, "rewards/final_brier_reward_step": 0.24391531944274902, "rewards/format_reward_step": 0.30859375, "step": 33 }, { "aux_distill/final_loss": 0.07452091845334508, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.1296235928311944, "aux_distill/mean_u": 0.35437979177331624, "aux_distill/n_active_final_tok": 12.125, "aux_distill/n_active_tok": 67.5, "aux_distill/step_loss": 1.072673151269555, "calib/answer_extract_rate": 0.54296875, "calib/auroc": 0.5162907268170426, "calib/avg_num_step_conf": 2.109375, "calib/ece": 0.26456615434668057, "calib/final_conf_rate": 0.47265625, "calib/format_rate": 0.36328125, "calib/frac_conf_gt_0.9": 0.049586776859504134, "calib/gap": 0.04187715196536529, "calib/mean_conf": 0.3105454931896558, "calib/mu_c": 0.35, "calib/mu_w": 0.3081228480346347, "calib/nonempty_final_conf_rate": 0.47265625, "calib/nonempty_reasoning_rate": 0.64453125, "calib/nonempty_step_conf_rate": 0.5, "calib/pce": 0.2586302039334574, "calib/std_conf": 0.2937268942936628, "calib/step_conf_rate": 0.5, "calib/step_q_c": 0.24750000000000003, "calib/step_q_c_n": 24.0, "calib/step_q_gap": -0.049518853210390484, "calib/step_q_w": 0.2970188532103905, "calib/step_q_w_n": 516.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2998.0, "completions/max_terminated_length": 2998.0, "completions/mean_length": 412.76953125, "completions/mean_terminated_length": 431.3020324707031, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.03626666666666667, "grad_norm": 0.02291269227862358, "learning_rate": 4.611111111111112e-06, "loss": 0.3014, "num_tokens": 8951553.0, "reward": 0.3447210192680359, "reward_std": 0.41864240169525146, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.29491081833839417, "rewards/format_reward_step": 0.36328125, "step": 34 }, { "aux_distill/final_loss": 0.12155485508264974, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.13869250193238258, "aux_distill/mean_u": 0.34067662841664037, "aux_distill/n_active_final_tok": 15.75, "aux_distill/n_active_tok": 83.875, "aux_distill/step_loss": 1.022260420024395, "calib/answer_extract_rate": 0.62890625, "calib/auroc": 0.6041666666666667, "calib/avg_num_step_conf": 2.625, "calib/ece": 0.22890294398821, "calib/final_conf_rate": 0.59375, "calib/format_rate": 0.48046875, "calib/frac_conf_gt_0.9": 0.039473684210526314, "calib/gap": 0.1550505075594254, "calib/mean_conf": 0.2889403219847397, "calib/mu_c": 0.4317499999999999, "calib/mu_w": 0.2766994924405745, "calib/nonempty_final_conf_rate": 0.59375, "calib/nonempty_reasoning_rate": 0.7421875, "calib/nonempty_step_conf_rate": 0.61328125, "calib/pce": 0.21944794877594856, "calib/std_conf": 0.28105935895253104, "calib/step_conf_rate": 0.61328125, "calib/step_q_c": 0.37579310344827593, "calib/step_q_c_n": 29.0, "calib/step_q_gap": 0.044360961577793734, "calib/step_q_w": 0.3314321418704822, "calib/step_q_w_n": 643.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3044.0, "completions/max_terminated_length": 3044.0, "completions/mean_length": 460.24609375, "completions/mean_terminated_length": 465.7035827636719, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.037333333333333336, "grad_norm": 0.023835359141230583, "learning_rate": 4.583333333333333e-06, "loss": 0.2279, "num_tokens": 9178632.0, "reward": 0.4641324281692505, "reward_std": 0.4356589913368225, "rewards/accuracy_reward_step": 0.046875, "rewards/final_brier_reward_step": 0.400921106338501, "rewards/format_reward_step": 0.48046875, "step": 35 }, { "aux_distill/final_loss": 0.08090376047766767, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.12377052824012935, "aux_distill/mean_u": 0.3144702464862898, "aux_distill/n_active_final_tok": 20.375, "aux_distill/n_active_tok": 114.25, "aux_distill/step_loss": 0.9949939772486687, "calib/answer_extract_rate": 0.7421875, "calib/auroc": 0.5090123456790123, "calib/avg_num_step_conf": 3.57421875, "calib/ece": 0.2370630700310029, "calib/final_conf_rate": 0.69140625, "calib/format_rate": 0.61328125, "calib/frac_conf_gt_0.9": 0.05649717514124294, "calib/gap": 0.027269630621946317, "calib/mean_conf": 0.2972362517089617, "calib/mu_c": 0.32034610816823816, "calib/mu_w": 0.29307647754629185, "calib/nonempty_final_conf_rate": 0.69140625, "calib/nonempty_reasoning_rate": 0.8984375, "calib/nonempty_step_conf_rate": 0.8125, "calib/pce": 0.1908784744293043, "calib/std_conf": 0.28309407595640745, "calib/step_conf_rate": 0.8125, "calib/step_q_c": 0.39568156440629015, "calib/step_q_c_n": 118.0, "calib/step_q_gap": 0.08009663504477876, "calib/step_q_w": 0.3155849293615114, "calib/step_q_w_n": 794.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2398.0, "completions/max_terminated_length": 2398.0, "completions/mean_length": 390.75390625, "completions/mean_terminated_length": 395.3873596191406, "completions/min_length": 0.0, "completions/min_terminated_length": 22.0, "epoch": 0.0384, "grad_norm": 0.015997234731912613, "learning_rate": 4.555555555555556e-06, "loss": 0.3133, "num_tokens": 9381377.0, "reward": 0.5978110432624817, "reward_std": 0.4598034620285034, "rewards/accuracy_reward_step": 0.109375, "rewards/final_brier_reward_step": 0.47296571731567383, "rewards/format_reward_step": 0.61328125, "step": 36 }, { "aux_distill/final_loss": 0.08535356540232897, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.12664865073747933, "aux_distill/mean_u": 0.3223564653255679, "aux_distill/n_active_final_tok": 19.0, "aux_distill/n_active_tok": 101.0, "aux_distill/step_loss": 1.010425791144371, "calib/answer_extract_rate": 0.73046875, "calib/auroc": 0.5167151162790697, "calib/avg_num_step_conf": 3.171875, "calib/ece": 0.303168981084137, "calib/final_conf_rate": 0.703125, "calib/format_rate": 0.54296875, "calib/frac_conf_gt_0.9": 0.08333333333333333, "calib/gap": 0.08023595002822875, "calib/mean_conf": 0.3308300921952481, "calib/mu_c": 0.40750000000000003, "calib/mu_w": 0.3272640499717713, "calib/nonempty_final_conf_rate": 0.703125, "calib/nonempty_reasoning_rate": 0.90625, "calib/nonempty_step_conf_rate": 0.78515625, "calib/pce": 0.29477731441747035, "calib/std_conf": 0.2904608136630969, "calib/step_conf_rate": 0.78515625, "calib/step_q_c": 0.24941176470588236, "calib/step_q_c_n": 17.0, "calib/step_q_gap": -0.08711469152545939, "calib/step_q_w": 0.33652645623134175, "calib/step_q_w_n": 795.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2966.0, "completions/max_terminated_length": 2966.0, "completions/mean_length": 422.08984375, "completions/mean_terminated_length": 427.0948791503906, "completions/min_length": 0.0, "completions/min_terminated_length": 15.0, "epoch": 0.039466666666666664, "grad_norm": 0.014809138141572475, "learning_rate": 4.527777777777778e-06, "loss": 0.1997, "num_tokens": 9596528.0, "reward": 0.5087645649909973, "reward_std": 0.4285920262336731, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.44331035017967224, "rewards/format_reward_step": 0.54296875, "step": 37 }, { "aux_distill/final_loss": 0.06789625725650694, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.12406372674740851, "aux_distill/mean_u": 0.36764973522124766, "aux_distill/n_active_final_tok": 22.0, "aux_distill/n_active_tok": 115.0, "aux_distill/step_loss": 1.0369484722614288, "calib/answer_extract_rate": 0.77734375, "calib/auroc": 0.5128330084470436, "calib/avg_num_step_conf": 3.59765625, "calib/ece": 0.26338781084223273, "calib/final_conf_rate": 0.73828125, "calib/format_rate": 0.63671875, "calib/frac_conf_gt_0.9": 0.0582010582010582, "calib/gap": -0.010150525112302244, "calib/mean_conf": 0.32983380843494015, "calib/mu_c": 0.32065000000000005, "calib/mu_w": 0.3308005251123023, "calib/nonempty_final_conf_rate": 0.73828125, "calib/nonempty_reasoning_rate": 0.9140625, "calib/nonempty_step_conf_rate": 0.8359375, "calib/pce": 0.2489917620195388, "calib/std_conf": 0.26640916066091724, "calib/step_conf_rate": 0.8359375, "calib/step_q_c": 0.3678795454545455, "calib/step_q_c_n": 88.0, "calib/step_q_gap": 0.038739793970864245, "calib/step_q_w": 0.32913975148368124, "calib/step_q_w_n": 832.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2910.0, "completions/max_terminated_length": 2910.0, "completions/mean_length": 358.9140625, "completions/mean_terminated_length": 360.32159423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.04053333333333333, "grad_norm": 0.015669597312808037, "learning_rate": 4.5e-06, "loss": 0.2079, "num_tokens": 9795298.0, "reward": 0.6117260456085205, "reward_std": 0.47368207573890686, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.5086084604263306, "rewards/format_reward_step": 0.63671875, "step": 38 }, { "aux_distill/final_loss": 0.05943663168000057, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.11475968128070235, "aux_distill/mean_u": 0.33628318293849246, "aux_distill/n_active_final_tok": 20.875, "aux_distill/n_active_tok": 120.25, "aux_distill/step_loss": 0.9692869018763304, "calib/answer_extract_rate": 0.80078125, "calib/auroc": 0.550663039782387, "calib/avg_num_step_conf": 3.76171875, "calib/ece": 0.28823332367148746, "calib/final_conf_rate": 0.7421875, "calib/format_rate": 0.609375, "calib/frac_conf_gt_0.9": 0.06315789473684211, "calib/gap": 0.053555037628975455, "calib/mean_conf": 0.35962496418984, "calib/mu_c": 0.4083882352941176, "calib/mu_w": 0.35483319766514215, "calib/nonempty_final_conf_rate": 0.7421875, "calib/nonempty_reasoning_rate": 0.921875, "calib/nonempty_step_conf_rate": 0.80859375, "calib/pce": 0.2791923018254006, "calib/std_conf": 0.281930714845133, "calib/step_conf_rate": 0.80859375, "calib/step_q_c": 0.4455693548387097, "calib/step_q_c_n": 62.0, "calib/step_q_gap": 0.07279414678819801, "calib/step_q_w": 0.3727752080505117, "calib/step_q_w_n": 899.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2342.0, "completions/max_terminated_length": 2342.0, "completions/mean_length": 349.28515625, "completions/mean_terminated_length": 350.6549377441406, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0416, "grad_norm": 0.013297786936163902, "learning_rate": 4.472222222222223e-06, "loss": 0.213, "num_tokens": 9990803.0, "reward": 0.5791658163070679, "reward_std": 0.46792203187942505, "rewards/accuracy_reward_step": 0.0703125, "rewards/final_brier_reward_step": 0.47864413261413574, "rewards/format_reward_step": 0.609375, "step": 39 }, { "aux_distill/final_loss": 0.07307140520424582, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.12082414398901165, "aux_distill/mean_u": 0.33751087471136826, "aux_distill/n_active_final_tok": 23.5, "aux_distill/n_active_tok": 128.5, "aux_distill/step_loss": 0.9890271984040737, "calib/answer_extract_rate": 0.81640625, "calib/auroc": 0.6164062499999999, "calib/avg_num_step_conf": 4.08203125, "calib/ece": 0.2868777103093446, "calib/final_conf_rate": 0.7890625, "calib/format_rate": 0.6796875, "calib/frac_conf_gt_0.9": 0.039603960396039604, "calib/gap": 0.09954532561204377, "calib/mean_conf": 0.33638266080439416, "calib/mu_c": 0.43100000000000005, "calib/mu_w": 0.3314546743879563, "calib/nonempty_final_conf_rate": 0.7890625, "calib/nonempty_reasoning_rate": 0.9453125, "calib/nonempty_step_conf_rate": 0.875, "calib/pce": 0.2868777103093446, "calib/std_conf": 0.2630237438934269, "calib/step_conf_rate": 0.875, "calib/step_q_c": 0.47046511627906984, "calib/step_q_c_n": 43.0, "calib/step_q_gap": 0.12202918122856177, "calib/step_q_w": 0.34843593505050807, "calib/step_q_w_n": 1001.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2637.0, "completions/max_terminated_length": 2637.0, "completions/mean_length": 342.30078125, "completions/mean_terminated_length": 343.6431579589844, "completions/min_length": 0.0, "completions/min_terminated_length": 25.0, "epoch": 0.042666666666666665, "grad_norm": 0.013759545050561428, "learning_rate": 4.444444444444444e-06, "loss": 0.2555, "num_tokens": 10185192.0, "reward": 0.6373392939567566, "reward_std": 0.44615915417671204, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.555928647518158, "rewards/format_reward_step": 0.6796875, "step": 40 }, { "aux_distill/final_loss": 0.073347973782802, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.12163389869965613, "aux_distill/mean_u": 0.3582824330040726, "aux_distill/n_active_final_tok": 24.25, "aux_distill/n_active_tok": 134.25, "aux_distill/step_loss": 0.9962950479239225, "calib/answer_extract_rate": 0.8359375, "calib/auroc": 0.5697314049586777, "calib/avg_num_step_conf": 4.203125, "calib/ece": 0.2330188857812201, "calib/final_conf_rate": 0.81640625, "calib/format_rate": 0.7421875, "calib/frac_conf_gt_0.9": 0.04784688995215311, "calib/gap": 0.08057175342125994, "calib/mean_conf": 0.3395137970524938, "calib/mu_c": 0.4073636946703969, "calib/mu_w": 0.32679194124913696, "calib/nonempty_final_conf_rate": 0.81640625, "calib/nonempty_reasoning_rate": 0.9453125, "calib/nonempty_step_conf_rate": 0.88671875, "calib/pce": 0.20731897299580432, "calib/std_conf": 0.2690940664418781, "calib/step_conf_rate": 0.88671875, "calib/step_q_c": 0.37171899176753614, "calib/step_q_c_n": 166.0, "calib/step_q_gap": 0.01251601923579454, "calib/step_q_w": 0.3592029725317416, "calib/step_q_w_n": 910.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2718.0, "completions/max_terminated_length": 2718.0, "completions/mean_length": 328.80078125, "completions/mean_terminated_length": 328.80078125, "completions/min_length": 1.0, "completions/min_terminated_length": 1.0, "epoch": 0.04373333333333333, "grad_norm": 0.014515561051666737, "learning_rate": 4.416666666666667e-06, "loss": 0.2034, "num_tokens": 10376613.0, "reward": 0.7310442924499512, "reward_std": 0.4173903465270996, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.5831823348999023, "rewards/format_reward_step": 0.7421875, "step": 41 }, { "aux_distill/final_loss": 0.04330735033727251, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.11263674194924533, "aux_distill/mean_u": 0.3688767612127583, "aux_distill/n_active_final_tok": 27.375, "aux_distill/n_active_tok": 136.625, "aux_distill/step_loss": 0.9964453428983688, "calib/answer_extract_rate": 0.8984375, "calib/auroc": 0.46195452878621196, "calib/avg_num_step_conf": 4.2734375, "calib/ece": 0.24793024545255773, "calib/final_conf_rate": 0.89453125, "calib/format_rate": 0.828125, "calib/frac_conf_gt_0.9": 0.021834061135371178, "calib/gap": -0.036909351969875714, "calib/mean_conf": 0.30854010351815786, "calib/mu_c": 0.27598250920411904, "calib/mu_w": 0.31289186117399476, "calib/nonempty_final_conf_rate": 0.89453125, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9453125, "calib/pce": 0.2192832094198556, "calib/std_conf": 0.24627190212816186, "calib/step_conf_rate": 0.9453125, "calib/step_q_c": 0.3277032935885949, "calib/step_q_c_n": 113.0, "calib/step_q_gap": -0.048347392745087536, "calib/step_q_w": 0.37605068633368244, "calib/step_q_w_n": 981.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2657.0, "completions/max_terminated_length": 2657.0, "completions/mean_length": 311.50390625, "completions/mean_terminated_length": 312.7254943847656, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.0448, "grad_norm": 0.013212639838457108, "learning_rate": 4.388888888888889e-06, "loss": 0.1785, "num_tokens": 10560726.0, "reward": 0.7954630851745605, "reward_std": 0.3343948423862457, "rewards/accuracy_reward_step": 0.109375, "rewards/final_brier_reward_step": 0.6534261703491211, "rewards/format_reward_step": 0.828125, "step": 42 }, { "aux_distill/final_loss": 0.05465633030689787, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.1129497536458075, "aux_distill/mean_u": 0.3167355963575318, "aux_distill/n_active_final_tok": 25.375, "aux_distill/n_active_tok": 140.375, "aux_distill/step_loss": 0.9655285216867924, "calib/answer_extract_rate": 0.88671875, "calib/auroc": 0.606114435302917, "calib/avg_num_step_conf": 4.38671875, "calib/ece": 0.23020883058674113, "calib/final_conf_rate": 0.85546875, "calib/format_rate": 0.75, "calib/frac_conf_gt_0.9": 0.0639269406392694, "calib/gap": 0.08793215833169343, "calib/mean_conf": 0.34798888082878393, "calib/mu_c": 0.4246785714285714, "calib/mu_w": 0.336746413096878, "calib/nonempty_final_conf_rate": 0.85546875, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.90234375, "calib/pce": 0.22517191506849316, "calib/std_conf": 0.26839392865065215, "calib/step_conf_rate": 0.90234375, "calib/step_q_c": 0.4515044444444444, "calib/step_q_c_n": 135.0, "calib/step_q_gap": 0.11117224036032403, "calib/step_q_w": 0.34033220408412035, "calib/step_q_w_n": 987.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2405.0, "completions/max_terminated_length": 2405.0, "completions/mean_length": 325.203125, "completions/mean_terminated_length": 325.203125, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.04586666666666667, "grad_norm": 0.011783161200582981, "learning_rate": 4.361111111111112e-06, "loss": 0.1914, "num_tokens": 10749202.0, "reward": 0.7341213226318359, "reward_std": 0.41676369309425354, "rewards/accuracy_reward_step": 0.109375, "rewards/final_brier_reward_step": 0.6088675856590271, "rewards/format_reward_step": 0.75, "step": 43 }, { "aux_distill/final_loss": 0.049435441731475294, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.11235560267232358, "aux_distill/mean_u": 0.3773883281417893, "aux_distill/n_active_final_tok": 27.625, "aux_distill/n_active_tok": 148.625, "aux_distill/step_loss": 0.9752496667206287, "calib/answer_extract_rate": 0.89453125, "calib/auroc": 0.5394678492239469, "calib/avg_num_step_conf": 4.64453125, "calib/ece": 0.2478478984074095, "calib/final_conf_rate": 0.88671875, "calib/format_rate": 0.80859375, "calib/frac_conf_gt_0.9": 0.04405286343612335, "calib/gap": 0.0262825476040488, "calib/mean_conf": 0.33462829601636773, "calib/mu_c": 0.3583636363636364, "calib/mu_w": 0.3320810887595876, "calib/nonempty_final_conf_rate": 0.88671875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.24277994743215292, "calib/std_conf": 0.2561106484697805, "calib/step_conf_rate": 0.95703125, "calib/step_q_c": 0.3568977272727273, "calib/step_q_c_n": 88.0, "calib/step_q_gap": 0.0011912174939330278, "calib/step_q_w": 0.35570650977879426, "calib/step_q_w_n": 1100.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3003.0, "completions/max_terminated_length": 3003.0, "completions/mean_length": 325.0625, "completions/mean_terminated_length": 326.3372802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.046933333333333334, "grad_norm": 0.01184465829282999, "learning_rate": 4.333333333333334e-06, "loss": 0.2199, "num_tokens": 10938738.0, "reward": 0.7741551995277405, "reward_std": 0.3730512261390686, "rewards/accuracy_reward_step": 0.08984375, "rewards/final_brier_reward_step": 0.6498727798461914, "rewards/format_reward_step": 0.80859375, "step": 44 }, { "aux_distill/final_loss": 0.05315451697970275, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.1127287270501256, "aux_distill/mean_u": 0.3138015339425314, "aux_distill/n_active_final_tok": 29.375, "aux_distill/n_active_tok": 143.0, "aux_distill/step_loss": 0.9678237065672874, "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.5745116279069767, "calib/avg_num_step_conf": 4.48046875, "calib/ece": 0.24868822013674488, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.89453125, "calib/frac_conf_gt_0.9": 0.03333333333333333, "calib/gap": 0.07026412635898238, "calib/mean_conf": 0.3274190534700782, "calib/mu_c": 0.390364, "calib/mu_w": 0.3200998736410176, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.23597030347007822, "calib/std_conf": 0.25065563260018275, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.38115267857142854, "calib/step_q_c_n": 112.0, "calib/step_q_gap": 0.0320939329410197, "calib/step_q_w": 0.34905874563040884, "calib/step_q_w_n": 1035.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1676.0, "completions/max_terminated_length": 1676.0, "completions/mean_length": 292.75390625, "completions/mean_terminated_length": 293.9019775390625, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.048, "grad_norm": 0.010884284973144531, "learning_rate": 4.305555555555556e-06, "loss": 0.1242, "num_tokens": 11118731.0, "reward": 0.8567354679107666, "reward_std": 0.29351842403411865, "rewards/accuracy_reward_step": 0.1015625, "rewards/final_brier_reward_step": 0.7173771858215332, "rewards/format_reward_step": 0.89453125, "step": 45 }, { "aux_distill/final_loss": 0.03761594461684581, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.10966533492319286, "aux_distill/mean_u": 0.374399186398532, "aux_distill/n_active_final_tok": 28.875, "aux_distill/n_active_tok": 146.5, "aux_distill/step_loss": 0.983805488795042, "calib/answer_extract_rate": 0.92578125, "calib/auroc": 0.5698276900831425, "calib/avg_num_step_conf": 4.578125, "calib/ece": 0.1949523083914912, "calib/final_conf_rate": 0.921875, "calib/format_rate": 0.8671875, "calib/frac_conf_gt_0.9": 0.038135593220338986, "calib/gap": 0.07807160849113454, "calib/mean_conf": 0.332037024155703, "calib/mu_c": 0.39588372093023255, "calib/mu_w": 0.317812112439098, "calib/nonempty_final_conf_rate": 0.921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.17239297135834286, "calib/std_conf": 0.2617218977178439, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.4384761904761904, "calib/step_q_c_n": 168.0, "calib/step_q_gap": 0.09595837940467872, "calib/step_q_w": 0.3425178110715117, "calib/step_q_w_n": 1004.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2388.0, "completions/max_terminated_length": 2388.0, "completions/mean_length": 298.4453125, "completions/mean_terminated_length": 299.6156921386719, "completions/min_length": 0.0, "completions/min_terminated_length": 29.0, "epoch": 0.04906666666666667, "grad_norm": 0.011077606119215488, "learning_rate": 4.277777777777778e-06, "loss": 0.2487, "num_tokens": 11299901.0, "reward": 0.8560590744018555, "reward_std": 0.33570653200149536, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.6769618391990662, "rewards/format_reward_step": 0.8671875, "step": 46 }, { "aux_distill/final_loss": 0.03197351558628725, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.10705634765326977, "aux_distill/mean_u": 0.37307014319606496, "aux_distill/n_active_final_tok": 29.25, "aux_distill/n_active_tok": 156.0, "aux_distill/step_loss": 0.9746429100632668, "calib/answer_extract_rate": 0.92578125, "calib/auroc": 0.4594080338266385, "calib/avg_num_step_conf": 4.88671875, "calib/ece": 0.2856373839662448, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.88671875, "calib/frac_conf_gt_0.9": 0.0379746835443038, "calib/gap": -0.0206352050739958, "calib/mean_conf": 0.36190151898734174, "calib/mu_c": 0.3431818181818182, "calib/mu_w": 0.363817023255814, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.27735594936708863, "calib/std_conf": 0.2491163533164288, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.3939784946236559, "calib/step_q_c_n": 93.0, "calib/step_q_gap": 0.020942277007075627, "calib/step_q_w": 0.3730362176165803, "calib/step_q_w_n": 1158.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2335.0, "completions/max_terminated_length": 2335.0, "completions/mean_length": 283.0546875, "completions/mean_terminated_length": 283.0546875, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.050133333333333335, "grad_norm": 0.010590034537017345, "learning_rate": 4.25e-06, "loss": 0.2122, "num_tokens": 11478339.0, "reward": 0.8327673673629761, "reward_std": 0.3097901940345764, "rewards/accuracy_reward_step": 0.08984375, "rewards/final_brier_reward_step": 0.6889722943305969, "rewards/format_reward_step": 0.88671875, "step": 47 }, { "aux_distill/final_loss": 0.020423345937160775, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.10429738136008382, "aux_distill/mean_u": 0.33795998690645146, "aux_distill/n_active_final_tok": 30.875, "aux_distill/n_active_tok": 144.625, "aux_distill/step_loss": 0.981703756377101, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5763574660633484, "calib/avg_num_step_conf": 4.51953125, "calib/ece": 0.24455381526104417, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.028112449799196786, "calib/gap": 0.06763316095669031, "calib/mean_conf": 0.3418293172690763, "calib/mu_c": 0.4018571428571428, "calib/mu_w": 0.3342239819004525, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.2369666666666667, "calib/std_conf": 0.2563793693495012, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.3975843697478991, "calib/step_q_c_n": 119.0, "calib/step_q_gap": 0.037061737763313396, "calib/step_q_w": 0.36052263198458573, "calib/step_q_w_n": 1038.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1704.0, "completions/max_terminated_length": 1704.0, "completions/mean_length": 251.84375, "completions/mean_terminated_length": 251.84375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.0512, "grad_norm": 0.010474754497408867, "learning_rate": 4.222222222222223e-06, "loss": 0.0945, "num_tokens": 11646499.0, "reward": 0.8939938545227051, "reward_std": 0.2552899718284607, "rewards/accuracy_reward_step": 0.109375, "rewards/final_brier_reward_step": 0.7450190186500549, "rewards/format_reward_step": 0.93359375, "step": 48 }, { "aux_distill/final_loss": 0.04292366838490125, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.11004131496883929, "aux_distill/mean_u": 0.37283995651407686, "aux_distill/n_active_final_tok": 30.375, "aux_distill/n_active_tok": 154.25, "aux_distill/step_loss": 0.9716421309858561, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5504694835680751, "calib/avg_num_step_conf": 4.828125, "calib/ece": 0.24948395061728393, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.04938271604938271, "calib/gap": 0.03866948356807509, "calib/mean_conf": 0.35110452674897125, "calib/mu_c": 0.38500000000000006, "calib/mu_w": 0.346330516431925, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.23856584362139918, "calib/std_conf": 0.24851372522578924, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.37976377952755913, "calib/step_q_c_n": 127.0, "calib/step_q_gap": 0.0188441221785961, "calib/step_q_w": 0.36091965734896303, "calib/step_q_w_n": 1109.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2805.0, "completions/max_terminated_length": 2805.0, "completions/mean_length": 267.37109375, "completions/mean_terminated_length": 269.47637939453125, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.05226666666666667, "grad_norm": 0.009276405908167362, "learning_rate": 4.194444444444445e-06, "loss": 0.0878, "num_tokens": 11819482.0, "reward": 0.9003954529762268, "reward_std": 0.21878936886787415, "rewards/accuracy_reward_step": 0.1171875, "rewards/final_brier_reward_step": 0.7421971559524536, "rewards/format_reward_step": 0.94140625, "step": 49 }, { "aux_distill/final_loss": 0.019410489479923854, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.10197383398190141, "aux_distill/mean_u": 0.3622874265331878, "aux_distill/n_active_final_tok": 30.25, "aux_distill/n_active_tok": 158.625, "aux_distill/step_loss": 0.96150684915483, "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.47657807308970096, "calib/avg_num_step_conf": 4.95703125, "calib/ece": 0.24659780999571898, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.90625, "calib/frac_conf_gt_0.9": 0.024691358024691357, "calib/gap": -0.009614103121117601, "calib/mean_conf": 0.3310063052306185, "calib/mu_c": 0.32250000000000006, "calib/mu_w": 0.33211410312111767, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.23118888888888892, "calib/std_conf": 0.2330422734672042, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.3619376923076923, "calib/step_q_c_n": 130.0, "calib/step_q_gap": 0.009737342361419488, "calib/step_q_w": 0.3522003499462728, "calib/step_q_w_n": 1139.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2154.0, "completions/max_terminated_length": 2154.0, "completions/mean_length": 292.67578125, "completions/mean_terminated_length": 294.9803161621094, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.05333333333333334, "grad_norm": 0.01001722365617752, "learning_rate": 4.166666666666667e-06, "loss": 0.1847, "num_tokens": 11999767.0, "reward": 0.8708915710449219, "reward_std": 0.2704950273036957, "rewards/accuracy_reward_step": 0.109375, "rewards/final_brier_reward_step": 0.726158082485199, "rewards/format_reward_step": 0.90625, "step": 50 }, { "aux_distill/final_loss": 0.03430827522606705, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.1066815466620028, "aux_distill/mean_u": 0.3071702017048218, "aux_distill/n_active_final_tok": 30.625, "aux_distill/n_active_tok": 157.5, "aux_distill/step_loss": 0.9638906251639128, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5546636085626913, "calib/avg_num_step_conf": 4.92578125, "calib/ece": 0.20473629032258064, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.016129032258064516, "calib/gap": 0.047611314984709496, "calib/mean_conf": 0.3234814516129032, "calib/mu_c": 0.3653333333333333, "calib/mu_w": 0.3177220183486238, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.203625, "calib/std_conf": 0.236696744143673, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.3991240875912409, "calib/step_q_c_n": 137.0, "calib/step_q_gap": 0.053205315349247995, "calib/step_q_w": 0.3459187722419929, "calib/step_q_w_n": 1124.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 925.0, "completions/max_terminated_length": 925.0, "completions/mean_length": 250.44921875, "completions/mean_terminated_length": 251.43138122558594, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.0544, "grad_norm": 0.010000507347285748, "learning_rate": 4.138888888888889e-06, "loss": 0.0882, "num_tokens": 12173178.0, "reward": 0.9098514318466187, "reward_std": 0.21336929500102997, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.7572028636932373, "rewards/format_reward_step": 0.94140625, "step": 51 }, { "aux_distill/final_loss": 0.043212573902565055, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.11219168268144131, "aux_distill/mean_u": 0.3817733282392883, "aux_distill/n_active_final_tok": 30.75, "aux_distill/n_active_tok": 148.0, "aux_distill/step_loss": 0.9922790843993425, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.579156046445766, "calib/avg_num_step_conf": 4.625, "calib/ece": 0.20712024291497977, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.02834008097165992, "calib/gap": 0.0525386717643726, "calib/mean_conf": 0.31932914979757077, "calib/mu_c": 0.36484848484848476, "calib/mu_w": 0.31230981308411215, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.19642307692307692, "calib/std_conf": 0.23119480394012487, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.3610204081632653, "calib/step_q_c_n": 147.0, "calib/step_q_gap": 0.016775696183480238, "calib/step_q_w": 0.34424471197978507, "calib/step_q_w_n": 1037.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2617.0, "completions/max_terminated_length": 2617.0, "completions/mean_length": 283.5078125, "completions/mean_terminated_length": 283.5078125, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.055466666666666664, "grad_norm": 0.009968329221010208, "learning_rate": 4.111111111111111e-06, "loss": 0.1697, "num_tokens": 12353708.0, "reward": 0.9156846404075623, "reward_std": 0.21960917115211487, "rewards/accuracy_reward_step": 0.12890625, "rewards/final_brier_reward_step": 0.7610567212104797, "rewards/format_reward_step": 0.94140625, "step": 52 }, { "aux_distill/final_loss": 0.027034704871766735, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.10531486221589148, "aux_distill/mean_u": 0.37542458679157503, "aux_distill/n_active_final_tok": 30.75, "aux_distill/n_active_tok": 155.625, "aux_distill/step_loss": 0.9720444846898317, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5129870129870131, "calib/avg_num_step_conf": 4.86328125, "calib/ece": 0.21119253512266722, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.027888446215139442, "calib/gap": -0.0053625547121392, "calib/mean_conf": 0.32351285384776685, "calib/mu_c": 0.3190476190476191, "calib/mu_w": 0.3244101737597583, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.1836873558397987, "calib/std_conf": 0.24948658797038709, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.38239583333333327, "calib/step_q_c_n": 192.0, "calib/step_q_gap": 0.021715674192182233, "calib/step_q_w": 0.36068015914115104, "calib/step_q_w_n": 1053.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2037.0, "completions/max_terminated_length": 2037.0, "completions/mean_length": 267.30859375, "completions/mean_terminated_length": 267.30859375, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 0.05653333333333333, "grad_norm": 0.010738939978182316, "learning_rate": 4.083333333333334e-06, "loss": 0.1677, "num_tokens": 12527963.0, "reward": 0.9240987300872803, "reward_std": 0.23157396912574768, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.7349162697792053, "rewards/format_reward_step": 0.94921875, "step": 53 }, { "aux_distill/final_loss": 0.015329442940128502, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09901187103241682, "aux_distill/mean_u": 0.313985019727838, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 171.25, "aux_distill/step_loss": 0.9441303610801697, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.49085336653577244, "calib/avg_num_step_conf": 5.35546875, "calib/ece": 0.18617960000000003, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.024, "calib/gap": -0.01886412221051631, "calib/mean_conf": 0.3169404, "calib/mu_c": 0.30207547169811316, "calib/mu_w": 0.3209395939086295, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.14556000000000002, "calib/std_conf": 0.22916550632204663, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.3609964412811388, "calib/step_q_c_n": 281.0, "calib/step_q_gap": 0.01222047797838649, "calib/step_q_w": 0.3487759633027523, "calib/step_q_w_n": 1090.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1536.0, "completions/max_terminated_length": 1536.0, "completions/mean_length": 274.76171875, "completions/mean_terminated_length": 274.76171875, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.0576, "grad_norm": 0.009860129095613956, "learning_rate": 4.055555555555556e-06, "loss": 0.0712, "num_tokens": 12704534.0, "reward": 0.9352671504020691, "reward_std": 0.24189065396785736, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.7220968008041382, "rewards/format_reward_step": 0.94140625, "step": 54 }, { "aux_distill/final_loss": 0.012575812774230144, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.0991168157197535, "aux_distill/mean_u": 0.3420365307680586, "aux_distill/n_active_final_tok": 30.5, "aux_distill/n_active_tok": 166.875, "aux_distill/step_loss": 0.9534407090395689, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5042366582429018, "calib/avg_num_step_conf": 5.23828125, "calib/ece": 0.2182177419354839, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.03225806451612903, "calib/gap": -0.006046082949308806, "calib/mean_conf": 0.31851612903225807, "calib/mu_c": 0.3132258064516129, "calib/mu_w": 0.3192718894009217, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.205866935483871, "calib/std_conf": 0.23776452107450727, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.32493589743589746, "calib/step_q_c_n": 156.0, "calib/step_q_gap": 0.005123481895356885, "calib/step_q_w": 0.3198124155405406, "calib/step_q_w_n": 1184.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1122.0, "completions/max_terminated_length": 1122.0, "completions/mean_length": 260.10546875, "completions/mean_terminated_length": 261.1255187988281, "completions/min_length": 0.0, "completions/min_terminated_length": 28.0, "epoch": 0.058666666666666666, "grad_norm": 0.0105406753718853, "learning_rate": 4.027777777777779e-06, "loss": 0.0513, "num_tokens": 12878945.0, "reward": 0.9142815470695496, "reward_std": 0.2144692987203598, "rewards/accuracy_reward_step": 0.125, "rewards/final_brier_reward_step": 0.7582505941390991, "rewards/format_reward_step": 0.9453125, "step": 55 }, { "aux_distill/final_loss": 0.03427942103189707, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.10482139163650572, "aux_distill/mean_u": 0.3670959649358311, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 179.5, "aux_distill/step_loss": 0.9453756399452686, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5931372549019608, "calib/avg_num_step_conf": 5.609375, "calib/ece": 0.22755736900790513, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.03162055335968379, "calib/gap": 0.07360258134033604, "calib/mean_conf": 0.28256120806719365, "calib/mu_c": 0.35179999999999995, "calib/mu_w": 0.2781974186596639, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.22541501976284584, "calib/std_conf": 0.24589222860685844, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.3103855421686747, "calib/step_q_c_n": 83.0, "calib/step_q_gap": -0.00240894692863991, "calib/step_q_w": 0.3127944890973146, "calib/step_q_w_n": 1353.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1644.0, "completions/max_terminated_length": 1644.0, "completions/mean_length": 289.73828125, "completions/mean_terminated_length": 289.73828125, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 0.05973333333333333, "grad_norm": 0.00916214007884264, "learning_rate": 4.000000000000001e-06, "loss": 0.1425, "num_tokens": 13059958.0, "reward": 0.898558497428894, "reward_std": 0.2293228805065155, "rewards/accuracy_reward_step": 0.05859375, "rewards/final_brier_reward_step": 0.7932106256484985, "rewards/format_reward_step": 0.9453125, "step": 56 }, { "aux_distill/final_loss": 0.005160419772437308, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.095540456706658, "aux_distill/mean_u": 0.31406481733621183, "aux_distill/n_active_final_tok": 30.75, "aux_distill/n_active_tok": 182.125, "aux_distill/step_loss": 0.9399232901632786, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5213414634146342, "calib/avg_num_step_conf": 5.69140625, "calib/ece": 0.16486827309236946, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.020080321285140562, "calib/gap": 0.01569842870544086, "calib/mean_conf": 0.2759108433734939, "calib/mu_c": 0.2890243902439024, "calib/mu_w": 0.27332596153846156, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.13806024096385539, "calib/std_conf": 0.22974561172592797, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.2871748878923767, "calib/step_q_c_n": 223.0, "calib/step_q_gap": -0.03477616559222618, "calib/step_q_w": 0.3219510534846029, "calib/step_q_w_n": 1234.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2367.0, "completions/max_terminated_length": 2367.0, "completions/mean_length": 302.14453125, "completions/mean_terminated_length": 302.14453125, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.0608, "grad_norm": 0.008268813602626324, "learning_rate": 3.972222222222223e-06, "loss": 0.1189, "num_tokens": 13244099.0, "reward": 0.9364712238311768, "reward_std": 0.20617589354515076, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.7635673880577087, "rewards/format_reward_step": 0.94921875, "step": 57 }, { "aux_distill/final_loss": 0.004002972682428663, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09283851063810289, "aux_distill/mean_u": 0.30662411728537864, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 186.5, "aux_distill/step_loss": 0.9163761790841818, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5744819709577419, "calib/avg_num_step_conf": 5.83984375, "calib/ece": 0.19728495404768776, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.01968503937007874, "calib/gap": 0.06254475202169324, "calib/mean_conf": 0.2672888910555618, "calib/mu_c": 0.32318518518518524, "calib/mu_w": 0.260640433163492, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.17913731625241217, "calib/std_conf": 0.22812540392277186, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.31599999999999995, "calib/step_q_c_n": 126.0, "calib/step_q_gap": 0.030067924393320455, "calib/step_q_w": 0.2859320756066795, "calib/step_q_w_n": 1369.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2210.0, "completions/max_terminated_length": 2210.0, "completions/mean_length": 286.65234375, "completions/mean_terminated_length": 287.7764892578125, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.06186666666666667, "grad_norm": 0.008693036623299122, "learning_rate": 3.944444444444445e-06, "loss": 0.1003, "num_tokens": 13423802.0, "reward": 0.9394717812538147, "reward_std": 0.1837690770626068, "rewards/accuracy_reward_step": 0.10546875, "rewards/final_brier_reward_step": 0.8086310625076294, "rewards/format_reward_step": 0.96484375, "step": 58 }, { "aux_distill/final_loss": 0.015480311729334062, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09621719340793788, "aux_distill/mean_u": 0.3268558049487162, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 185.875, "aux_distill/step_loss": 0.9157309792935848, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6145670318205531, "calib/avg_num_step_conf": 5.83203125, "calib/ece": 0.18993288157672691, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.020080321285140562, "calib/gap": 0.10410404923659622, "calib/mean_conf": 0.2871971386048393, "calib/mu_c": 0.37625, "calib/mu_w": 0.27214595076340375, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.16627585346427712, "calib/std_conf": 0.23079239681758587, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3913214285714286, "calib/step_q_c_n": 168.0, "calib/step_q_gap": 0.07705570214682106, "calib/step_q_w": 0.31426572642460754, "calib/step_q_w_n": 1325.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2513.0, "completions/max_terminated_length": 2513.0, "completions/mean_length": 295.46484375, "completions/mean_terminated_length": 296.62353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.06293333333333333, "grad_norm": 0.008355661295354366, "learning_rate": 3.916666666666667e-06, "loss": 0.1585, "num_tokens": 13605689.0, "reward": 0.951852560043335, "reward_std": 0.17462879419326782, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.7982363700866699, "rewards/format_reward_step": 0.96484375, "step": 59 }, { "aux_distill/final_loss": 0.012840800042795308, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09817258664406836, "aux_distill/mean_u": 0.31282819825992186, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 183.25, "aux_distill/step_loss": 0.9432034566998482, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5495705986360192, "calib/avg_num_step_conf": 5.7265625, "calib/ece": 0.16526963359243027, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.01593625498007968, "calib/gap": 0.019485866023351894, "calib/mean_conf": 0.27122439031195217, "calib/mu_c": 0.2878378378378379, "calib/mu_w": 0.268351971814486, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.14454183266932272, "calib/std_conf": 0.2296847330673, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.32742990654205606, "calib/step_q_c_n": 214.0, "calib/step_q_gap": 0.036235939756113555, "calib/step_q_w": 0.2911939667859425, "calib/step_q_w_n": 1252.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2245.0, "completions/max_terminated_length": 2245.0, "completions/mean_length": 282.25390625, "completions/mean_terminated_length": 282.25390625, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.064, "grad_norm": 0.008562030270695686, "learning_rate": 3.88888888888889e-06, "loss": 0.1449, "num_tokens": 13786802.0, "reward": 0.9524194002151489, "reward_std": 0.16599920392036438, "rewards/accuracy_reward_step": 0.14453125, "rewards/final_brier_reward_step": 0.7876513600349426, "rewards/format_reward_step": 0.97265625, "step": 60 }, { "aux_distill/final_loss": 0.0015239762360579334, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09325975948013365, "aux_distill/mean_u": 0.316322311514381, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 175.25, "aux_distill/step_loss": 0.9280256535857916, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5334341397849462, "calib/avg_num_step_conf": 5.4765625, "calib/ece": 0.13600669291338585, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.029471219758064576, "calib/mean_conf": 0.24785157480314962, "calib/mu_c": 0.27012903225806456, "calib/mu_w": 0.24065781249999998, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06988188976377953, "calib/std_conf": 0.2037172687044133, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.29107801418439716, "calib/step_q_c_n": 282.0, "calib/step_q_gap": 0.024597299898682867, "calib/step_q_w": 0.2664807142857143, "calib/step_q_w_n": 1120.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2388.0, "completions/max_terminated_length": 2388.0, "completions/mean_length": 267.8203125, "completions/mean_terminated_length": 267.8203125, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 0.06506666666666666, "grad_norm": 0.008971858769655228, "learning_rate": 3.861111111111112e-06, "loss": 0.0999, "num_tokens": 13959428.0, "reward": 1.002663254737854, "reward_std": 0.14281585812568665, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.774857759475708, "rewards/format_reward_step": 0.98828125, "step": 61 }, { "aux_distill/final_loss": 0.013271907766466029, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09631023649126291, "aux_distill/mean_u": 0.3254364545393771, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 188.375, "aux_distill/step_loss": 0.9232866205275059, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.39499999999999996, "calib/avg_num_step_conf": 5.89453125, "calib/ece": 0.20019116465863454, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": -0.043102888888888885, "calib/mean_conf": 0.24186506024096388, "calib/mu_c": 0.20291666666666666, "calib/mu_w": 0.24601955555555555, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.17283534136546183, "calib/std_conf": 0.18232861046162738, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2070762711864407, "calib/step_q_c_n": 118.0, "calib/step_q_gap": -0.06400424642678718, "calib/step_q_w": 0.2710805176132279, "calib/step_q_w_n": 1391.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2083.0, "completions/max_terminated_length": 2083.0, "completions/mean_length": 293.640625, "completions/mean_terminated_length": 294.79217529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.06613333333333334, "grad_norm": 0.00813973043113947, "learning_rate": 3.833333333333334e-06, "loss": 0.1688, "num_tokens": 14141680.0, "reward": 0.939253568649292, "reward_std": 0.1461803913116455, "rewards/accuracy_reward_step": 0.09375, "rewards/final_brier_reward_step": 0.819913387298584, "rewards/format_reward_step": 0.96484375, "step": 62 }, { "aux_distill/final_loss": 0.01066195282874105, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09622492571361363, "aux_distill/mean_u": 0.3600727580316144, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 202.375, "aux_distill/step_loss": 0.9302633851766586, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5091911764705882, "calib/avg_num_step_conf": 6.6328125, "calib/ece": 0.1437712, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.012, "calib/gap": 0.027395860566448826, "calib/mean_conf": 0.2283888, "calib/mu_c": 0.2520588235294118, "calib/mu_w": 0.22466296296296295, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11808, "calib/std_conf": 0.1971235718897159, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2832857142857143, "calib/step_q_c_n": 210.0, "calib/step_q_gap": 0.027071332565284212, "calib/step_q_w": 0.2562143817204301, "calib/step_q_w_n": 1488.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2271.0, "completions/max_terminated_length": 2271.0, "completions/mean_length": 304.390625, "completions/mean_terminated_length": 306.78741455078125, "completions/min_length": 0.0, "completions/min_terminated_length": 67.0, "epoch": 0.0672, "grad_norm": 0.007689535152167082, "learning_rate": 3.8055555555555556e-06, "loss": 0.0824, "num_tokens": 14328244.0, "reward": 0.9636499881744385, "reward_std": 0.15414997935295105, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.8179250359535217, "rewards/format_reward_step": 0.97265625, "step": 63 }, { "aux_distill/final_loss": 0.02386802105775132, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09631462418474257, "aux_distill/mean_u": 0.30345787882691766, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 191.75, "aux_distill/step_loss": 0.891542162746191, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5595238095238095, "calib/avg_num_step_conf": 5.9921875, "calib/ece": 0.11710276679841897, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04432678853531935, "calib/mean_conf": 0.20898418972332017, "calib/mu_c": 0.24595238095238095, "calib/mu_w": 0.2016255924170616, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0800395256916996, "calib/std_conf": 0.19345013898101568, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2917105263157894, "calib/step_q_c_n": 228.0, "calib/step_q_gap": 0.0408718586281937, "calib/step_q_w": 0.2508386676875957, "calib/step_q_w_n": 1306.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 766.0, "completions/max_terminated_length": 766.0, "completions/mean_length": 271.4609375, "completions/mean_terminated_length": 272.5255126953125, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.06826666666666667, "grad_norm": 0.008217972703278065, "learning_rate": 3.777777777777778e-06, "loss": 0.1115, "num_tokens": 14501514.0, "reward": 0.9809128046035767, "reward_std": 0.13844476640224457, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.8172944188117981, "rewards/format_reward_step": 0.98046875, "step": 64 }, { "aux_distill/final_loss": 0.01491037463711109, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.0959760737605393, "aux_distill/mean_u": 0.3108982882823852, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 185.0, "aux_distill/step_loss": 0.9150295872241259, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.47708674304418985, "calib/avg_num_step_conf": 5.78515625, "calib/ece": 0.15480175782722427, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": -0.010455291779942505, "calib/mean_conf": 0.21257079119238356, "calib/mu_c": 0.2040425531914893, "calib/mu_w": 0.21449784497143182, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.09152941176470587, "calib/std_conf": 0.18973537966805565, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2613697095435685, "calib/step_q_c_n": 241.0, "calib/step_q_gap": 0.020063619419328244, "calib/step_q_w": 0.24130609012424023, "calib/step_q_w_n": 1240.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 715.0, "completions/max_terminated_length": 715.0, "completions/mean_length": 267.62109375, "completions/mean_terminated_length": 268.67059326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 67.0, "epoch": 0.06933333333333333, "grad_norm": 0.00842042826116085, "learning_rate": 3.7500000000000005e-06, "loss": 0.0848, "num_tokens": 14675049.0, "reward": 0.9741963148117065, "reward_std": 0.14658647775650024, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.7882363796234131, "rewards/format_reward_step": 0.9765625, "step": 65 }, { "aux_distill/final_loss": 0.012572216726766783, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09674823796376586, "aux_distill/mean_u": 0.3569632117817762, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 199.125, "aux_distill/step_loss": 0.9297657012939453, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4691566566566567, "calib/avg_num_step_conf": 6.22265625, "calib/ece": 0.17058498023715415, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": -0.018828078078078014, "calib/mean_conf": 0.20688537549407118, "calib/mu_c": 0.19081081081081086, "calib/mu_w": 0.20963888888888887, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.11561264822134386, "calib/std_conf": 0.1878045161020822, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2612807881773399, "calib/step_q_c_n": 203.0, "calib/step_q_gap": 0.026016399688131242, "calib/step_q_w": 0.23526438848920866, "calib/step_q_w_n": 1390.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2843.0, "completions/max_terminated_length": 2843.0, "completions/mean_length": 305.96875, "completions/mean_terminated_length": 305.96875, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.0704, "grad_norm": 0.00837609264999628, "learning_rate": 3.7222222222222225e-06, "loss": 0.1152, "num_tokens": 14859729.0, "reward": 0.9772807359695435, "reward_std": 0.10917495936155319, "rewards/accuracy_reward_step": 0.14453125, "rewards/final_brier_reward_step": 0.8217490911483765, "rewards/format_reward_step": 0.98828125, "step": 66 }, { "aux_distill/final_loss": 0.0044797318550990894, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09419736289419234, "aux_distill/mean_u": 0.3200274721063564, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 197.875, "aux_distill/step_loss": 0.9285344202071428, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5055102040816326, "calib/avg_num_step_conf": 6.18359375, "calib/ece": 0.14394417670682727, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.021607132653061273, "calib/mean_conf": 0.19372650602409636, "calib/mu_c": 0.21108163265306126, "calib/mu_w": 0.1894745, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.07044176706827308, "calib/std_conf": 0.17925244348186117, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2638344827586207, "calib/step_q_c_n": 290.0, "calib/step_q_gap": 0.0199265941275302, "calib/step_q_w": 0.2439078886310905, "calib/step_q_w_n": 1293.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 999.0, "completions/max_terminated_length": 999.0, "completions/mean_length": 298.0703125, "completions/mean_terminated_length": 299.2392272949219, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.07146666666666666, "grad_norm": 0.00842991005629301, "learning_rate": 3.694444444444445e-06, "loss": 0.0872, "num_tokens": 15041043.0, "reward": 0.9772289395332336, "reward_std": 0.15305714309215546, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.7903953790664673, "rewards/format_reward_step": 0.96875, "step": 67 }, { "aux_distill/final_loss": 0.009524599982796644, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09283689758740366, "aux_distill/mean_u": 0.30089278194712804, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 198.75, "aux_distill/step_loss": 0.8997951615601778, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5079744816586922, "calib/avg_num_step_conf": 6.26171875, "calib/ece": 0.1264251968503937, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.011811023622047244, "calib/gap": 0.008591174906964344, "calib/mean_conf": 0.20270866141732283, "calib/mu_c": 0.20977777777777776, "calib/mu_w": 0.2011866028708134, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07598425196850395, "calib/std_conf": 0.18526763404381363, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22980392156862745, "calib/step_q_c_n": 255.0, "calib/step_q_gap": -0.025651568045615913, "calib/step_q_w": 0.25545548961424336, "calib/step_q_w_n": 1348.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 845.0, "completions/max_terminated_length": 845.0, "completions/mean_length": 285.70703125, "completions/mean_terminated_length": 286.82745361328125, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.07253333333333334, "grad_norm": 0.008538416586816311, "learning_rate": 3.6666666666666666e-06, "loss": 0.0745, "num_tokens": 15218272.0, "reward": 0.9916496276855469, "reward_std": 0.10609979927539825, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.8153305053710938, "rewards/format_reward_step": 0.9921875, "step": 68 }, { "aux_distill/final_loss": 0.0032871251341930474, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.0926831008400768, "aux_distill/mean_u": 0.3187473535917535, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 185.25, "aux_distill/step_loss": 0.9169696141034365, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.48989485981308417, "calib/avg_num_step_conf": 5.83203125, "calib/ece": 0.12110393700787403, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.019810280373831785, "calib/mean_conf": 0.17180944881889765, "calib/mu_c": 0.18850000000000003, "calib/mu_w": 0.16868971962616824, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.06771653543307088, "calib/std_conf": 0.16317557976602892, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.23838174273858923, "calib/step_q_c_n": 241.0, "calib/step_q_gap": 0.02208743640338473, "calib/step_q_w": 0.2162943063352045, "calib/step_q_w_n": 1247.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 895.0, "completions/max_terminated_length": 895.0, "completions/mean_length": 291.23828125, "completions/mean_terminated_length": 292.3804016113281, "completions/min_length": 0.0, "completions/min_terminated_length": 43.0, "epoch": 0.0736, "grad_norm": 0.008527516387403011, "learning_rate": 3.638888888888889e-06, "loss": 0.1258, "num_tokens": 15397325.0, "reward": 0.9724764823913574, "reward_std": 0.12118256092071533, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.8199528455734253, "rewards/format_reward_step": 0.96875, "step": 69 }, { "aux_distill/final_loss": 0.006224807533726562, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09226469928398728, "aux_distill/mean_u": 0.3327977402393844, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 212.0, "aux_distill/step_loss": 0.903972553089261, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.541778503909441, "calib/avg_num_step_conf": 6.71875, "calib/ece": 0.09458640000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01853315439374492, "calib/mean_conf": 0.1862136, "calib/mu_c": 0.20170731707317077, "calib/mu_w": 0.18317416267942585, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.058400000000000014, "calib/std_conf": 0.16473271142987966, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2516, "calib/step_q_c_n": 225.0, "calib/step_q_gap": 0.03907003344481605, "calib/step_q_w": 0.21252996655518394, "calib/step_q_w_n": 1495.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2264.0, "completions/max_terminated_length": 2264.0, "completions/mean_length": 310.87109375, "completions/mean_terminated_length": 312.0902099609375, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.07466666666666667, "grad_norm": 0.008320843800902367, "learning_rate": 3.6111111111111115e-06, "loss": 0.0952, "num_tokens": 15583900.0, "reward": 0.9748121500015259, "reward_std": 0.13961410522460938, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.8168118000030518, "rewards/format_reward_step": 0.97265625, "step": 70 }, { "aux_distill/final_loss": 0.01457615937852097, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09353712585289031, "aux_distill/mean_u": 0.3341587244126239, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 219.25, "aux_distill/step_loss": 0.8916427735239267, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4694613821138211, "calib/avg_num_step_conf": 6.8515625, "calib/ece": 0.14871581027667982, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004845447154471522, "calib/mean_conf": 0.18850948616600788, "calib/mu_c": 0.18458333333333332, "calib/mu_w": 0.18942878048780484, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07375098814229249, "calib/std_conf": 0.18232975444686195, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2465986394557823, "calib/step_q_c_n": 294.0, "calib/step_q_gap": 0.0239691874009878, "calib/step_q_w": 0.2226294520547945, "calib/step_q_w_n": 1460.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3067.0, "completions/max_terminated_length": 3067.0, "completions/mean_length": 342.21875, "completions/mean_terminated_length": 342.21875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.07573333333333333, "grad_norm": 0.00771966902539134, "learning_rate": 3.5833333333333335e-06, "loss": 0.1055, "num_tokens": 15775916.0, "reward": 0.9849973917007446, "reward_std": 0.1192353218793869, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.7981197834014893, "rewards/format_reward_step": 0.984375, "step": 71 }, { "aux_distill/final_loss": 0.0070067721644591074, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09377522463910282, "aux_distill/mean_u": 0.32747618567467474, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 217.375, "aux_distill/step_loss": 0.9167319145053625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5759093619558736, "calib/avg_num_step_conf": 6.8046875, "calib/ece": 0.1234972440944882, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.07430248062015504, "calib/mean_conf": 0.1937728346456693, "calib/mu_c": 0.25666666666666665, "calib/mu_w": 0.1823641860465116, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08186338582677166, "calib/std_conf": 0.19557771863334295, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.273109022556391, "calib/step_q_c_n": 266.0, "calib/step_q_gap": 0.05228497106587615, "calib/step_q_w": 0.22082405149051487, "calib/step_q_w_n": 1476.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 772.0, "completions/max_terminated_length": 772.0, "completions/mean_length": 298.0234375, "completions/mean_terminated_length": 299.1921691894531, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.0768, "grad_norm": 0.009275647811591625, "learning_rate": 3.555555555555556e-06, "loss": 0.0917, "num_tokens": 15956618.0, "reward": 0.9936858415603638, "reward_std": 0.10461928695440292, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.8428404927253723, "rewards/format_reward_step": 0.9921875, "step": 72 }, { "aux_distill/final_loss": 0.026022471355645393, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09704366908408701, "aux_distill/mean_u": 0.2787974081328851, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 206.75, "aux_distill/step_loss": 0.8923692554235458, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4880334486735871, "calib/avg_num_step_conf": 6.49609375, "calib/ece": 0.1279764705882353, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": -0.016696078431372552, "calib/mean_conf": 0.17767058823529414, "calib/mu_c": 0.16431372549019607, "calib/mu_w": 0.18100980392156862, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05282352941176469, "calib/std_conf": 0.1659688275646466, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21652920962199315, "calib/step_q_c_n": 291.0, "calib/step_q_gap": -0.010110688262762485, "calib/step_q_w": 0.22663989788475564, "calib/step_q_w_n": 1371.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 790.0, "completions/max_terminated_length": 790.0, "completions/mean_length": 303.02734375, "completions/mean_terminated_length": 304.2156982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.07786666666666667, "grad_norm": 0.00969206728041172, "learning_rate": 3.5277777777777784e-06, "loss": 0.0929, "num_tokens": 16141225.0, "reward": 0.9954813718795776, "reward_std": 0.08951573073863983, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.7995564937591553, "rewards/format_reward_step": 0.9921875, "step": 73 }, { "aux_distill/final_loss": 0.001336570298008155, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09164642659015954, "aux_distill/mean_u": 0.31843684331749694, "aux_distill/n_active_final_tok": 31.875, "aux_distill/n_active_tok": 207.5, "aux_distill/step_loss": 0.9124545361846685, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5203559738134207, "calib/avg_num_step_conf": 6.484375, "calib/ece": 0.13404905882352938, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.011764705882352941, "calib/gap": 0.00830483121931258, "calib/mean_conf": 0.18982160784313726, "calib/mu_c": 0.19659574468085103, "calib/mu_w": 0.18829091346153845, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0697784705882353, "calib/std_conf": 0.2017439701948087, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2442222222222222, "calib/step_q_c_n": 270.0, "calib/step_q_gap": 0.025577711430855354, "calib/step_q_w": 0.21864451079136685, "calib/step_q_w_n": 1390.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2546.0, "completions/max_terminated_length": 2546.0, "completions/mean_length": 295.390625, "completions/mean_terminated_length": 295.390625, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 0.07893333333333333, "grad_norm": 0.009934015572071075, "learning_rate": 3.5e-06, "loss": 0.1233, "num_tokens": 16320773.0, "reward": 0.9939709305763245, "reward_std": 0.10015146434307098, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.8082544207572937, "rewards/format_reward_step": 0.99609375, "step": 74 }, { "aux_distill/final_loss": 0.007649911072803661, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09006717673037201, "aux_distill/mean_u": 0.2910189388968526, "aux_distill/n_active_final_tok": 31.875, "aux_distill/n_active_tok": 217.0, "aux_distill/step_loss": 0.8777220211923122, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.43498713956951396, "calib/avg_num_step_conf": 6.78125, "calib/ece": 0.26467058823529416, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": -0.028241234601326687, "calib/mean_conf": 0.171721568627451, "calib/mu_c": 0.1533370786516854, "calib/mu_w": 0.18157831325301207, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04368627450980392, "calib/std_conf": 0.1767203532711626, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.20076884422110555, "calib/step_q_c_n": 597.0, "calib/step_q_gap": -0.014894895901809285, "calib/step_q_w": 0.21566374012291484, "calib/step_q_w_n": 1139.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1959.0, "completions/max_terminated_length": 1959.0, "completions/mean_length": 315.84765625, "completions/mean_terminated_length": 315.84765625, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 0.08, "grad_norm": 0.009579086676239967, "learning_rate": 3.4722222222222224e-06, "loss": 0.1133, "num_tokens": 16506382.0, "reward": 1.007990837097168, "reward_std": 0.1300535500049591, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6839504241943359, "rewards/format_reward_step": 0.984375, "step": 75 }, { "aux_distill/final_loss": 0.01286680546036223, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09864677162840962, "aux_distill/mean_u": 0.3541003805943957, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 213.5, "aux_distill/step_loss": 0.9478672724217176, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5315508489410117, "calib/avg_num_step_conf": 6.67578125, "calib/ece": 0.14995450980392155, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": 0.005830141781900944, "calib/mean_conf": 0.1600062745098039, "calib/mu_c": 0.16451034482758622, "calib/mu_w": 0.15868020304568528, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04125490196078431, "calib/std_conf": 0.1729595050640556, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.20488826815642455, "calib/step_q_c_n": 358.0, "calib/step_q_gap": 0.01353297578040677, "calib/step_q_w": 0.19135529237601778, "calib/step_q_w_n": 1351.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1152.0, "completions/max_terminated_length": 1152.0, "completions/mean_length": 298.48828125, "completions/mean_terminated_length": 299.6588439941406, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.08106666666666666, "grad_norm": 0.00940918643027544, "learning_rate": 3.444444444444445e-06, "loss": 0.0615, "num_tokens": 16685851.0, "reward": 1.0017704963684082, "reward_std": 0.09616874903440475, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.7886971235275269, "rewards/format_reward_step": 0.98828125, "step": 76 }, { "aux_distill/final_loss": 0.0038691447043675, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09274809854105115, "aux_distill/mean_u": 0.32949178263100115, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 233.875, "aux_distill/step_loss": 0.9158735387027264, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.48913308913308917, "calib/avg_num_step_conf": 7.30859375, "calib/ece": 0.18323134586871045, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.003842936891134796, "calib/mean_conf": 0.2007056620053053, "calib/mu_c": 0.19784615384615384, "calib/mu_w": 0.20168909073728863, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06401574803149605, "calib/std_conf": 0.1796372475476522, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22466580976863756, "calib/step_q_c_n": 389.0, "calib/step_q_gap": 0.011227794823058884, "calib/step_q_w": 0.21343801494557868, "calib/step_q_w_n": 1482.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2190.0, "completions/max_terminated_length": 2190.0, "completions/mean_length": 328.296875, "completions/mean_terminated_length": 328.296875, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.08213333333333334, "grad_norm": 0.00928324181586504, "learning_rate": 3.416666666666667e-06, "loss": 0.0955, "num_tokens": 16874559.0, "reward": 0.9954378604888916, "reward_std": 0.13767540454864502, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.7565008401870728, "rewards/format_reward_step": 0.98046875, "step": 77 }, { "aux_distill/final_loss": 0.00453551660393714, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09306294191628695, "aux_distill/mean_u": 0.35334473479644335, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 228.25, "aux_distill/step_loss": 0.9170228522270918, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5382547169811321, "calib/avg_num_step_conf": 7.1328125, "calib/ece": 0.16031620553359685, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.016947169811320717, "calib/mean_conf": 0.17415019762845851, "calib/mu_c": 0.18754716981132075, "calib/mu_w": 0.17060000000000003, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06249011857707509, "calib/std_conf": 0.18909689981521058, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22063563535911604, "calib/step_q_c_n": 362.0, "calib/step_q_gap": 0.0001957446487335368, "calib/step_q_w": 0.2204398907103825, "calib/step_q_w_n": 1464.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2535.0, "completions/max_terminated_length": 2535.0, "completions/mean_length": 343.0625, "completions/mean_terminated_length": 343.0625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.0832, "grad_norm": 0.008053023368120193, "learning_rate": 3.3888888888888893e-06, "loss": 0.1186, "num_tokens": 17070407.0, "reward": 0.9905759692192078, "reward_std": 0.11786898970603943, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.7897456884384155, "rewards/format_reward_step": 0.984375, "step": 78 }, { "aux_distill/final_loss": 0.005098032514069928, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.0909080128185451, "aux_distill/mean_u": 0.322553311796659, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 232.25, "aux_distill/step_loss": 0.8937860177829862, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5226716756945659, "calib/avg_num_step_conf": 7.6953125, "calib/ece": 0.15509762845849803, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.009803616984099234, "calib/mean_conf": 0.19889446640316208, "calib/mu_c": 0.20641186440677967, "calib/mu_w": 0.19660824742268043, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06039525691699605, "calib/std_conf": 0.19399051230062755, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22875072815533978, "calib/step_q_c_n": 412.0, "calib/step_q_gap": -0.013005148530594701, "calib/step_q_w": 0.24175587668593448, "calib/step_q_w_n": 1557.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2059.0, "completions/max_terminated_length": 2059.0, "completions/mean_length": 357.65625, "completions/mean_terminated_length": 360.4724426269531, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.08426666666666667, "grad_norm": 0.007810310926288366, "learning_rate": 3.3611111111111117e-06, "loss": 0.0661, "num_tokens": 17268343.0, "reward": 0.9874042272567749, "reward_std": 0.14472907781600952, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.7677773237228394, "rewards/format_reward_step": 0.9765625, "step": 79 }, { "aux_distill/final_loss": 0.02118439533660421, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09981375257484615, "aux_distill/mean_u": 0.355794684343527, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 221.0, "aux_distill/step_loss": 0.9345843084156513, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5041666666666667, "calib/avg_num_step_conf": 6.90625, "calib/ece": 0.20478346456692917, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.011811023622047244, "calib/gap": 0.009777298850574656, "calib/mean_conf": 0.19017716535433069, "calib/mu_c": 0.196875, "calib/mu_w": 0.18709770114942534, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04, "calib/std_conf": 0.1998494810329267, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.21786329365079365, "calib/step_q_c_n": 504.0, "calib/step_q_gap": 0.002584891752059476, "calib/step_q_w": 0.21527840189873418, "calib/step_q_w_n": 1264.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2182.0, "completions/max_terminated_length": 2182.0, "completions/mean_length": 324.84765625, "completions/mean_terminated_length": 324.84765625, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "epoch": 0.08533333333333333, "grad_norm": 0.008695478551089764, "learning_rate": 3.3333333333333333e-06, "loss": 0.1084, "num_tokens": 17453664.0, "reward": 1.0159547328948975, "reward_std": 0.12883907556533813, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.7272218465805054, "rewards/format_reward_step": 0.9921875, "step": 80 }, { "aux_distill/final_loss": 0.0012994232774872216, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09247418725863099, "aux_distill/mean_u": 0.3308540225569126, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 243.75, "aux_distill/step_loss": 0.9208435844630003, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5451582867783985, "calib/avg_num_step_conf": 7.8984375, "calib/ece": 0.1807569721115538, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.01195219123505976, "calib/gap": 0.008699410304158939, "calib/mean_conf": 0.169601593625498, "calib/mu_c": 0.17580555555555558, "calib/mu_w": 0.16710614525139664, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03175298804780877, "calib/std_conf": 0.18291364909527338, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.20241322314049584, "calib/step_q_c_n": 484.0, "calib/step_q_gap": -0.0023286494212726994, "calib/step_q_w": 0.20474187256176854, "calib/step_q_w_n": 1538.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1362.0, "completions/max_terminated_length": 1362.0, "completions/mean_length": 359.78515625, "completions/mean_terminated_length": 365.4960632324219, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.0864, "grad_norm": 0.007718355860561132, "learning_rate": 3.3055555555555558e-06, "loss": 0.0819, "num_tokens": 17652017.0, "reward": 0.992728054523468, "reward_std": 0.13900279998779297, "rewards/accuracy_reward_step": 0.28125, "rewards/final_brier_reward_step": 0.7315497994422913, "rewards/format_reward_step": 0.97265625, "step": 81 }, { "aux_distill/final_loss": 0.0007361993903032271, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.0872638972941786, "aux_distill/mean_u": 0.2819722442933623, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 236.0, "aux_distill/step_loss": 0.8704303652048111, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5462806424344886, "calib/avg_num_step_conf": 7.375, "calib/ece": 0.20583043478260873, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.016192526063679907, "calib/mean_conf": 0.16492055335968375, "calib/mu_c": 0.17573690476190476, "calib/mu_w": 0.15954437869822485, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01936758893280632, "calib/std_conf": 0.165332041196902, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24561, "calib/step_q_c_n": 540.0, "calib/step_q_gap": 0.027734629080118672, "calib/step_q_w": 0.21787537091988132, "calib/step_q_w_n": 1348.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2959.0, "completions/max_terminated_length": 2959.0, "completions/mean_length": 358.24609375, "completions/mean_terminated_length": 358.24609375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.08746666666666666, "grad_norm": 0.00759322801604867, "learning_rate": 3.277777777777778e-06, "loss": 0.1051, "num_tokens": 17849280.0, "reward": 1.0072963237762451, "reward_std": 0.1500355303287506, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.709905207157135, "rewards/format_reward_step": 0.9765625, "step": 82 }, { "aux_distill/final_loss": 0.005374661686801119, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08906476967968047, "aux_distill/mean_u": 0.32054090934212937, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 280.125, "aux_distill/step_loss": 0.8745236918330193, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6215083798882681, "calib/avg_num_step_conf": 8.89453125, "calib/ece": 0.17135458167330675, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.00796812749003984, "calib/gap": 0.07035389509621354, "calib/mean_conf": 0.16498007968127493, "calib/mu_c": 0.21515277777777778, "calib/mu_w": 0.14479888268156424, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.024741035856573702, "calib/std_conf": 0.18749063084584716, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25647752808988766, "calib/step_q_c_n": 534.0, "calib/step_q_gap": 0.05890552579499381, "calib/step_q_w": 0.19757200229489386, "calib/step_q_w_n": 1743.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2742.0, "completions/max_terminated_length": 2742.0, "completions/mean_length": 409.953125, "completions/mean_terminated_length": 411.5608215332031, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.08853333333333334, "grad_norm": 0.011330654844641685, "learning_rate": 3.2500000000000002e-06, "loss": 0.1335, "num_tokens": 18061492.0, "reward": 0.9993002414703369, "reward_std": 0.15400338172912598, "rewards/accuracy_reward_step": 0.28125, "rewards/final_brier_reward_step": 0.7486004829406738, "rewards/format_reward_step": 0.96875, "step": 83 }, { "aux_distill/final_loss": 0.0014974686982895946, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09288601903244853, "aux_distill/mean_u": 0.32246681592686244, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 231.875, "aux_distill/step_loss": 0.92436776868999, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5191112716763006, "calib/avg_num_step_conf": 7.24609375, "calib/ece": 0.23661660079051383, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.00378106936416181, "calib/mean_conf": 0.16803952569169958, "calib/mu_c": 0.170625, "calib/mu_w": 0.1668439306358382, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.044225296442687745, "calib/std_conf": 0.18492187865290066, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23576146788990826, "calib/step_q_c_n": 545.0, "calib/step_q_gap": 0.02361803277540442, "calib/step_q_w": 0.21214343511450384, "calib/step_q_w_n": 1310.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2355.0, "completions/max_terminated_length": 2355.0, "completions/mean_length": 363.68359375, "completions/mean_terminated_length": 363.68359375, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.0896, "grad_norm": 0.008948885835707188, "learning_rate": 3.2222222222222227e-06, "loss": 0.1497, "num_tokens": 18260515.0, "reward": 1.0107507705688477, "reward_std": 0.12221451848745346, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.7207201719284058, "rewards/format_reward_step": 0.98828125, "step": 84 }, { "aux_distill/final_loss": 0.0013104632453178056, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.0882079191505909, "aux_distill/mean_u": 0.3178119961895146, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 255.125, "aux_distill/step_loss": 0.8781477827578783, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5055598455598456, "calib/avg_num_step_conf": 7.98046875, "calib/ece": 0.20079959839357428, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.01606425702811245, "calib/gap": -0.0022554131274131195, "calib/mean_conf": 0.182814859437751, "calib/mu_c": 0.18122972972972973, "calib/mu_w": 0.18348514285714285, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04321285140562249, "calib/std_conf": 0.18925974454092212, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2211341719077568, "calib/step_q_c_n": 477.0, "calib/step_q_gap": 0.006088578038025011, "calib/step_q_w": 0.2150455938697318, "calib/step_q_w_n": 1566.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2625.0, "completions/max_terminated_length": 2625.0, "completions/mean_length": 404.17578125, "completions/mean_terminated_length": 405.76080322265625, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.09066666666666667, "grad_norm": 0.007939356379210949, "learning_rate": 3.1944444444444443e-06, "loss": 0.1366, "num_tokens": 18471808.0, "reward": 0.9874961972236633, "reward_std": 0.1482926607131958, "rewards/accuracy_reward_step": 0.2890625, "rewards/final_brier_reward_step": 0.7171797156333923, "rewards/format_reward_step": 0.96875, "step": 85 }, { "aux_distill/final_loss": 0.00817710635055846, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09186960756778717, "aux_distill/mean_u": 0.33337004361021644, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 253.75, "aux_distill/step_loss": 0.8941647335886955, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5443541965281096, "calib/avg_num_step_conf": 7.9296875, "calib/ece": 0.15467131474103585, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.00796812749003984, "calib/gap": 0.038398829431438075, "calib/mean_conf": 0.17911354581673306, "calib/mu_c": 0.2069565217391304, "calib/mu_w": 0.16855769230769233, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02944223107569721, "calib/std_conf": 0.1938464091347558, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24343429256594726, "calib/step_q_c_n": 417.0, "calib/step_q_gap": 0.04763210375204835, "calib/step_q_w": 0.1958021888138989, "calib/step_q_w_n": 1613.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2882.0, "completions/max_terminated_length": 2882.0, "completions/mean_length": 396.1015625, "completions/mean_terminated_length": 397.6549377441406, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.09173333333333333, "grad_norm": 0.008128196001052856, "learning_rate": 3.1666666666666667e-06, "loss": 0.1691, "num_tokens": 18678722.0, "reward": 1.002101182937622, "reward_std": 0.14600814878940582, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.7542024254798889, "rewards/format_reward_step": 0.98046875, "step": 86 }, { "aux_distill/final_loss": 0.0071426674749091035, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08816275629214942, "aux_distill/mean_u": 0.30751081756694526, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 249.25, "aux_distill/step_loss": 0.8601995501667261, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5143947277141866, "calib/avg_num_step_conf": 7.93359375, "calib/ece": 0.25008467741935486, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00022150537634407774, "calib/mean_conf": 0.16781854838709678, "calib/mu_c": 0.16795698924731184, "calib/mu_w": 0.16773548387096776, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.021451612903225812, "calib/std_conf": 0.16624613547142425, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.23667210884353745, "calib/step_q_c_n": 588.0, "calib/step_q_gap": 0.03593475610618471, "calib/step_q_w": 0.20073735273735274, "calib/step_q_w_n": 1443.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2539.0, "completions/max_terminated_length": 2539.0, "completions/mean_length": 397.36328125, "completions/mean_terminated_length": 398.9216003417969, "completions/min_length": 0.0, "completions/min_terminated_length": 32.0, "epoch": 0.0928, "grad_norm": 0.007947259582579136, "learning_rate": 3.138888888888889e-06, "loss": 0.1589, "num_tokens": 18885943.0, "reward": 0.9949440956115723, "reward_std": 0.16049188375473022, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.6656694412231445, "rewards/format_reward_step": 0.9609375, "step": 87 }, { "aux_distill/final_loss": 0.0010045899275610282, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08910067100077868, "aux_distill/mean_u": 0.30216705973354374, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 252.875, "aux_distill/step_loss": 0.8879929222166538, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5461696056193751, "calib/avg_num_step_conf": 7.90234375, "calib/ece": 0.213781746031746, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.021131996780566292, "calib/mean_conf": 0.18994841269841273, "calib/mu_c": 0.20445569620253162, "calib/mu_w": 0.18332369942196533, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04511904761904762, "calib/std_conf": 0.21408506254621545, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2332246621621622, "calib/step_q_c_n": 592.0, "calib/step_q_gap": -0.005986379067746989, "calib/step_q_w": 0.2392110412299092, "calib/step_q_w_n": 1431.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2860.0, "completions/max_terminated_length": 2860.0, "completions/mean_length": 395.8984375, "completions/mean_terminated_length": 399.0157470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.09386666666666667, "grad_norm": 0.007929529994726181, "learning_rate": 3.1111111111111116e-06, "loss": 0.0733, "num_tokens": 19097141.0, "reward": 1.0030913352966309, "reward_std": 0.15919756889343262, "rewards/accuracy_reward_step": 0.30859375, "rewards/final_brier_reward_step": 0.7210264205932617, "rewards/format_reward_step": 0.9765625, "step": 88 }, { "aux_distill/final_loss": 0.001158037934146705, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09117010282352567, "aux_distill/mean_u": 0.3445973235459962, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 257.625, "aux_distill/step_loss": 0.9082269109785557, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5412087912087912, "calib/avg_num_step_conf": 8.2578125, "calib/ece": 0.17095617529880477, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.00796812749003984, "calib/gap": 0.03232043318999839, "calib/mean_conf": 0.21207171314741036, "calib/mu_c": 0.23550724637681159, "calib/mu_w": 0.2031868131868132, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.054063745019920326, "calib/std_conf": 0.2102883877264814, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26942257383966245, "calib/step_q_c_n": 474.0, "calib/step_q_gap": 0.04417623237624782, "calib/step_q_w": 0.22524634146341463, "calib/step_q_w_n": 1640.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2811.0, "completions/max_terminated_length": 2811.0, "completions/mean_length": 395.9453125, "completions/mean_terminated_length": 399.06298828125, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.09493333333333333, "grad_norm": 0.008111786097288132, "learning_rate": 3.0833333333333336e-06, "loss": 0.1331, "num_tokens": 19307391.0, "reward": 1.0002186298370361, "reward_std": 0.15321889519691467, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.7504370808601379, "rewards/format_reward_step": 0.98046875, "step": 89 }, { "aux_distill/final_loss": 0.0017116198050644016, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09068862511776388, "aux_distill/mean_u": 0.3575135793071044, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 273.875, "aux_distill/step_loss": 0.9017513785511255, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5654965753424657, "calib/avg_num_step_conf": 9.26953125, "calib/ece": 0.1963935742971888, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.01606425702811245, "calib/gap": 0.04149330635118309, "calib/mean_conf": 0.19669879518072286, "calib/mu_c": 0.22602739726027396, "calib/mu_w": 0.18453409090909087, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04995983935742972, "calib/std_conf": 0.22210598802874976, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2510097087378641, "calib/step_q_c_n": 515.0, "calib/step_q_gap": 0.036823056423547634, "calib/step_q_w": 0.21418665231431647, "calib/step_q_w_n": 1858.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2900.0, "completions/max_terminated_length": 2900.0, "completions/mean_length": 416.63671875, "completions/mean_terminated_length": 421.57708740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.096, "grad_norm": 0.008595678955316544, "learning_rate": 3.055555555555556e-06, "loss": 0.1553, "num_tokens": 19517370.0, "reward": 0.9869974255561829, "reward_std": 0.1972612589597702, "rewards/accuracy_reward_step": 0.28515625, "rewards/final_brier_reward_step": 0.7239947319030762, "rewards/format_reward_step": 0.96484375, "step": 90 }, { "aux_distill/final_loss": 0.0005224488650128478, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08907682518474758, "aux_distill/mean_u": 0.30453080002601157, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 261.625, "aux_distill/step_loss": 0.8892008904367685, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4905531609195402, "calib/avg_num_step_conf": 8.48046875, "calib/ece": 0.238751968503937, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.011811023622047244, "calib/gap": -0.013195402298850578, "calib/mean_conf": 0.18053937007874016, "calib/mu_c": 0.17149999999999999, "calib/mu_w": 0.18469540229885056, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05216535433070866, "calib/std_conf": 0.20483289815489195, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23011219512195122, "calib/step_q_c_n": 615.0, "calib/step_q_gap": 0.005790922628377937, "calib/step_q_w": 0.22432127249357328, "calib/step_q_w_n": 1556.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1255.0, "completions/max_terminated_length": 1255.0, "completions/mean_length": 403.91015625, "completions/mean_terminated_length": 407.0905456542969, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.09706666666666666, "grad_norm": 0.006919812876731157, "learning_rate": 3.0277777777777776e-06, "loss": 0.0914, "num_tokens": 19728483.0, "reward": 1.0087969303131104, "reward_std": 0.12799367308616638, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.7129064798355103, "rewards/format_reward_step": 0.9921875, "step": 91 }, { "aux_distill/final_loss": 0.018626842816956923, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09000969352200627, "aux_distill/mean_u": 0.28446525599704253, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 249.875, "aux_distill/step_loss": 0.8442163858562708, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.49432539682539683, "calib/avg_num_step_conf": 8.21484375, "calib/ece": 0.19927200000000006, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.024, "calib/gap": -5.8730158730091464e-05, "calib/mean_conf": 0.22632799999999997, "calib/mu_c": 0.2262857142857143, "calib/mu_w": 0.2263444444444444, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07280000000000002, "calib/std_conf": 0.23856163232171262, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2674322033898305, "calib/step_q_c_n": 472.0, "calib/step_q_gap": 0.027492289226740352, "calib/step_q_w": 0.23993991416309013, "calib/step_q_w_n": 1631.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2232.0, "completions/max_terminated_length": 2232.0, "completions/mean_length": 373.703125, "completions/mean_terminated_length": 378.1343994140625, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.09813333333333334, "grad_norm": 0.012034201063215733, "learning_rate": 3e-06, "loss": 0.1209, "num_tokens": 19930871.0, "reward": 0.9856367111206055, "reward_std": 0.16968180239200592, "rewards/accuracy_reward_step": 0.2734375, "rewards/final_brier_reward_step": 0.7212734222412109, "rewards/format_reward_step": 0.9765625, "step": 92 }, { "aux_distill/final_loss": 0.000823249449240393, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.0883082477375865, "aux_distill/mean_u": 0.30699829971319126, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 289.5, "aux_distill/step_loss": 0.880612725391984, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5720172684458399, "calib/avg_num_step_conf": 9.25, "calib/ece": 0.20723809523809528, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.023809523809523808, "calib/gap": 0.03995604395604396, "calib/mean_conf": 0.22728571428571434, "calib/mu_c": 0.25614285714285717, "calib/mu_w": 0.2161868131868132, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07837301587301587, "calib/std_conf": 0.24419546741054568, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2672037037037037, "calib/step_q_c_n": 540.0, "calib/step_q_gap": 0.03859122422915531, "calib/step_q_w": 0.2286124794745484, "calib/step_q_w_n": 1827.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2524.0, "completions/max_terminated_length": 2524.0, "completions/mean_length": 431.96484375, "completions/mean_terminated_length": 435.36614990234375, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.0992, "grad_norm": 0.007137414067983627, "learning_rate": 2.9722222222222225e-06, "loss": 0.0803, "num_tokens": 20147230.0, "reward": 0.9957761168479919, "reward_std": 0.1708919107913971, "rewards/accuracy_reward_step": 0.2734375, "rewards/final_brier_reward_step": 0.7376459240913391, "rewards/format_reward_step": 0.98046875, "step": 93 }, { "aux_distill/final_loss": 0.002480889372236561, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08463395736180246, "aux_distill/mean_u": 0.24747276303825033, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 233.375, "aux_distill/step_loss": 0.8388968780636787, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5755026223776224, "calib/avg_num_step_conf": 7.4375, "calib/ece": 0.18921259842519686, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.047409673659673635, "calib/mean_conf": 0.18535433070866142, "calib/mu_c": 0.21820512820512816, "calib/mu_w": 0.17079545454545453, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03374015748031497, "calib/std_conf": 0.1982255745022745, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.28733956692913387, "calib/step_q_c_n": 508.0, "calib/step_q_gap": 0.054796515353202624, "calib/step_q_w": 0.23254305157593125, "calib/step_q_w_n": 1396.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2176.0, "completions/max_terminated_length": 2176.0, "completions/mean_length": 373.140625, "completions/mean_terminated_length": 374.60394287109375, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.10026666666666667, "grad_norm": 0.00853416696190834, "learning_rate": 2.944444444444445e-06, "loss": 0.0732, "num_tokens": 20351434.0, "reward": 1.014456033706665, "reward_std": 0.13239610195159912, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.7398496270179749, "rewards/format_reward_step": 0.984375, "step": 94 }, { "aux_distill/final_loss": 0.0015408824556288891, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08758803072851151, "aux_distill/mean_u": 0.31895470567376694, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 263.875, "aux_distill/step_loss": 0.8712576478719711, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5107928047968021, "calib/avg_num_step_conf": 8.4140625, "calib/ece": 0.2502766798418972, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": 0.009899333777481728, "calib/mean_conf": 0.20553359683794467, "calib/mu_c": 0.21171578947368427, "calib/mu_w": 0.20181645569620255, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04015810276679842, "calib/std_conf": 0.2279431917685282, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.26712024793388434, "calib/step_q_c_n": 726.0, "calib/step_q_gap": 0.046206382387665834, "calib/step_q_w": 0.2209138655462185, "calib/step_q_w_n": 1428.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2391.0, "completions/max_terminated_length": 2391.0, "completions/mean_length": 410.359375, "completions/mean_terminated_length": 411.9686584472656, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.10133333333333333, "grad_norm": 0.007505457382649183, "learning_rate": 2.916666666666667e-06, "loss": 0.123, "num_tokens": 20562614.0, "reward": 1.0167367458343506, "reward_std": 0.16785848140716553, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.6780048608779907, "rewards/format_reward_step": 0.984375, "step": 95 }, { "aux_distill/final_loss": 0.001207535349749378, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08715620776638389, "aux_distill/mean_u": 0.29220774271238215, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 251.25, "aux_distill/step_loss": 0.8679394591599703, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5276768332251784, "calib/avg_num_step_conf": 8.23046875, "calib/ece": 0.34441767068273094, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.028112449799196786, "calib/gap": -0.005063400389357542, "calib/mean_conf": 0.2146987951807229, "calib/mu_c": 0.21197391304347826, "calib/mu_w": 0.2170373134328358, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.048634538152610436, "calib/std_conf": 0.24343212284389124, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2840686274509804, "calib/step_q_c_n": 816.0, "calib/step_q_gap": 0.042244769976154645, "calib/step_q_w": 0.24182385747482574, "calib/step_q_w_n": 1291.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2562.0, "completions/max_terminated_length": 2562.0, "completions/mean_length": 413.8359375, "completions/mean_terminated_length": 417.094482421875, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.1024, "grad_norm": 0.01495582889765501, "learning_rate": 2.888888888888889e-06, "loss": 0.2041, "num_tokens": 20774372.0, "reward": 1.0166418552398682, "reward_std": 0.18338100612163544, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6114088296890259, "rewards/format_reward_step": 0.97265625, "step": 96 }, { "aux_distill/final_loss": 0.0035912545390601736, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08900718565564603, "aux_distill/mean_u": 0.2886631791030337, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 255.125, "aux_distill/step_loss": 0.8792980760335922, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5244067526676222, "calib/avg_num_step_conf": 7.97265625, "calib/ece": 0.19694820717131473, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.027888446215139442, "calib/gap": 0.038758878802357066, "calib/mean_conf": 0.18117131474103587, "calib/mu_c": 0.20927536231884059, "calib/mu_w": 0.17051648351648352, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.051609561752988045, "calib/std_conf": 0.22696623503613622, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23784274193548388, "calib/step_q_c_n": 496.0, "calib/step_q_gap": 0.02100455423321851, "calib/step_q_w": 0.21683818770226537, "calib/step_q_w_n": 1545.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2465.0, "completions/max_terminated_length": 2465.0, "completions/mean_length": 392.73828125, "completions/mean_terminated_length": 392.73828125, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.10346666666666667, "grad_norm": 0.008654974400997162, "learning_rate": 2.861111111111111e-06, "loss": 0.1672, "num_tokens": 20979985.0, "reward": 0.9918497800827026, "reward_std": 0.1644062101840973, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.7376058101654053, "rewards/format_reward_step": 0.9765625, "step": 97 }, { "aux_distill/final_loss": 0.0005821615177410422, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09008797048591077, "aux_distill/mean_u": 0.3564260619450294, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 254.5, "aux_distill/step_loss": 0.8991331998258829, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4830921052631579, "calib/avg_num_step_conf": 7.953125, "calib/ece": 0.2870690476190476, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.01984126984126984, "calib/gap": -0.012511052631578923, "calib/mean_conf": 0.1797563492063492, "calib/mu_c": 0.17221000000000003, "calib/mu_w": 0.18472105263157895, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.035, "calib/std_conf": 0.21590370446693297, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23772814207650272, "calib/step_q_c_n": 732.0, "calib/step_q_gap": 0.0012896451439874113, "calib/step_q_w": 0.2364384969325153, "calib/step_q_w_n": 1304.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3067.0, "completions/max_terminated_length": 3067.0, "completions/mean_length": 413.30859375, "completions/mean_terminated_length": 413.30859375, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.10453333333333334, "grad_norm": 0.006666828878223896, "learning_rate": 2.8333333333333335e-06, "loss": 0.1732, "num_tokens": 21191976.0, "reward": 1.0127978324890137, "reward_std": 0.15169328451156616, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.6505955457687378, "rewards/format_reward_step": 0.984375, "step": 98 }, { "aux_distill/final_loss": 0.004728326463691701, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08814155263826251, "aux_distill/mean_u": 0.27947188991904526, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 268.875, "aux_distill/step_loss": 0.8672305308282375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5388687078360146, "calib/avg_num_step_conf": 8.40234375, "calib/ece": 0.16634920634920633, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.023809523809523808, "calib/gap": 0.010978723404255264, "calib/mean_conf": 0.20404761904761906, "calib/mu_c": 0.21297872340425528, "calib/mu_w": 0.202, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.09194444444444443, "calib/std_conf": 0.2237688666211952, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2703977272727273, "calib/step_q_c_n": 352.0, "calib/step_q_gap": 0.014603397089292092, "calib/step_q_w": 0.2557943301834352, "calib/step_q_w_n": 1799.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2464.0, "completions/max_terminated_length": 2464.0, "completions/mean_length": 413.296875, "completions/mean_terminated_length": 414.91766357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.1056, "grad_norm": 0.007676994893699884, "learning_rate": 2.805555555555556e-06, "loss": 0.0545, "num_tokens": 21403580.0, "reward": 0.9783390760421753, "reward_std": 0.1388581395149231, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.7887094020843506, "rewards/format_reward_step": 0.984375, "step": 99 }, { "aux_distill/final_loss": 0.0063580892933714495, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08806037961039692, "aux_distill/mean_u": 0.34808848005024406, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 263.75, "aux_distill/step_loss": 0.8615295179188251, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5004960317460317, "calib/avg_num_step_conf": 8.62890625, "calib/ece": 0.2257142292490119, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.02766798418972332, "calib/gap": -0.005435780423280384, "calib/mean_conf": 0.18574822134387353, "calib/mu_c": 0.1816875, "calib/mu_w": 0.18712328042328039, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0792490118577075, "calib/std_conf": 0.22822808136016548, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24062084942084938, "calib/step_q_c_n": 518.0, "calib/step_q_gap": -0.0035964776045793456, "calib/step_q_w": 0.24421732702542873, "calib/step_q_w_n": 1691.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1749.0, "completions/max_terminated_length": 1749.0, "completions/mean_length": 406.79296875, "completions/mean_terminated_length": 409.9960632324219, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.10666666666666667, "grad_norm": 0.008164642378687859, "learning_rate": 2.7777777777777783e-06, "loss": 0.0545, "num_tokens": 21615127.0, "reward": 0.9909152984619141, "reward_std": 0.1384955197572708, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.7435492873191833, "rewards/format_reward_step": 0.98828125, "step": 100 }, { "aux_distill/final_loss": 0.018706669932953446, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09069530270062387, "aux_distill/mean_u": 0.27280208153168733, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 286.375, "aux_distill/step_loss": 0.8508329931646585, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5536755029140816, "calib/avg_num_step_conf": 8.94921875, "calib/ece": 0.17103505976095618, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.03187250996015936, "calib/gap": 0.0103176724948299, "calib/mean_conf": 0.18338725099601594, "calib/mu_c": 0.19148518518518523, "calib/mu_w": 0.18116751269035533, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0696414342629482, "calib/std_conf": 0.23175196418842275, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22283809523809525, "calib/step_q_c_n": 483.0, "calib/step_q_gap": -0.023826838390223315, "calib/step_q_w": 0.24666493362831857, "calib/step_q_w_n": 1808.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2871.0, "completions/max_terminated_length": 2871.0, "completions/mean_length": 459.60546875, "completions/mean_terminated_length": 461.4078674316406, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.10773333333333333, "grad_norm": 0.008202540688216686, "learning_rate": 2.7500000000000004e-06, "loss": 0.1495, "num_tokens": 21839778.0, "reward": 0.9759368896484375, "reward_std": 0.16332665085792542, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.7643738389015198, "rewards/format_reward_step": 0.9765625, "step": 101 }, { "aux_distill/final_loss": 0.0002810830437738332, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08407479338347912, "aux_distill/mean_u": 0.27899008717965257, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 245.5, "aux_distill/step_loss": 0.8399046696722507, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5584841917389087, "calib/avg_num_step_conf": 7.671875, "calib/ece": 0.25075393700787396, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.031496062992125984, "calib/gap": 0.057459115247322806, "calib/mean_conf": 0.21979724409448817, "calib/mu_c": 0.25327735849056604, "calib/mu_w": 0.19581824324324323, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.026614173228346454, "calib/std_conf": 0.2480186680812846, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2932516788321168, "calib/step_q_c_n": 685.0, "calib/step_q_gap": 0.042074040051819694, "calib/step_q_w": 0.2511776387802971, "calib/step_q_w_n": 1279.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2430.0, "completions/max_terminated_length": 2430.0, "completions/mean_length": 377.953125, "completions/mean_terminated_length": 377.953125, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.1088, "grad_norm": 0.01649659126996994, "learning_rate": 2.7222222222222224e-06, "loss": 0.1107, "num_tokens": 22043230.0, "reward": 1.040470838546753, "reward_std": 0.15088725090026855, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6785979866981506, "rewards/format_reward_step": 0.98828125, "step": 102 }, { "aux_distill/final_loss": 0.0065591775703524036, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09220340941101313, "aux_distill/mean_u": 0.3420191918866758, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 264.0, "aux_distill/step_loss": 0.9023565407842398, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.55, "calib/avg_num_step_conf": 8.44140625, "calib/ece": 0.21325, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.023809523809523808, "calib/gap": 0.0571962962962963, "calib/mean_conf": 0.20857539682539683, "calib/mu_c": 0.24534444444444445, "calib/mu_w": 0.18814814814814815, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03234126984126983, "calib/std_conf": 0.23171059507686365, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.27322337042925277, "calib/step_q_c_n": 629.0, "calib/step_q_gap": 0.04654582473734678, "calib/step_q_w": 0.22667754569190599, "calib/step_q_w_n": 1532.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2127.0, "completions/max_terminated_length": 2127.0, "completions/mean_length": 437.36328125, "completions/mean_terminated_length": 440.8070983886719, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.10986666666666667, "grad_norm": 0.011347189545631409, "learning_rate": 2.6944444444444444e-06, "loss": 0.1159, "num_tokens": 22259747.0, "reward": 1.0227915048599243, "reward_std": 0.1551782190799713, "rewards/accuracy_reward_step": 0.3515625, "rewards/final_brier_reward_step": 0.7096454501152039, "rewards/format_reward_step": 0.984375, "step": 103 }, { "aux_distill/final_loss": 0.00048362685811298434, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.0856669198255986, "aux_distill/mean_u": 0.28440768584848886, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 254.25, "aux_distill/step_loss": 0.8552182968705893, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5749327956989247, "calib/avg_num_step_conf": 7.9453125, "calib/ece": 0.16291999999999995, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.028, "calib/gap": 0.042560483870967725, "calib/mean_conf": 0.20395999999999997, "calib/mu_c": 0.235625, "calib/mu_w": 0.19306451612903228, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.055439999999999996, "calib/std_conf": 0.22977014253379396, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.29107221006564554, "calib/step_q_c_n": 457.0, "calib/step_q_gap": 0.05414132864522703, "calib/step_q_w": 0.23693088142041852, "calib/step_q_w_n": 1577.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3038.0, "completions/max_terminated_length": 3038.0, "completions/mean_length": 440.484375, "completions/mean_terminated_length": 440.484375, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "epoch": 0.11093333333333333, "grad_norm": 0.007789380848407745, "learning_rate": 2.666666666666667e-06, "loss": 0.1475, "num_tokens": 22479191.0, "reward": 0.985549807548523, "reward_std": 0.1652931571006775, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.7484433650970459, "rewards/format_reward_step": 0.97265625, "step": 104 }, { "aux_distill/final_loss": 0.0016538519003006513, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.0852151894941926, "aux_distill/mean_u": 0.28223207626261604, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 266.75, "aux_distill/step_loss": 0.8471903223544359, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5013830741626795, "calib/avg_num_step_conf": 8.5, "calib/ece": 0.23303968253968255, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.023809523809523808, "calib/gap": 0.010880980861244005, "calib/mean_conf": 0.1956111111111111, "calib/mu_c": 0.20321052631578948, "calib/mu_w": 0.19232954545454548, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.06353174603174604, "calib/std_conf": 0.23826574040584683, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.23728816793893132, "calib/step_q_c_n": 524.0, "calib/step_q_gap": -0.006919277581649785, "calib/step_q_w": 0.2442074455205811, "calib/step_q_w_n": 1652.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2267.0, "completions/max_terminated_length": 2267.0, "completions/mean_length": 428.55859375, "completions/mean_terminated_length": 430.2392272949219, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.112, "grad_norm": 0.009293814189732075, "learning_rate": 2.6388888888888893e-06, "loss": 0.0938, "num_tokens": 22694662.0, "reward": 0.9901215434074402, "reward_std": 0.17139750719070435, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.7068057060241699, "rewards/format_reward_step": 0.9765625, "step": 105 }, { "aux_distill/final_loss": 0.007102636158379028, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09073649870697409, "aux_distill/mean_u": 0.320093902056854, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 242.625, "aux_distill/step_loss": 0.8860570713877678, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5491796935458907, "calib/avg_num_step_conf": 7.58203125, "calib/ece": 0.20916996047430836, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.04743083003952569, "calib/gap": 0.02380746014548832, "calib/mean_conf": 0.19019762845849802, "calib/mu_c": 0.20732394366197182, "calib/mu_w": 0.1835164835164835, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.059367588932806324, "calib/std_conf": 0.2455106006145939, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.27991543340380554, "calib/step_q_c_n": 473.0, "calib/step_q_gap": 0.06232258599236143, "calib/step_q_w": 0.2175928474114441, "calib/step_q_w_n": 1468.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2886.0, "completions/max_terminated_length": 2886.0, "completions/mean_length": 390.45703125, "completions/mean_terminated_length": 390.45703125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.11306666666666666, "grad_norm": 0.008583777584135532, "learning_rate": 2.6111111111111113e-06, "loss": 0.1858, "num_tokens": 22899203.0, "reward": 0.9981211423873901, "reward_std": 0.1512743979692459, "rewards/accuracy_reward_step": 0.27734375, "rewards/final_brier_reward_step": 0.7306171655654907, "rewards/format_reward_step": 0.98828125, "step": 106 }, { "aux_distill/final_loss": 0.0010467911761224968, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08832863043062389, "aux_distill/mean_u": 0.31049923531388723, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 261.5, "aux_distill/step_loss": 0.880145912989974, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5397715888599479, "calib/avg_num_step_conf": 8.171875, "calib/ece": 0.2245708661417323, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.01968503937007874, "calib/gap": 0.04653649903159021, "calib/mean_conf": 0.19928740157480315, "calib/mu_c": 0.22878494623655915, "calib/mu_w": 0.18224844720496894, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02885826771653544, "calib/std_conf": 0.20964296626847895, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.29631616022099444, "calib/step_q_c_n": 724.0, "calib/step_q_gap": 0.06280446431456169, "calib/step_q_w": 0.23351169590643275, "calib/step_q_w_n": 1368.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2439.0, "completions/max_terminated_length": 2439.0, "completions/mean_length": 412.3125, "completions/mean_terminated_length": 412.3125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.11413333333333334, "grad_norm": 0.008329787291586399, "learning_rate": 2.5833333333333337e-06, "loss": 0.1044, "num_tokens": 23109371.0, "reward": 1.0337947607040405, "reward_std": 0.13733689486980438, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.712120771408081, "rewards/format_reward_step": 0.9921875, "step": 107 }, { "aux_distill/final_loss": 0.0005420635079644853, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08763206284493208, "aux_distill/mean_u": 0.3107295169673202, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 259.125, "aux_distill/step_loss": 0.874694412574172, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5471334454723857, "calib/avg_num_step_conf": 8.69921875, "calib/ece": 0.23366932270916338, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.02390438247011952, "calib/gap": 0.040384076254555645, "calib/mean_conf": 0.20031474103585656, "calib/mu_c": 0.22670114942528735, "calib/mu_w": 0.1863170731707317, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04368525896414342, "calib/std_conf": 0.24403178990430135, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2722383808095952, "calib/step_q_c_n": 667.0, "calib/step_q_gap": 0.042300367989082405, "calib/step_q_w": 0.22993801282051282, "calib/step_q_w_n": 1560.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2795.0, "completions/max_terminated_length": 2795.0, "completions/mean_length": 436.9296875, "completions/mean_terminated_length": 440.3700866699219, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.1152, "grad_norm": 0.01860632747411728, "learning_rate": 2.5555555555555557e-06, "loss": 0.117, "num_tokens": 23324457.0, "reward": 1.0086462497711182, "reward_std": 0.1578787863254547, "rewards/accuracy_reward_step": 0.33984375, "rewards/final_brier_reward_step": 0.6969802379608154, "rewards/format_reward_step": 0.98046875, "step": 108 }, { "aux_distill/final_loss": 0.0005168746856725193, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08576538250781596, "aux_distill/mean_u": 0.2792536795486146, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 266.625, "aux_distill/step_loss": 0.8561031799763441, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5949438202247191, "calib/avg_num_step_conf": 8.9296875, "calib/ece": 0.22437751004016065, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.028112449799196786, "calib/gap": 0.06525280898876407, "calib/mean_conf": 0.18582329317269078, "calib/mu_c": 0.22775280898876407, "calib/mu_w": 0.1625, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.026385542168674697, "calib/std_conf": 0.22225800182919583, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2480131362889984, "calib/step_q_c_n": 609.0, "calib/step_q_gap": 0.05013001166168771, "calib/step_q_w": 0.1978831246273107, "calib/step_q_w_n": 1677.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3062.0, "completions/max_terminated_length": 3062.0, "completions/mean_length": 417.109375, "completions/mean_terminated_length": 422.05535888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.11626666666666667, "grad_norm": 0.008063085377216339, "learning_rate": 2.5277777777777778e-06, "loss": 0.133, "num_tokens": 23535837.0, "reward": 1.0110188722610474, "reward_std": 0.16892258822917938, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.7017253637313843, "rewards/format_reward_step": 0.97265625, "step": 109 }, { "aux_distill/final_loss": 0.00024006113483210356, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08656805171631277, "aux_distill/mean_u": 0.2881213946310252, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 249.625, "aux_distill/step_loss": 0.8649603221565485, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5057471264367815, "calib/avg_num_step_conf": 7.8125, "calib/ece": 0.2266043478260869, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.011857707509881422, "calib/gap": 0.0034720209515495537, "calib/mean_conf": 0.1821703557312253, "calib/mu_c": 0.18455822784810128, "calib/mu_w": 0.18108620689655172, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0482608695652174, "calib/std_conf": 0.22355036100133238, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21149244935543282, "calib/step_q_c_n": 543.0, "calib/step_q_gap": -0.019882293266392848, "calib/step_q_w": 0.23137474262182567, "calib/step_q_w_n": 1457.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1710.0, "completions/max_terminated_length": 1710.0, "completions/mean_length": 407.296875, "completions/mean_terminated_length": 410.5039367675781, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.11733333333333333, "grad_norm": 0.01783120445907116, "learning_rate": 2.5e-06, "loss": 0.1019, "num_tokens": 23745025.0, "reward": 1.0041415691375732, "reward_std": 0.1339276283979416, "rewards/accuracy_reward_step": 0.30859375, "rewards/final_brier_reward_step": 0.711408257484436, "rewards/format_reward_step": 0.98828125, "step": 110 }, { "aux_distill/final_loss": 0.0016554806755948448, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08998130320105702, "aux_distill/mean_u": 0.31647358801054065, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 236.25, "aux_distill/step_loss": 0.8948465697467327, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5312062202297563, "calib/avg_num_step_conf": 7.609375, "calib/ece": 0.290515873015873, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.027777777777777776, "calib/gap": -0.0075217147660409656, "calib/mean_conf": 0.18972222222222224, "calib/mu_c": 0.1847674418604651, "calib/mu_w": 0.19228915662650606, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06948412698412698, "calib/std_conf": 0.22469980463851216, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2171139705882353, "calib/step_q_c_n": 544.0, "calib/step_q_gap": -0.01035696958270485, "calib/step_q_w": 0.22747094017094016, "calib/step_q_w_n": 1404.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2870.0, "completions/max_terminated_length": 2870.0, "completions/mean_length": 399.21875, "completions/mean_terminated_length": 402.3622131347656, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.1184, "grad_norm": 0.03630237653851509, "learning_rate": 2.4722222222222226e-06, "loss": 0.1185, "num_tokens": 23954633.0, "reward": 1.0038787126541138, "reward_std": 0.14325237274169922, "rewards/accuracy_reward_step": 0.3359375, "rewards/final_brier_reward_step": 0.6874449253082275, "rewards/format_reward_step": 0.984375, "step": 111 }, { "aux_distill/final_loss": 0.0006655732486251509, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08622093067970127, "aux_distill/mean_u": 0.31761024345365596, "aux_distill/n_active_final_tok": 30.75, "aux_distill/n_active_tok": 249.25, "aux_distill/step_loss": 0.8602125756442547, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.523518411967779, "calib/avg_num_step_conf": 8.1953125, "calib/ece": 0.28016260162601625, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.024390243902439025, "calib/gap": 0.002517261219792871, "calib/mean_conf": 0.18406504065040652, "calib/mu_c": 0.18568181818181817, "calib/mu_w": 0.1831645569620253, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.053252032520325204, "calib/std_conf": 0.23073702131536272, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.21303691275167785, "calib/step_q_c_n": 596.0, "calib/step_q_gap": -0.01103106328027953, "calib/step_q_w": 0.22406797603195738, "calib/step_q_w_n": 1502.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2900.0, "completions/max_terminated_length": 2900.0, "completions/mean_length": 465.609375, "completions/mean_terminated_length": 471.1304626464844, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.11946666666666667, "grad_norm": 0.006373663432896137, "learning_rate": 2.4444444444444447e-06, "loss": 0.1884, "num_tokens": 24181749.0, "reward": 0.9829074144363403, "reward_std": 0.16640111804008484, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.6611273288726807, "rewards/format_reward_step": 0.9609375, "step": 112 }, { "aux_distill/final_loss": 0.000554420957541879, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08675767807289958, "aux_distill/mean_u": 0.29539486317532077, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 270.875, "aux_distill/step_loss": 0.8659135028719902, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5118994989684644, "calib/avg_num_step_conf": 8.5, "calib/ece": 0.201984126984127, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.011904761904761904, "calib/gap": 0.00556145004420866, "calib/mean_conf": 0.17603174603174604, "calib/mu_c": 0.17987179487179486, "calib/mu_w": 0.1743103448275862, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03424603174603175, "calib/std_conf": 0.1952006674668175, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23068181818181818, "calib/step_q_c_n": 616.0, "calib/step_q_gap": -0.003465617715617708, "calib/step_q_w": 0.2341474358974359, "calib/step_q_w_n": 1560.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2384.0, "completions/max_terminated_length": 2384.0, "completions/mean_length": 408.96875, "completions/mean_terminated_length": 410.57257080078125, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.12053333333333334, "grad_norm": 0.008550593629479408, "learning_rate": 2.4166666666666667e-06, "loss": 0.1097, "num_tokens": 24391645.0, "reward": 1.007127285003662, "reward_std": 0.13076680898666382, "rewards/accuracy_reward_step": 0.30859375, "rewards/final_brier_reward_step": 0.7212859392166138, "rewards/format_reward_step": 0.984375, "step": 113 }, { "aux_distill/final_loss": 0.012695347298631532, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08927124657202512, "aux_distill/mean_u": 0.28948008342312886, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 234.5, "aux_distill/step_loss": 0.8546264041215181, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5192509510691329, "calib/avg_num_step_conf": 7.328125, "calib/ece": 0.2774071146245059, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": 0.018193362193362206, "calib/mean_conf": 0.17579446640316204, "calib/mu_c": 0.18686868686868688, "calib/mu_w": 0.16867532467532467, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.030948616600790513, "calib/std_conf": 0.2068447443610172, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2424567073170732, "calib/step_q_c_n": 656.0, "calib/step_q_gap": 0.03398621551379449, "calib/step_q_w": 0.2084704918032787, "calib/step_q_w_n": 1220.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2374.0, "completions/max_terminated_length": 2374.0, "completions/mean_length": 374.21484375, "completions/mean_terminated_length": 375.682373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.1216, "grad_norm": 0.00891240406781435, "learning_rate": 2.388888888888889e-06, "loss": 0.0763, "num_tokens": 24592468.0, "reward": 1.0202915668487549, "reward_std": 0.14734533429145813, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.6694891452789307, "rewards/format_reward_step": 0.984375, "step": 114 }, { "aux_distill/final_loss": 0.0003174826269969344, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.0851213710848242, "aux_distill/mean_u": 0.25555343441679196, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 241.0, "aux_distill/step_loss": 0.8502612486481667, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5386637901975325, "calib/avg_num_step_conf": 7.53125, "calib/ece": 0.2540590551181102, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.021638306478797303, "calib/mean_conf": 0.17105905511811023, "calib/mu_c": 0.18494505494505498, "calib/mu_w": 0.16330674846625767, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.033425196850393704, "calib/std_conf": 0.20976661303830954, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2170127795527157, "calib/step_q_c_n": 626.0, "calib/step_q_gap": 0.003680214268537485, "calib/step_q_w": 0.2133325652841782, "calib/step_q_w_n": 1302.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2418.0, "completions/max_terminated_length": 2418.0, "completions/mean_length": 392.8359375, "completions/mean_terminated_length": 392.8359375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.12266666666666666, "grad_norm": 0.0057081421837210655, "learning_rate": 2.361111111111111e-06, "loss": 0.0967, "num_tokens": 24798298.0, "reward": 1.0176787376403809, "reward_std": 0.12921567261219025, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.6916075944900513, "rewards/format_reward_step": 0.98828125, "step": 115 }, { "aux_distill/final_loss": 0.001927710538438987, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08520996058359742, "aux_distill/mean_u": 0.29378952206272785, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 271.0, "aux_distill/step_loss": 0.8463164586573839, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5359243697478991, "calib/avg_num_step_conf": 8.46875, "calib/ece": 0.25269565217391304, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.023715415019762844, "calib/gap": 0.003531092436974803, "calib/mean_conf": 0.16904347826086957, "calib/mu_c": 0.17138823529411767, "calib/mu_w": 0.16785714285714287, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04288537549407114, "calib/std_conf": 0.22152625913226384, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22823202614379084, "calib/step_q_c_n": 612.0, "calib/step_q_gap": 0.014645907891862792, "calib/step_q_w": 0.21358611825192805, "calib/step_q_w_n": 1556.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2725.0, "completions/max_terminated_length": 2725.0, "completions/mean_length": 443.3359375, "completions/mean_terminated_length": 443.3359375, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.12373333333333333, "grad_norm": 0.006105185020714998, "learning_rate": 2.3333333333333336e-06, "loss": 0.1194, "num_tokens": 25016312.0, "reward": 1.003137230873108, "reward_std": 0.1414480358362198, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.6898682117462158, "rewards/format_reward_step": 0.984375, "step": 116 }, { "aux_distill/final_loss": 0.00030499420995511173, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08492336596827954, "aux_distill/mean_u": 0.25434296480720475, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 273.375, "aux_distill/step_loss": 0.8483186531811953, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.49249999999999994, "calib/avg_num_step_conf": 8.7734375, "calib/ece": 0.2265338645418327, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0199203187250996, "calib/gap": 0.009617424242424233, "calib/mean_conf": 0.15258964143426293, "calib/mu_c": 0.15933333333333333, "calib/mu_w": 0.1497159090909091, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.040159362549800806, "calib/std_conf": 0.20871468480597213, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.20097560975609757, "calib/step_q_c_n": 574.0, "calib/step_q_gap": -0.00384855292332828, "calib/step_q_w": 0.20482416267942585, "calib/step_q_w_n": 1672.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2215.0, "completions/max_terminated_length": 2215.0, "completions/mean_length": 409.35546875, "completions/mean_terminated_length": 412.5787353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.1248, "grad_norm": 0.005732688121497631, "learning_rate": 2.305555555555556e-06, "loss": 0.1159, "num_tokens": 25227707.0, "reward": 0.9904880523681641, "reward_std": 0.13763518631458282, "rewards/accuracy_reward_step": 0.29296875, "rewards/final_brier_reward_step": 0.7114449143409729, "rewards/format_reward_step": 0.9765625, "step": 117 }, { "aux_distill/final_loss": 0.0003068536104819941, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08029445493593812, "aux_distill/mean_u": 0.2487550132783027, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 262.0, "aux_distill/step_loss": 0.8020239789038897, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.498332692061049, "calib/avg_num_step_conf": 8.3515625, "calib/ece": 0.33394382470119527, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.00796812749003984, "calib/gap": 0.00359008593048607, "calib/mean_conf": 0.14023944223107573, "calib/mu_c": 0.14221327433628317, "calib/mu_w": 0.1386231884057971, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01199203187250996, "calib/std_conf": 0.17313329525652227, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2245212658227848, "calib/step_q_c_n": 790.0, "calib/step_q_gap": 0.025418150095781827, "calib/step_q_w": 0.19910311572700298, "calib/step_q_w_n": 1348.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2570.0, "completions/max_terminated_length": 2570.0, "completions/mean_length": 421.07421875, "completions/mean_terminated_length": 422.72552490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.12586666666666665, "grad_norm": 0.006048675626516342, "learning_rate": 2.277777777777778e-06, "loss": 0.1085, "num_tokens": 25439510.0, "reward": 1.0189061164855957, "reward_std": 0.13250738382339478, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.6159374713897705, "rewards/format_reward_step": 0.98046875, "step": 118 }, { "aux_distill/final_loss": 0.0008023383398949591, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08753173658624291, "aux_distill/mean_u": 0.31721302747280605, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 261.25, "aux_distill/step_loss": 0.8729103431105614, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5155487804878048, "calib/avg_num_step_conf": 8.1640625, "calib/ece": 0.25166338582677167, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.027559055118110236, "calib/gap": 0.018104674796747933, "calib/mean_conf": 0.17164370078740157, "calib/mu_c": 0.18333333333333332, "calib/mu_w": 0.1652286585365854, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.034488188976377954, "calib/std_conf": 0.23148161952270715, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23532208588957051, "calib/step_q_c_n": 652.0, "calib/step_q_gap": 0.04372333762809624, "calib/step_q_w": 0.19159874826147427, "calib/step_q_w_n": 1438.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1905.0, "completions/max_terminated_length": 1905.0, "completions/mean_length": 431.40625, "completions/mean_terminated_length": 433.0980529785156, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.12693333333333334, "grad_norm": 0.005570216104388237, "learning_rate": 2.25e-06, "loss": 0.0804, "num_tokens": 25655014.0, "reward": 1.0154423713684082, "reward_std": 0.14006029069423676, "rewards/accuracy_reward_step": 0.3515625, "rewards/final_brier_reward_step": 0.6871347427368164, "rewards/format_reward_step": 0.9921875, "step": 119 }, { "aux_distill/final_loss": 0.0002905957335315179, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08746440499089658, "aux_distill/mean_u": 0.26815594515693725, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 242.0, "aux_distill/step_loss": 0.873772244900465, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5239520958083832, "calib/avg_num_step_conf": 7.5625, "calib/ece": 0.2543280632411067, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.02766798418972332, "calib/gap": 0.025198092187717624, "calib/mean_conf": 0.1657509881422925, "calib/mu_c": 0.18238372093023258, "calib/mu_w": 0.15718562874251496, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.040079051383399206, "calib/std_conf": 0.21775223720394693, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23026198083067093, "calib/step_q_c_n": 626.0, "calib/step_q_gap": 0.020536789990976273, "calib/step_q_w": 0.20972519083969465, "calib/step_q_w_n": 1310.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2644.0, "completions/max_terminated_length": 2644.0, "completions/mean_length": 397.1015625, "completions/mean_terminated_length": 397.1015625, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.128, "grad_norm": 0.005384616553783417, "learning_rate": 2.222222222222222e-06, "loss": 0.1499, "num_tokens": 25863360.0, "reward": 1.012544870376587, "reward_std": 0.13414838910102844, "rewards/accuracy_reward_step": 0.3359375, "rewards/final_brier_reward_step": 0.7008709907531738, "rewards/format_reward_step": 0.98828125, "step": 120 }, { "aux_distill/final_loss": 0.00021624585497193038, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08460279006976634, "aux_distill/mean_u": 0.29540459878659797, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 278.625, "aux_distill/step_loss": 0.8453791439533234, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5453898584375703, "calib/avg_num_step_conf": 9.5234375, "calib/ece": 0.2160766129032258, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.016129032258064516, "calib/gap": 0.026090405213092666, "calib/mean_conf": 0.16424596774193548, "calib/mu_c": 0.18202531645569622, "calib/mu_w": 0.15593491124260356, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.030887096774193552, "calib/std_conf": 0.20331306302508978, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.24472222222222223, "calib/step_q_c_n": 576.0, "calib/step_q_gap": 0.001519751760353294, "calib/step_q_w": 0.24320247046186894, "calib/step_q_w_n": 1862.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2422.0, "completions/max_terminated_length": 2422.0, "completions/mean_length": 455.2109375, "completions/mean_terminated_length": 462.4365234375, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.12906666666666666, "grad_norm": 0.005154182203114033, "learning_rate": 2.1944444444444445e-06, "loss": 0.1066, "num_tokens": 26084950.0, "reward": 0.9918327927589417, "reward_std": 0.16375666856765747, "rewards/accuracy_reward_step": 0.30859375, "rewards/final_brier_reward_step": 0.7063218355178833, "rewards/format_reward_step": 0.96875, "step": 121 }, { "aux_distill/final_loss": 0.0027493372890603496, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08699899527709931, "aux_distill/mean_u": 0.32834142938597327, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 255.25, "aux_distill/step_loss": 0.8617419321089983, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5076948700866089, "calib/avg_num_step_conf": 8.13671875, "calib/ece": 0.313201581027668, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.02766798418972332, "calib/gap": -0.005973351099267166, "calib/mean_conf": 0.15509881422924898, "calib/mu_c": 0.15136842105263157, "calib/mu_w": 0.15734177215189873, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04640316205533598, "calib/std_conf": 0.22129981210235722, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.20856300309597522, "calib/step_q_c_n": 646.0, "calib/step_q_gap": -0.0017779850738229575, "calib/step_q_w": 0.21034098816979818, "calib/step_q_w_n": 1437.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2927.0, "completions/max_terminated_length": 2927.0, "completions/mean_length": 401.8828125, "completions/mean_terminated_length": 403.4588623046875, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.13013333333333332, "grad_norm": 0.00682408083230257, "learning_rate": 2.166666666666667e-06, "loss": 0.0917, "num_tokens": 26295176.0, "reward": 1.0083664655685425, "reward_std": 0.12590190768241882, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.6573578119277954, "rewards/format_reward_step": 0.98828125, "step": 122 }, { "aux_distill/final_loss": 0.0003856645221276267, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08538874832447618, "aux_distill/mean_u": 0.3277538626466033, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 279.375, "aux_distill/step_loss": 0.8527304800227284, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5145488721804511, "calib/avg_num_step_conf": 8.73046875, "calib/ece": 0.248207171314741, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.035856573705179286, "calib/gap": 0.006730827067669226, "calib/mean_conf": 0.18478087649402392, "calib/mu_c": 0.18947368421052635, "calib/mu_w": 0.18274285714285712, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0650996015936255, "calib/std_conf": 0.22931617509497218, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.22892491467576792, "calib/step_q_c_n": 586.0, "calib/step_q_gap": 0.01985869272306931, "calib/step_q_w": 0.2090662219526986, "calib/step_q_w_n": 1649.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2977.0, "completions/max_terminated_length": 2977.0, "completions/mean_length": 494.69140625, "completions/mean_terminated_length": 496.63140869140625, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.1312, "grad_norm": 0.005485665053129196, "learning_rate": 2.138888888888889e-06, "loss": 0.0867, "num_tokens": 26527105.0, "reward": 0.9922915697097778, "reward_std": 0.148990198969841, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.7072394490242004, "rewards/format_reward_step": 0.9765625, "step": 123 }, { "aux_distill/final_loss": 0.00018249761569677503, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08871332253329456, "aux_distill/mean_u": 0.2713792308351267, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 236.75, "aux_distill/step_loss": 0.8865857180207968, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5352612886859462, "calib/avg_num_step_conf": 7.3984375, "calib/ece": 0.3010629921259842, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.015748031496062992, "calib/gap": 0.014590309487569758, "calib/mean_conf": 0.19003937007874017, "calib/mu_c": 0.19842592592592595, "calib/mu_w": 0.1838356164383562, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03295275590551181, "calib/std_conf": 0.2211984145681818, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.28493928128872364, "calib/step_q_c_n": 807.0, "calib/step_q_gap": 0.04179944688209991, "calib/step_q_w": 0.24313983440662373, "calib/step_q_w_n": 1087.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1345.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 405.3046875, "completions/mean_terminated_length": 408.4960632324219, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.13226666666666667, "grad_norm": 0.005629117600619793, "learning_rate": 2.1111111111111114e-06, "loss": 0.0595, "num_tokens": 26737679.0, "reward": 1.0337088108062744, "reward_std": 0.14394055306911469, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.6533551216125488, "rewards/format_reward_step": 0.9921875, "step": 124 }, { "aux_distill/final_loss": 0.0003763869030990463, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08295424398966134, "aux_distill/mean_u": 0.25883708791289756, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 251.0, "aux_distill/step_loss": 0.8284132611006498, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.59375, "calib/avg_num_step_conf": 8.47265625, "calib/ece": 0.2242036, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.024, "calib/gap": 0.08056602132435467, "calib/mean_conf": 0.17523640000000001, "calib/mu_c": 0.22744318181818182, "calib/mu_w": 0.14687716049382715, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.023719999999999998, "calib/std_conf": 0.22386651414412118, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.27544536423841054, "calib/step_q_c_n": 604.0, "calib/step_q_gap": 0.09403635465374599, "calib/step_q_w": 0.18140900958466455, "calib/step_q_w_n": 1565.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2935.0, "completions/max_terminated_length": 2935.0, "completions/mean_length": 435.14453125, "completions/mean_terminated_length": 442.0516052246094, "completions/min_length": 0.0, "completions/min_terminated_length": 23.0, "epoch": 0.13333333333333333, "grad_norm": 0.0054277521558105946, "learning_rate": 2.0833333333333334e-06, "loss": 0.0656, "num_tokens": 26953884.0, "reward": 1.0152812004089355, "reward_std": 0.15994369983673096, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.7102500200271606, "rewards/format_reward_step": 0.9765625, "step": 125 }, { "aux_distill/final_loss": 0.0012108890330182476, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08275235642213374, "aux_distill/mean_u": 0.29748186129550636, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 261.125, "aux_distill/step_loss": 0.8238908797502518, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.46639110990649135, "calib/avg_num_step_conf": 8.3515625, "calib/ece": 0.2972111553784861, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.02390438247011952, "calib/gap": -0.012727334327144568, "calib/mean_conf": 0.15338645418326693, "calib/mu_c": 0.14542553191489363, "calib/mu_w": 0.1581528662420382, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03804780876494024, "calib/std_conf": 0.21326733045363888, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.21125499999999997, "calib/step_q_c_n": 600.0, "calib/step_q_gap": -0.003406898569570882, "calib/step_q_w": 0.21466189856957085, "calib/step_q_w_n": 1538.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2864.0, "completions/max_terminated_length": 2864.0, "completions/mean_length": 442.0859375, "completions/mean_terminated_length": 443.81964111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.1344, "grad_norm": 0.0057212673127651215, "learning_rate": 2.0555555555555555e-06, "loss": 0.1563, "num_tokens": 27172522.0, "reward": 0.9963690042495728, "reward_std": 0.1387261152267456, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.648987889289856, "rewards/format_reward_step": 0.9765625, "step": 126 }, { "aux_distill/final_loss": 0.009742825745547634, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08722131745889783, "aux_distill/mean_u": 0.271641276215777, "aux_distill/n_active_final_tok": 30.875, "aux_distill/n_active_tok": 276.5, "aux_distill/step_loss": 0.8429846893996, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.553265306122449, "calib/avg_num_step_conf": 8.84765625, "calib/ece": 0.28888218623481776, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.020242914979757085, "calib/gap": 0.037685510204081674, "calib/mean_conf": 0.15978178137651827, "calib/mu_c": 0.18221000000000004, "calib/mu_w": 0.14452448979591836, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.021902834008097165, "calib/std_conf": 0.211930038184977, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22816494845360827, "calib/step_q_c_n": 679.0, "calib/step_q_gap": 0.018543700029900828, "calib/step_q_w": 0.20962124842370744, "calib/step_q_w_n": 1586.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2921.0, "completions/max_terminated_length": 2921.0, "completions/mean_length": 469.25, "completions/mean_terminated_length": 476.69842529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.13546666666666668, "grad_norm": 0.005118188913911581, "learning_rate": 2.027777777777778e-06, "loss": 0.1041, "num_tokens": 27396322.0, "reward": 1.0020356178283691, "reward_std": 0.1690288782119751, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.648602306842804, "rewards/format_reward_step": 0.96484375, "step": 127 }, { "aux_distill/final_loss": 0.01936214267766445, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09160724398680031, "aux_distill/mean_u": 0.31413110652738513, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 222.875, "aux_distill/step_loss": 0.8579859919846058, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5775668073136427, "calib/avg_num_step_conf": 7.75390625, "calib/ece": 0.24885483870967742, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.020161290322580645, "calib/gap": 0.04243825597749651, "calib/mean_conf": 0.1794516129032258, "calib/mu_c": 0.2064888888888889, "calib/mu_w": 0.16405063291139238, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03270161290322582, "calib/std_conf": 0.21362364610411902, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2690200364298725, "calib/step_q_c_n": 549.0, "calib/step_q_gap": 0.027638420830986665, "calib/step_q_w": 0.24138161559888582, "calib/step_q_w_n": 1436.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2282.0, "completions/max_terminated_length": 2282.0, "completions/mean_length": 403.2265625, "completions/mean_terminated_length": 412.9040222167969, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.13653333333333334, "grad_norm": 0.013368487358093262, "learning_rate": 2.0000000000000003e-06, "loss": 0.0488, "num_tokens": 27606212.0, "reward": 1.0036410093307495, "reward_std": 0.15976950526237488, "rewards/accuracy_reward_step": 0.3515625, "rewards/final_brier_reward_step": 0.686969518661499, "rewards/format_reward_step": 0.96875, "step": 128 }, { "aux_distill/final_loss": 0.00019079488697570923, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08709380659274757, "aux_distill/mean_u": 0.3124744532172144, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 247.0, "aux_distill/step_loss": 0.8703656606376171, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4672863666014352, "calib/avg_num_step_conf": 8.0234375, "calib/ece": 0.3206772908366534, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.01593625498007968, "calib/gap": -0.019532941943900844, "calib/mean_conf": 0.17717131474103587, "calib/mu_c": 0.16580952380952382, "calib/mu_w": 0.18534246575342467, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0397609561752988, "calib/std_conf": 0.21525017070818997, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22841322314049586, "calib/step_q_c_n": 726.0, "calib/step_q_gap": -0.013454246739022208, "calib/step_q_w": 0.24186746987951807, "calib/step_q_w_n": 1328.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2835.0, "completions/max_terminated_length": 2835.0, "completions/mean_length": 405.21484375, "completions/mean_terminated_length": 408.405517578125, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.1376, "grad_norm": 0.005519295576959848, "learning_rate": 1.9722222222222224e-06, "loss": 0.1217, "num_tokens": 27812331.0, "reward": 1.0075377225875854, "reward_std": 0.162570059299469, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6283566355705261, "rewards/format_reward_step": 0.9765625, "step": 129 }, { "aux_distill/final_loss": 0.00011418962742482108, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08472625678405166, "aux_distill/mean_u": 0.2851624986802285, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 244.125, "aux_distill/step_loss": 0.8469199799001217, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5673688146578586, "calib/avg_num_step_conf": 7.71484375, "calib/ece": 0.3128729411764706, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.03137254901960784, "calib/gap": 0.0630014209148698, "calib/mean_conf": 0.17418588235294113, "calib/mu_c": 0.20926902654867258, "calib/mu_w": 0.14626760563380278, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.021960784313725494, "calib/std_conf": 0.22636512886351978, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.24375283474065138, "calib/step_q_c_n": 829.0, "calib/step_q_gap": 0.04009663927817317, "calib/step_q_w": 0.2036561954624782, "calib/step_q_w_n": 1146.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1267.0, "completions/max_terminated_length": 1267.0, "completions/mean_length": 399.515625, "completions/mean_terminated_length": 401.0823669433594, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.13866666666666666, "grad_norm": 0.005554182454943657, "learning_rate": 1.944444444444445e-06, "loss": 0.0985, "num_tokens": 28019895.0, "reward": 1.047834873199463, "reward_std": 0.1137884333729744, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.6581695675849915, "rewards/format_reward_step": 0.99609375, "step": 130 }, { "aux_distill/final_loss": 0.01064502334855888, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08948624052572995, "aux_distill/mean_u": 0.27187465498469027, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 245.375, "aux_distill/step_loss": 0.8629273101687431, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5781611542126037, "calib/avg_num_step_conf": 7.9609375, "calib/ece": 0.2732669322709163, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.05976095617529881, "calib/gap": 0.05499047230161974, "calib/mean_conf": 0.2004382470119522, "calib/mu_c": 0.23505376344086024, "calib/mu_w": 0.1800632911392405, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.051593625498007965, "calib/std_conf": 0.26497877681554816, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.32565640194489465, "calib/step_q_c_n": 617.0, "calib/step_q_gap": 0.10542768977036962, "calib/step_q_w": 0.22022871217452503, "calib/step_q_w_n": 1421.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2725.0, "completions/max_terminated_length": 2725.0, "completions/mean_length": 393.03125, "completions/mean_terminated_length": 399.2698669433594, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.13973333333333332, "grad_norm": 0.006183728575706482, "learning_rate": 1.916666666666667e-06, "loss": 0.0554, "num_tokens": 28226719.0, "reward": 1.0117428302764893, "reward_std": 0.1629297137260437, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.679735541343689, "rewards/format_reward_step": 0.98046875, "step": 131 }, { "aux_distill/final_loss": 0.0001528823884200392, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08295698906295002, "aux_distill/mean_u": 0.3197155654013685, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 232.375, "aux_distill/step_loss": 0.8291112314909697, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.540013765486172, "calib/avg_num_step_conf": 7.48046875, "calib/ece": 0.336798418972332, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.02766798418972332, "calib/gap": 0.02670003754223496, "calib/mean_conf": 0.19814229249011858, "calib/mu_c": 0.21196721311475408, "calib/mu_w": 0.18526717557251912, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02636363636363636, "calib/std_conf": 0.2281046399975686, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2750613915416098, "calib/step_q_c_n": 733.0, "calib/step_q_gap": 0.03989303282756576, "calib/step_q_w": 0.23516835871404404, "calib/step_q_w_n": 1182.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1233.0, "completions/max_terminated_length": 1233.0, "completions/mean_length": 407.28515625, "completions/mean_terminated_length": 410.49212646484375, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.1408, "grad_norm": 0.006024093832820654, "learning_rate": 1.888888888888889e-06, "loss": 0.0639, "num_tokens": 28436576.0, "reward": 1.046138882637024, "reward_std": 0.1381235271692276, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.6235277652740479, "rewards/format_reward_step": 0.98828125, "step": 132 }, { "aux_distill/final_loss": 0.00026432928763142627, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08952512557152659, "aux_distill/mean_u": 0.31743095958844497, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 273.625, "aux_distill/step_loss": 0.8944582510739565, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.44109696546913146, "calib/avg_num_step_conf": 8.55078125, "calib/ece": 0.22399598393574302, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.028112449799196786, "calib/gap": -0.04247209626787582, "calib/mean_conf": 0.20411646586345383, "calib/mu_c": 0.17204918032786884, "calib/mu_w": 0.21452127659574466, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.09156626506024097, "calib/std_conf": 0.23439065340489612, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2150677618069815, "calib/step_q_c_n": 487.0, "calib/step_q_gap": -0.06189463537280698, "calib/step_q_w": 0.2769623971797885, "calib/step_q_w_n": 1702.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2808.0, "completions/max_terminated_length": 2808.0, "completions/mean_length": 502.01953125, "completions/mean_terminated_length": 505.9724426269531, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.14186666666666667, "grad_norm": 0.005406877491623163, "learning_rate": 1.8611111111111113e-06, "loss": 0.132, "num_tokens": 28671437.0, "reward": 0.9627663493156433, "reward_std": 0.16971050202846527, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.7185014486312866, "rewards/format_reward_step": 0.96875, "step": 133 }, { "aux_distill/final_loss": 0.0013338901030692796, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08691820548847318, "aux_distill/mean_u": 0.30429836718858205, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 268.125, "aux_distill/step_loss": 0.8651803620159626, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.44983277591973236, "calib/avg_num_step_conf": 9.0390625, "calib/ece": 0.2911984126984126, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.031746031746031744, "calib/gap": -0.0318461538461538, "calib/mean_conf": 0.20364285714285713, "calib/mu_c": 0.18329670329670333, "calib/mu_w": 0.21514285714285714, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06686507936507936, "calib/std_conf": 0.24635614187483895, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24482191780821913, "calib/step_q_c_n": 730.0, "calib/step_q_gap": 0.010790983464784798, "calib/step_q_w": 0.23403093434343433, "calib/step_q_w_n": 1584.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2067.0, "completions/max_terminated_length": 2067.0, "completions/mean_length": 480.3984375, "completions/mean_terminated_length": 486.0948791503906, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.14293333333333333, "grad_norm": 0.006133591756224632, "learning_rate": 1.8333333333333333e-06, "loss": 0.0757, "num_tokens": 28903371.0, "reward": 0.9992485046386719, "reward_std": 0.15006408095359802, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.6586532592773438, "rewards/format_reward_step": 0.984375, "step": 134 }, { "aux_distill/final_loss": 0.00716106703822561, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08925712550990283, "aux_distill/mean_u": 0.2880257740424917, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 262.25, "aux_distill/step_loss": 0.8710880391299725, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5321851310014629, "calib/avg_num_step_conf": 8.296875, "calib/ece": 0.2734192771084338, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.04417670682730924, "calib/gap": 0.048418499800505416, "calib/mean_conf": 0.20682168674698795, "calib/mu_c": 0.2352116504854369, "calib/mu_w": 0.1867931506849315, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03329317269076305, "calib/std_conf": 0.25990551699149583, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.27213814285714283, "calib/step_q_c_n": 700.0, "calib/step_q_gap": 0.055028100722311346, "calib/step_q_w": 0.21711004213483148, "calib/step_q_w_n": 1424.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3048.0, "completions/max_terminated_length": 3048.0, "completions/mean_length": 481.2890625, "completions/mean_terminated_length": 485.0787353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.144, "grad_norm": 0.006649309303611517, "learning_rate": 1.8055555555555557e-06, "loss": 0.1361, "num_tokens": 29132461.0, "reward": 1.0136375427246094, "reward_std": 0.1811089664697647, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.652275025844574, "rewards/format_reward_step": 0.97265625, "step": 135 }, { "aux_distill/final_loss": 0.0002608851482364116, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08260556554887444, "aux_distill/mean_u": 0.2542878665254145, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 268.875, "aux_distill/step_loss": 0.8252729810774326, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6085570469798658, "calib/avg_num_step_conf": 8.41796875, "calib/ece": 0.26313895582329316, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.01606425702811245, "calib/gap": 0.083625610738255, "calib/mean_conf": 0.19734297188755018, "calib/mu_c": 0.247384, "calib/mu_w": 0.163758389261745, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.02943775100401607, "calib/std_conf": 0.2374326255752428, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.28552277777777774, "calib/step_q_c_n": 720.0, "calib/step_q_gap": 0.09411511227255129, "calib/step_q_w": 0.19140766550522645, "calib/step_q_w_n": 1435.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2907.0, "completions/max_terminated_length": 2907.0, "completions/mean_length": 465.14453125, "completions/mean_terminated_length": 466.9686584472656, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.14506666666666668, "grad_norm": 0.005239229649305344, "learning_rate": 1.777777777777778e-06, "loss": 0.1108, "num_tokens": 29360026.0, "reward": 1.0229345560073853, "reward_std": 0.1769467145204544, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.6825878620147705, "rewards/format_reward_step": 0.97265625, "step": 136 }, { "aux_distill/final_loss": 0.00020748716309526571, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08588201878592372, "aux_distill/mean_u": 0.30829972212226764, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 257.0, "aux_distill/step_loss": 0.8581977151334286, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4878191856452726, "calib/avg_num_step_conf": 8.484375, "calib/ece": 0.27384860557768925, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.03187250996015936, "calib/gap": 0.008970324361628729, "calib/mean_conf": 0.17802390438247012, "calib/mu_c": 0.18377777777777782, "calib/mu_w": 0.1748074534161491, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04665338645418326, "calib/std_conf": 0.2278685741342647, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2637964458804523, "calib/step_q_c_n": 619.0, "calib/step_q_gap": 0.0027934838714374632, "calib/step_q_w": 0.26100296200901485, "calib/step_q_w_n": 1553.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2972.0, "completions/max_terminated_length": 2972.0, "completions/mean_length": 441.84765625, "completions/mean_terminated_length": 447.08697509765625, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.14613333333333334, "grad_norm": 0.004869647789746523, "learning_rate": 1.75e-06, "loss": 0.1032, "num_tokens": 29580123.0, "reward": 1.0040864944458008, "reward_std": 0.15066415071487427, "rewards/accuracy_reward_step": 0.3515625, "rewards/final_brier_reward_step": 0.6761415600776672, "rewards/format_reward_step": 0.98046875, "step": 137 }, { "aux_distill/final_loss": 0.009303503669798374, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08498471591155976, "aux_distill/mean_u": 0.24362162723192038, "aux_distill/n_active_final_tok": 31.875, "aux_distill/n_active_tok": 263.875, "aux_distill/step_loss": 0.8219366334378719, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5338783599653165, "calib/avg_num_step_conf": 8.41015625, "calib/ece": 0.34337254901960784, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.043137254901960784, "calib/gap": 0.009773318468970615, "calib/mean_conf": 0.19607843137254902, "calib/mu_c": 0.2013675213675213, "calib/mu_w": 0.1915942028985507, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04031372549019609, "calib/std_conf": 0.24892615198616125, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.27527794811320755, "calib/step_q_c_n": 848.0, "calib/step_q_gap": 0.013296338917805295, "calib/step_q_w": 0.26198160919540225, "calib/step_q_w_n": 1305.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1423.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 448.0625, "completions/mean_terminated_length": 449.81964111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.1472, "grad_norm": 0.00609896844252944, "learning_rate": 1.7222222222222224e-06, "loss": 0.0715, "num_tokens": 29799163.0, "reward": 1.0381156206130981, "reward_std": 0.14251059293746948, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.6231062412261963, "rewards/format_reward_step": 0.99609375, "step": 138 }, { "aux_distill/final_loss": 0.0001354755704596755, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08423372136894614, "aux_distill/mean_u": 0.2620757407991835, "aux_distill/n_active_final_tok": 31.875, "aux_distill/n_active_tok": 241.0, "aux_distill/step_loss": 0.8419307712465525, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5360669815371404, "calib/avg_num_step_conf": 7.60546875, "calib/ece": 0.33816406250000014, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0390625, "calib/gap": 0.02297797951297309, "calib/mean_conf": 0.1942578125, "calib/mu_c": 0.20655462184873952, "calib/mu_w": 0.18357664233576643, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0337890625, "calib/std_conf": 0.2400509303412816, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2670771001150748, "calib/step_q_c_n": 869.0, "calib/step_q_gap": 0.017206970244944914, "calib/step_q_w": 0.24987012987012988, "calib/step_q_w_n": 1078.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1630.0, "completions/max_terminated_length": 1630.0, "completions/mean_length": 416.671875, "completions/mean_terminated_length": 418.305908203125, "completions/min_length": 0.0, "completions/min_terminated_length": 92.0, "epoch": 0.14826666666666666, "grad_norm": 0.005674412008374929, "learning_rate": 1.6944444444444446e-06, "loss": 0.0813, "num_tokens": 30008927.0, "reward": 1.048335313796997, "reward_std": 0.12911677360534668, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6318269968032837, "rewards/format_reward_step": 1.0, "step": 139 }, { "aux_distill/final_loss": 0.00025873837711287706, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08469209540635347, "aux_distill/mean_u": 0.2662521607237514, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 251.125, "aux_distill/step_loss": 0.8461447274312377, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5326923076923077, "calib/avg_num_step_conf": 8.37890625, "calib/ece": 0.35248799999999997, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.052, "calib/gap": 0.019008974358974334, "calib/mean_conf": 0.19943200000000003, "calib/mu_c": 0.20931666666666665, "calib/mu_w": 0.19030769230769232, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03595999999999999, "calib/std_conf": 0.25057568392803, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25790625, "calib/step_q_c_n": 800.0, "calib/step_q_gap": -0.007058062267657983, "calib/step_q_w": 0.264964312267658, "calib/step_q_w_n": 1345.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3058.0, "completions/max_terminated_length": 3058.0, "completions/mean_length": 451.80078125, "completions/mean_terminated_length": 457.1581115722656, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.14933333333333335, "grad_norm": 0.005805363412946463, "learning_rate": 1.6666666666666667e-06, "loss": 0.1248, "num_tokens": 30229604.0, "reward": 1.0246009826660156, "reward_std": 0.1700487732887268, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6038893461227417, "rewards/format_reward_step": 0.9765625, "step": 140 }, { "aux_distill/final_loss": 0.03041997280342912, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09063981485087425, "aux_distill/mean_u": 0.26758513511730125, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 252.125, "aux_distill/step_loss": 0.8151382012292743, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5025196850393701, "calib/avg_num_step_conf": 7.91796875, "calib/ece": 0.38200793650793646, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.04365079365079365, "calib/gap": 0.0018508346456693192, "calib/mean_conf": 0.19640476190476192, "calib/mu_c": 0.19732283464566927, "calib/mu_w": 0.19547199999999995, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.037222222222222226, "calib/std_conf": 0.2570357481953437, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.28213024282560706, "calib/step_q_c_n": 906.0, "calib/step_q_gap": 0.00719357913247598, "calib/step_q_w": 0.2749366636931311, "calib/step_q_w_n": 1121.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1602.0, "completions/max_terminated_length": 1602.0, "completions/mean_length": 447.83203125, "completions/mean_terminated_length": 451.3582763671875, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.1504, "grad_norm": 0.008275046944618225, "learning_rate": 1.638888888888889e-06, "loss": 0.0896, "num_tokens": 30451345.0, "reward": 1.0307620763778687, "reward_std": 0.15634740889072418, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5810553431510925, "rewards/format_reward_step": 0.984375, "step": 141 }, { "aux_distill/final_loss": 0.00018829970758815762, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.0839127420913428, "aux_distill/mean_u": 0.2789330803613191, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 264.875, "aux_distill/step_loss": 0.8385625015944242, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4938867438867439, "calib/avg_num_step_conf": 8.60546875, "calib/ece": 0.31784860557768924, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.01593625498007968, "calib/gap": 0.005680180180180183, "calib/mean_conf": 0.16701195219123507, "calib/mu_c": 0.1701801801801802, "calib/mu_w": 0.1645, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.021314741035856576, "calib/std_conf": 0.22175197878753855, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22115, "calib/step_q_c_n": 800.0, "calib/step_q_gap": -0.04960046329294365, "calib/step_q_w": 0.27075046329294367, "calib/step_q_w_n": 1403.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2847.0, "completions/max_terminated_length": 2847.0, "completions/mean_length": 457.51953125, "completions/mean_terminated_length": 461.1220397949219, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.15146666666666667, "grad_norm": 0.005080203525722027, "learning_rate": 1.6111111111111113e-06, "loss": 0.1318, "num_tokens": 30673630.0, "reward": 1.0164768695831299, "reward_std": 0.15442873537540436, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.6188913583755493, "rewards/format_reward_step": 0.98046875, "step": 142 }, { "aux_distill/final_loss": 0.014434231996233393, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.0871773383114487, "aux_distill/mean_u": 0.2575013332634439, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 244.25, "aux_distill/step_loss": 0.8284706622362137, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5594186195415546, "calib/avg_num_step_conf": 7.85546875, "calib/ece": 0.32182071713147414, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.02390438247011952, "calib/gap": 0.045398002305032686, "calib/mean_conf": 0.17985258964143425, "calib/mu_c": 0.20463157894736844, "calib/mu_w": 0.15923357664233576, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.023745019920318716, "calib/std_conf": 0.22477721193051114, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.27124333748443336, "calib/step_q_c_n": 803.0, "calib/step_q_gap": 0.05916138384204925, "calib/step_q_w": 0.2120819536423841, "calib/step_q_w_n": 1208.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2466.0, "completions/max_terminated_length": 2466.0, "completions/mean_length": 442.25390625, "completions/mean_terminated_length": 445.7362060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.15253333333333333, "grad_norm": 0.006814547348767519, "learning_rate": 1.5833333333333333e-06, "loss": 0.0845, "num_tokens": 30894183.0, "reward": 1.0309672355651855, "reward_std": 0.158795565366745, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6361531019210815, "rewards/format_reward_step": 0.98046875, "step": 143 }, { "aux_distill/final_loss": 0.00011426453329477226, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08435902954079211, "aux_distill/mean_u": 0.2626129752490614, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 236.25, "aux_distill/step_loss": 0.8432474881410599, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.543829970472441, "calib/avg_num_step_conf": 7.3828125, "calib/ece": 0.37552941176470583, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.027450980392156862, "calib/gap": 0.014280880905511828, "calib/mean_conf": 0.20078431372549022, "calib/mu_c": 0.20795275590551182, "calib/mu_w": 0.193671875, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03913725490196078, "calib/std_conf": 0.24704245199413424, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3039400665926748, "calib/step_q_c_n": 901.0, "calib/step_q_gap": 0.025193858301471583, "calib/step_q_w": 0.2787462082912032, "calib/step_q_w_n": 989.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1486.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 428.921875, "completions/mean_terminated_length": 430.60394287109375, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.1536, "grad_norm": 0.005530585069209337, "learning_rate": 1.5555555555555558e-06, "loss": 0.1041, "num_tokens": 31108115.0, "reward": 1.0448781251907349, "reward_std": 0.1546696275472641, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6014750003814697, "rewards/format_reward_step": 0.9921875, "step": 144 }, { "aux_distill/final_loss": 0.00016966207363111607, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08153426856733859, "aux_distill/mean_u": 0.2747713157021657, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 263.375, "aux_distill/step_loss": 0.8148336801677942, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5083502667005334, "calib/avg_num_step_conf": 8.23046875, "calib/ece": 0.3741035856573705, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0398406374501992, "calib/gap": -0.0009658369316738469, "calib/mean_conf": 0.179601593625498, "calib/mu_c": 0.17911290322580647, "calib/mu_w": 0.18007874015748032, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.029840637450199204, "calib/std_conf": 0.24324466851793605, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24022246941045605, "calib/step_q_c_n": 899.0, "calib/step_q_gap": 0.004320151529661337, "calib/step_q_w": 0.23590231788079472, "calib/step_q_w_n": 1208.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2820.0, "completions/max_terminated_length": 2820.0, "completions/mean_length": 445.2421875, "completions/mean_terminated_length": 448.7480163574219, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.15466666666666667, "grad_norm": 0.005234123673290014, "learning_rate": 1.527777777777778e-06, "loss": 0.1586, "num_tokens": 31324801.0, "reward": 1.022407054901123, "reward_std": 0.1666136085987091, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5799703598022461, "rewards/format_reward_step": 0.98046875, "step": 145 }, { "aux_distill/final_loss": 0.00016694465716682316, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08627592818811536, "aux_distill/mean_u": 0.2721878745015537, "aux_distill/n_active_final_tok": 30.75, "aux_distill/n_active_tok": 247.875, "aux_distill/step_loss": 0.8622584287077188, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.46952868103763246, "calib/avg_num_step_conf": 8.11328125, "calib/ece": 0.2880650406504066, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.008130081300813009, "calib/gap": -0.04064888564121302, "calib/mean_conf": 0.17754471544715447, "calib/mu_c": 0.15094117647058825, "calib/mu_w": 0.19159006211180127, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06004065040650406, "calib/std_conf": 0.22423902940717497, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2600762195121951, "calib/step_q_c_n": 656.0, "calib/step_q_gap": 0.010128295515010044, "calib/step_q_w": 0.24994792399718507, "calib/step_q_w_n": 1421.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 3071.0, "completions/max_terminated_length": 3071.0, "completions/mean_length": 442.890625, "completions/mean_terminated_length": 455.3413391113281, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.15573333333333333, "grad_norm": 0.0051604448817670345, "learning_rate": 1.5e-06, "loss": 0.0563, "num_tokens": 31545397.0, "reward": 0.9717497825622559, "reward_std": 0.16569262742996216, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.6505308747291565, "rewards/format_reward_step": 0.9609375, "step": 146 }, { "aux_distill/final_loss": 0.00021439029717384983, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08166500553488731, "aux_distill/mean_u": 0.254837708526204, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 235.5, "aux_distill/step_loss": 0.8160068672150373, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5042112082928409, "calib/avg_num_step_conf": 7.4375, "calib/ece": 0.3267063492063492, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.011904761904761904, "calib/gap": 0.008557823129251713, "calib/mean_conf": 0.17615079365079364, "calib/mu_c": 0.18114285714285713, "calib/mu_w": 0.17258503401360542, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.043095238095238096, "calib/std_conf": 0.2297822146080148, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2556688741721854, "calib/step_q_c_n": 755.0, "calib/step_q_gap": 0.030027185747468293, "calib/step_q_w": 0.22564168842471713, "calib/step_q_w_n": 1149.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2732.0, "completions/max_terminated_length": 2732.0, "completions/mean_length": 446.09765625, "completions/mean_terminated_length": 447.8470764160156, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.1568, "grad_norm": 0.00513486098498106, "learning_rate": 1.4722222222222225e-06, "loss": 0.1207, "num_tokens": 31763278.0, "reward": 1.0193654298782349, "reward_std": 0.1320839822292328, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6402933597564697, "rewards/format_reward_step": 0.984375, "step": 147 }, { "aux_distill/final_loss": 0.0015696126377520159, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08231577160768211, "aux_distill/mean_u": 0.2729051826363479, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 234.5, "aux_distill/step_loss": 0.8184488611295819, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5621848739495798, "calib/avg_num_step_conf": 7.6796875, "calib/ece": 0.37465863453815257, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.020080321285140562, "calib/gap": 0.05571687136393019, "calib/mean_conf": 0.17152610441767072, "calib/mu_c": 0.19815384615384612, "calib/mu_w": 0.14243697478991593, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.012048192771084338, "calib/std_conf": 0.22889309407526154, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2675689851767389, "calib/step_q_c_n": 877.0, "calib/step_q_gap": 0.0674587923392733, "calib/step_q_w": 0.2001101928374656, "calib/step_q_w_n": 1089.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2866.0, "completions/max_terminated_length": 2866.0, "completions/mean_length": 435.62890625, "completions/mean_terminated_length": 442.5436706542969, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.15786666666666666, "grad_norm": 0.005812518764287233, "learning_rate": 1.4444444444444445e-06, "loss": 0.0476, "num_tokens": 31979911.0, "reward": 1.0257089138031006, "reward_std": 0.18910831212997437, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5787616968154907, "rewards/format_reward_step": 0.96484375, "step": 148 }, { "aux_distill/final_loss": 0.00033798599542933516, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08270317735150456, "aux_distill/mean_u": 0.24807493109380968, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 265.375, "aux_distill/step_loss": 0.8260177988559008, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5211913683938494, "calib/avg_num_step_conf": 8.74609375, "calib/ece": 0.32615038645418326, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.01195219123505976, "calib/gap": 0.01249461022095874, "calib/mean_conf": 0.15953684063745022, "calib/mu_c": 0.16660550458715592, "calib/mu_w": 0.15411089436619718, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.025712139442231077, "calib/std_conf": 0.214172238802976, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.26460687960687956, "calib/step_q_c_n": 814.0, "calib/step_q_gap": -0.023786102849260804, "calib/step_q_w": 0.28839298245614037, "calib/step_q_w_n": 1425.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2641.0, "completions/max_terminated_length": 2641.0, "completions/mean_length": 498.17578125, "completions/mean_terminated_length": 502.0984191894531, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.15893333333333334, "grad_norm": 0.004999135155230761, "learning_rate": 1.4166666666666667e-06, "loss": 0.0984, "num_tokens": 32211900.0, "reward": 1.0136160850524902, "reward_std": 0.15585671365261078, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.6248883008956909, "rewards/format_reward_step": 0.9765625, "step": 149 }, { "aux_distill/final_loss": 0.011310860400101319, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09089664276689291, "aux_distill/mean_u": 0.28932033106413374, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 231.0, "aux_distill/step_loss": 0.87503382563591, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5576985943118666, "calib/avg_num_step_conf": 7.66015625, "calib/ece": 0.31830604838709686, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.036290322580645164, "calib/gap": 0.04805155279503104, "calib/mean_conf": 0.1848391129032258, "calib/mu_c": 0.2106086956521739, "calib/mu_w": 0.16255714285714287, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.019717741935483875, "calib/std_conf": 0.2426463524998287, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2433606557377049, "calib/step_q_c_n": 732.0, "calib/step_q_gap": -0.026084584294841867, "calib/step_q_w": 0.2694452400325468, "calib/step_q_w_n": 1229.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2989.0, "completions/max_terminated_length": 2989.0, "completions/mean_length": 401.83203125, "completions/mean_terminated_length": 411.47601318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.16, "grad_norm": 0.0059464010410010815, "learning_rate": 1.3888888888888892e-06, "loss": 0.0271, "num_tokens": 32419729.0, "reward": 1.018291711807251, "reward_std": 0.17183183133602142, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6186148524284363, "rewards/format_reward_step": 0.96875, "step": 150 }, { "aux_distill/final_loss": 0.0001667296775167415, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08787193079479039, "aux_distill/mean_u": 0.2994148822854776, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 254.25, "aux_distill/step_loss": 0.8782191015779972, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5647885101010102, "calib/avg_num_step_conf": 8.1484375, "calib/ece": 0.21665322580645163, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.036290322580645164, "calib/gap": 0.050997474747474764, "calib/mean_conf": 0.16366935483870967, "calib/mu_c": 0.19986111111111113, "calib/mu_w": 0.14886363636363636, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.045000000000000005, "calib/std_conf": 0.22754263059544128, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.25122652631578946, "calib/step_q_c_n": 475.0, "calib/step_q_gap": 0.041964577215851534, "calib/step_q_w": 0.20926194909993792, "calib/step_q_w_n": 1611.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2853.0, "completions/max_terminated_length": 2853.0, "completions/mean_length": 468.66796875, "completions/mean_terminated_length": 478.0039978027344, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.16106666666666666, "grad_norm": 0.004916236270219088, "learning_rate": 1.3611111111111112e-06, "loss": 0.0798, "num_tokens": 32646732.0, "reward": 0.9869068264961243, "reward_std": 0.16195714473724365, "rewards/accuracy_reward_step": 0.28125, "rewards/final_brier_reward_step": 0.7238136529922485, "rewards/format_reward_step": 0.96875, "step": 151 }, { "aux_distill/final_loss": 0.00022216508995143158, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08649269258603454, "aux_distill/mean_u": 0.26949824992257976, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 255.75, "aux_distill/step_loss": 0.8642604164779186, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5221854304635761, "calib/avg_num_step_conf": 8.57421875, "calib/ece": 0.29270916334661357, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.02390438247011952, "calib/gap": 0.032715231788079474, "calib/mean_conf": 0.17031872509960158, "calib/mu_c": 0.19, "calib/mu_w": 0.15728476821192053, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.032310756972111554, "calib/std_conf": 0.23628916127706756, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23838709677419356, "calib/step_q_c_n": 744.0, "calib/step_q_gap": 0.029082479269024686, "calib/step_q_w": 0.20930461750516888, "calib/step_q_w_n": 1451.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2859.0, "completions/max_terminated_length": 2859.0, "completions/mean_length": 436.84765625, "completions/mean_terminated_length": 443.7817687988281, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.16213333333333332, "grad_norm": 0.00594055000692606, "learning_rate": 1.3333333333333334e-06, "loss": 0.0368, "num_tokens": 32863957.0, "reward": 1.0130953788757324, "reward_std": 0.16022071242332458, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.655097246170044, "rewards/format_reward_step": 0.98046875, "step": 152 }, { "aux_distill/final_loss": 0.00020855705406574998, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08855838561430573, "aux_distill/mean_u": 0.3327323315100791, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 248.0, "aux_distill/step_loss": 0.8849581722170115, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5059855322962679, "calib/avg_num_step_conf": 8.00390625, "calib/ece": 0.39192, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.008, "calib/gap": -0.0032622751424364327, "calib/mean_conf": 0.17192, "calib/mu_c": 0.17031496062992127, "calib/mu_w": 0.1735772357723577, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02792, "calib/std_conf": 0.22538037536573588, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2698853211009174, "calib/step_q_c_n": 872.0, "calib/step_q_gap": 0.03537385126234477, "calib/step_q_w": 0.23451146983857263, "calib/step_q_w_n": 1177.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3011.0, "completions/max_terminated_length": 3011.0, "completions/mean_length": 470.8046875, "completions/mean_terminated_length": 476.3873596191406, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.1632, "grad_norm": 0.005652016494423151, "learning_rate": 1.3055555555555556e-06, "loss": 0.1091, "num_tokens": 33091803.0, "reward": 1.0181667804718018, "reward_std": 0.16404196619987488, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5675835609436035, "rewards/format_reward_step": 0.97265625, "step": 153 }, { "aux_distill/final_loss": 0.0014197659111232497, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08392115507740527, "aux_distill/mean_u": 0.27669696642811975, "aux_distill/n_active_final_tok": 31.875, "aux_distill/n_active_tok": 222.5, "aux_distill/step_loss": 0.8349522426724434, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.44494995450409464, "calib/avg_num_step_conf": 6.953125, "calib/ece": 0.32632941176470587, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.027450980392156862, "calib/gap": -0.046509684128428375, "calib/mean_conf": 0.1985333333333333, "calib/mu_c": 0.16989795918367354, "calib/mu_w": 0.2164076433121019, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07027450980392155, "calib/std_conf": 0.2530479879054693, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25908695652173913, "calib/step_q_c_n": 690.0, "calib/step_q_gap": -0.017673685680095752, "calib/step_q_w": 0.2767606422018349, "calib/step_q_w_n": 1090.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2871.0, "completions/max_terminated_length": 2871.0, "completions/mean_length": 402.91796875, "completions/mean_terminated_length": 402.91796875, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.16426666666666667, "grad_norm": 0.007197726983577013, "learning_rate": 1.2777777777777779e-06, "loss": 0.1208, "num_tokens": 33299390.0, "reward": 1.0057168006896973, "reward_std": 0.14716506004333496, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.636433482170105, "rewards/format_reward_step": 0.9921875, "step": 154 }, { "aux_distill/final_loss": 0.013410380759296459, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08857955387793481, "aux_distill/mean_u": 0.2451294186905701, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 210.875, "aux_distill/step_loss": 0.8455643802881241, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.46254082743603697, "calib/avg_num_step_conf": 6.6015625, "calib/ece": 0.27627450980392154, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.023529411764705882, "calib/gap": -0.04813078388677197, "calib/mean_conf": 0.1957254901960784, "calib/mu_c": 0.16420454545454544, "calib/mu_w": 0.2123353293413174, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06345098039215685, "calib/std_conf": 0.23118315279138232, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24561983471074378, "calib/step_q_c_n": 605.0, "calib/step_q_gap": -0.012149750542712445, "calib/step_q_w": 0.2577695852534562, "calib/step_q_w_n": 1085.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1083.0, "completions/max_terminated_length": 1083.0, "completions/mean_length": 392.8125, "completions/mean_terminated_length": 394.35296630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.16533333333333333, "grad_norm": 0.007726035080850124, "learning_rate": 1.25e-06, "loss": 0.0851, "num_tokens": 33507166.0, "reward": 1.0068411827087402, "reward_std": 0.12614724040031433, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.67383873462677, "rewards/format_reward_step": 0.99609375, "step": 155 }, { "aux_distill/final_loss": 0.009609746949308828, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08573674759827554, "aux_distill/mean_u": 0.2627040019309309, "aux_distill/n_active_final_tok": 30.875, "aux_distill/n_active_tok": 251.875, "aux_distill/step_loss": 0.8285382241010666, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5053353119920855, "calib/avg_num_step_conf": 8.1875, "calib/ece": 0.2689887096774194, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.016129032258064516, "calib/gap": 0.015383986997385302, "calib/mean_conf": 0.176091935483871, "calib/mu_c": 0.18595505617977526, "calib/mu_w": 0.17057106918238996, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.043104838709677414, "calib/std_conf": 0.2296635068672785, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.25554209650582366, "calib/step_q_c_n": 601.0, "calib/step_q_gap": 0.00885728045231196, "calib/step_q_w": 0.2466848160535117, "calib/step_q_w_n": 1495.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 466.98828125, "completions/mean_terminated_length": 472.52569580078125, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.1664, "grad_norm": 0.005722984671592712, "learning_rate": 1.2222222222222223e-06, "loss": 0.1773, "num_tokens": 33731475.0, "reward": 0.9889872670173645, "reward_std": 0.15785011649131775, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.665474534034729, "rewards/format_reward_step": 0.96484375, "step": 156 }, { "aux_distill/final_loss": 0.00023043508281261893, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08705176319926977, "aux_distill/mean_u": 0.29570876915511973, "aux_distill/n_active_final_tok": 31.875, "aux_distill/n_active_tok": 234.625, "aux_distill/step_loss": 0.8698263075202703, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5526010147319518, "calib/avg_num_step_conf": 7.38671875, "calib/ece": 0.34796875000000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0078125, "calib/gap": 0.046312121767834225, "calib/mean_conf": 0.157890625, "calib/mu_c": 0.18195121951219512, "calib/mu_w": 0.1356390977443609, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.012695312499999997, "calib/std_conf": 0.20688596022231517, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25274021352313164, "calib/step_q_c_n": 843.0, "calib/step_q_gap": -0.0010670383852653398, "calib/step_q_w": 0.253807251908397, "calib/step_q_w_n": 1048.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1192.0, "completions/max_terminated_length": 1192.0, "completions/mean_length": 426.03125, "completions/mean_terminated_length": 427.7019958496094, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.16746666666666668, "grad_norm": 0.005689004436135292, "learning_rate": 1.1944444444444446e-06, "loss": 0.0896, "num_tokens": 33944267.0, "reward": 1.053556203842163, "reward_std": 0.11408466100692749, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.626643717288971, "rewards/format_reward_step": 1.0, "step": 157 }, { "aux_distill/final_loss": 0.011379823592960747, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08688726811669767, "aux_distill/mean_u": 0.28276713935726544, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 221.75, "aux_distill/step_loss": 0.8347331937402487, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5043114543114543, "calib/avg_num_step_conf": 7.3359375, "calib/ece": 0.45290796812749007, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.02390438247011952, "calib/gap": -0.015451994851994888, "calib/mean_conf": 0.14390478087649403, "calib/mu_c": 0.13707142857142857, "calib/mu_w": 0.15252342342342345, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01952191235059761, "calib/std_conf": 0.20253795605387614, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25101149425287356, "calib/step_q_c_n": 870.0, "calib/step_q_gap": -0.02702808908045984, "calib/step_q_w": 0.2780395833333334, "calib/step_q_w_n": 1008.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2836.0, "completions/max_terminated_length": 2836.0, "completions/mean_length": 398.9609375, "completions/mean_terminated_length": 403.69171142578125, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.16853333333333334, "grad_norm": 0.0059169139713048935, "learning_rate": 1.1666666666666668e-06, "loss": 0.1087, "num_tokens": 34151641.0, "reward": 1.025167465209961, "reward_std": 0.1500907689332962, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.5229910612106323, "rewards/format_reward_step": 0.98046875, "step": 158 }, { "aux_distill/final_loss": 0.011764087983692662, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08607646217569709, "aux_distill/mean_u": 0.2612328965109033, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 233.375, "aux_distill/step_loss": 0.8254723343998194, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5078044707091469, "calib/avg_num_step_conf": 7.53515625, "calib/ece": 0.3394900398406374, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.027888446215139442, "calib/gap": 0.011125000000000024, "calib/mean_conf": 0.16696414342629481, "calib/mu_c": 0.173125, "calib/mu_w": 0.16199999999999998, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.030119521912350594, "calib/std_conf": 0.2246467849695932, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25579856115107913, "calib/step_q_c_n": 695.0, "calib/step_q_gap": 0.005628382869069359, "calib/step_q_w": 0.25017017828200977, "calib/step_q_w_n": 1234.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2003.0, "completions/max_terminated_length": 2003.0, "completions/mean_length": 398.84375, "completions/mean_terminated_length": 405.17462158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.1696, "grad_norm": 0.006524218712002039, "learning_rate": 1.138888888888889e-06, "loss": 0.0829, "num_tokens": 34358529.0, "reward": 1.0178043842315674, "reward_std": 0.16306878626346588, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6176400780677795, "rewards/format_reward_step": 0.98046875, "step": 159 }, { "aux_distill/final_loss": 0.0002871268879971467, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.0842723447130993, "aux_distill/mean_u": 0.2597806498301868, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 231.625, "aux_distill/step_loss": 0.8418620582669973, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5746911991765311, "calib/avg_num_step_conf": 7.7109375, "calib/ece": 0.3167199999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.032, "calib/gap": 0.09083376222336595, "calib/mean_conf": 0.16751999999999997, "calib/mu_c": 0.21620689655172415, "calib/mu_w": 0.1253731343283582, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.01012, "calib/std_conf": 0.23234338725257492, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2789416993464052, "calib/step_q_c_n": 765.0, "calib/step_q_gap": 0.07038090530174024, "calib/step_q_w": 0.208560794044665, "calib/step_q_w_n": 1209.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2948.0, "completions/max_terminated_length": 2948.0, "completions/mean_length": 435.4765625, "completions/mean_terminated_length": 440.64031982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.17066666666666666, "grad_norm": 0.0052119893953204155, "learning_rate": 1.111111111111111e-06, "loss": 0.1062, "num_tokens": 34574851.0, "reward": 1.0305633544921875, "reward_std": 0.1793225109577179, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.6353453397750854, "rewards/format_reward_step": 0.97265625, "step": 160 }, { "aux_distill/final_loss": 0.02305936315406143, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08618140441831201, "aux_distill/mean_u": 0.25902356828065554, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 227.25, "aux_distill/step_loss": 0.7926359372213483, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4836674849972722, "calib/avg_num_step_conf": 7.390625, "calib/ece": 0.4576392, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.016, "calib/gap": 0.004739416257501322, "calib/mean_conf": 0.17668079999999997, "calib/mu_c": 0.1784628205128205, "calib/mu_w": 0.17372340425531918, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005159999999999999, "calib/std_conf": 0.22137640134250985, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26015403846153845, "calib/step_q_c_n": 1040.0, "calib/step_q_gap": 0.024963897616468028, "calib/step_q_w": 0.23519014084507042, "calib/step_q_w_n": 852.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2316.0, "completions/max_terminated_length": 2316.0, "completions/mean_length": 455.765625, "completions/mean_terminated_length": 459.3543395996094, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.17173333333333332, "grad_norm": 0.006831606384366751, "learning_rate": 1.0833333333333335e-06, "loss": 0.1544, "num_tokens": 34795447.0, "reward": 1.0461416244506836, "reward_std": 0.17791378498077393, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.5063456892967224, "rewards/format_reward_step": 0.9765625, "step": 161 }, { "aux_distill/final_loss": 0.02008447791160961, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.0881789909908548, "aux_distill/mean_u": 0.2446974276610731, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 213.875, "aux_distill/step_loss": 0.8215364571660757, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5499038461538461, "calib/avg_num_step_conf": 6.68359375, "calib/ece": 0.45469685039370084, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.031496062992125984, "calib/gap": 0.025658205128205103, "calib/mean_conf": 0.18294094488188975, "calib/mu_c": 0.19344666666666666, "calib/mu_w": 0.16778846153846155, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.023543307086614174, "calib/std_conf": 0.23472678131568997, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.26096555323590814, "calib/step_q_c_n": 958.0, "calib/step_q_gap": 0.03366143637003832, "calib/step_q_w": 0.22730411686586982, "calib/step_q_w_n": 753.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1590.0, "completions/max_terminated_length": 1590.0, "completions/mean_length": 397.76171875, "completions/mean_terminated_length": 400.8937072753906, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.1728, "grad_norm": 0.008068778552114964, "learning_rate": 1.0555555555555557e-06, "loss": 0.0763, "num_tokens": 35001418.0, "reward": 1.0518012046813965, "reward_std": 0.1718236356973648, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.5371962189674377, "rewards/format_reward_step": 0.98046875, "step": 162 }, { "aux_distill/final_loss": 0.010780644172427856, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08723368018399924, "aux_distill/mean_u": 0.30619983125294814, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 251.375, "aux_distill/step_loss": 0.8399948664009571, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6165784718637067, "calib/avg_num_step_conf": 8.07421875, "calib/ece": 0.2794466403162056, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.03162055335968379, "calib/gap": 0.07764068146618483, "calib/mean_conf": 0.18735177865612646, "calib/mu_c": 0.2330769230769231, "calib/mu_w": 0.15543624161073827, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.027865612648221343, "calib/std_conf": 0.24792519026349885, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2701359516616314, "calib/step_q_c_n": 662.0, "calib/step_q_gap": 0.05466264205309046, "calib/step_q_w": 0.21547330960854094, "calib/step_q_w_n": 1405.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2388.0, "completions/max_terminated_length": 2388.0, "completions/mean_length": 458.11328125, "completions/mean_terminated_length": 461.720458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.17386666666666667, "grad_norm": 0.005950353108346462, "learning_rate": 1.0277777777777777e-06, "loss": 0.0649, "num_tokens": 35223527.0, "reward": 1.0352507829666138, "reward_std": 0.13757582008838654, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.6759703159332275, "rewards/format_reward_step": 0.98828125, "step": 163 }, { "aux_distill/final_loss": 0.011660294957778206, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08999121165834367, "aux_distill/mean_u": 0.2645727204325913, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 251.125, "aux_distill/step_loss": 0.8649312127381563, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5822498029944838, "calib/avg_num_step_conf": 8.3515625, "calib/ece": 0.2913253012048192, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.01606425702811245, "calib/gap": 0.07727935382190704, "calib/mean_conf": 0.17855421686746986, "calib/mu_c": 0.2223148148148148, "calib/mu_w": 0.14503546099290776, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01807228915662651, "calib/std_conf": 0.2300661640302972, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3109315068493151, "calib/step_q_c_n": 730.0, "calib/step_q_gap": 0.07417184775840596, "calib/step_q_w": 0.23675965909090912, "calib/step_q_w_n": 1408.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2192.0, "completions/max_terminated_length": 2192.0, "completions/mean_length": 480.796875, "completions/mean_terminated_length": 488.4285888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 163.0, "epoch": 0.17493333333333333, "grad_norm": 0.005459357984364033, "learning_rate": 1.0000000000000002e-06, "loss": 0.0759, "num_tokens": 35452747.0, "reward": 1.0251986980438232, "reward_std": 0.1771778166294098, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.6558664441108704, "rewards/format_reward_step": 0.97265625, "step": 164 }, { "aux_distill/final_loss": 0.002788644074826152, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08677215431816876, "aux_distill/mean_u": 0.2987145792600321, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 240.625, "aux_distill/step_loss": 0.8593556005507708, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.46673183447377, "calib/avg_num_step_conf": 7.51953125, "calib/ece": 0.31954291338582674, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.027559055118110236, "calib/gap": -0.0377904138155751, "calib/mean_conf": 0.18053582677165353, "calib/mu_c": 0.15747474747474746, "calib/mu_w": 0.19526516129032256, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05515748031496063, "calib/std_conf": 0.23720780852472764, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2422222222222222, "calib/step_q_c_n": 729.0, "calib/step_q_gap": -0.024287476774433298, "calib/step_q_w": 0.2665096989966555, "calib/step_q_w_n": 1196.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2579.0, "completions/max_terminated_length": 2579.0, "completions/mean_length": 463.6953125, "completions/mean_terminated_length": 463.6953125, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.176, "grad_norm": 0.005407247692346573, "learning_rate": 9.722222222222224e-07, "loss": 0.1211, "num_tokens": 35677029.0, "reward": 1.009002685546875, "reward_std": 0.13533712923526764, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.63909912109375, "rewards/format_reward_step": 0.9921875, "step": 165 }, { "aux_distill/final_loss": 0.00879952699824571, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08791387150995433, "aux_distill/mean_u": 0.30051730178953384, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 249.0, "aux_distill/step_loss": 0.8527401220053434, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5694085743801653, "calib/avg_num_step_conf": 7.78515625, "calib/ece": 0.3532530120481927, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.020080321285140562, "calib/gap": 0.0736621900826446, "calib/mean_conf": 0.18232931726907634, "calib/mu_c": 0.21812499999999996, "calib/mu_w": 0.14446280991735536, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01076305220883534, "calib/std_conf": 0.22007710401306826, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.29197546012269937, "calib/step_q_c_n": 815.0, "calib/step_q_gap": 0.06068513754205418, "calib/step_q_w": 0.2312903225806452, "calib/step_q_w_n": 1178.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2907.0, "completions/max_terminated_length": 2907.0, "completions/mean_length": 511.38671875, "completions/mean_terminated_length": 515.4133911132812, "completions/min_length": 0.0, "completions/min_terminated_length": 187.0, "epoch": 0.17706666666666668, "grad_norm": 0.0055147032253444195, "learning_rate": 9.444444444444445e-07, "loss": 0.1683, "num_tokens": 35914128.0, "reward": 1.0380951166152954, "reward_std": 0.1842813938856125, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6074402332305908, "rewards/format_reward_step": 0.96875, "step": 166 }, { "aux_distill/final_loss": 0.010343943324642169, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08901638758834451, "aux_distill/mean_u": 0.32507894317157265, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 250.875, "aux_distill/step_loss": 0.8591320309787989, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5233385826771654, "calib/avg_num_step_conf": 8.05859375, "calib/ece": 0.40205555555555555, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.051587301587301584, "calib/gap": 0.004040692913385879, "calib/mean_conf": 0.18445238095238095, "calib/mu_c": 0.18645669291338585, "calib/mu_w": 0.18241599999999997, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.04126984126984126, "calib/std_conf": 0.24745055966487134, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.25750778816199377, "calib/step_q_c_n": 963.0, "calib/step_q_gap": 0.03046051543472103, "calib/step_q_w": 0.22704727272727274, "calib/step_q_w_n": 1100.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2727.0, "completions/max_terminated_length": 2727.0, "completions/mean_length": 459.44140625, "completions/mean_terminated_length": 463.0590515136719, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.17813333333333334, "grad_norm": 0.005532347597181797, "learning_rate": 9.166666666666666e-07, "loss": 0.092, "num_tokens": 36137353.0, "reward": 1.0235297679901123, "reward_std": 0.15561902523040771, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5744034051895142, "rewards/format_reward_step": 0.9765625, "step": 167 }, { "aux_distill/final_loss": 0.00022977958269621013, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08777423610445112, "aux_distill/mean_u": 0.31815578652243576, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 258.5, "aux_distill/step_loss": 0.8770530074834824, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.42734788359788356, "calib/avg_num_step_conf": 8.41796875, "calib/ece": 0.350978313253012, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.024096385542168676, "calib/gap": -0.0628418253968254, "calib/mean_conf": 0.1810698795180723, "calib/mu_c": 0.14472761904761905, "calib/mu_w": 0.20756944444444445, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05518072289156627, "calib/std_conf": 0.24071936988230022, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21596614285714286, "calib/step_q_c_n": 700.0, "calib/step_q_gap": -0.022157568483063356, "calib/step_q_w": 0.2381237113402062, "calib/step_q_w_n": 1455.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2648.0, "completions/max_terminated_length": 2648.0, "completions/mean_length": 488.1171875, "completions/mean_terminated_length": 493.9051513671875, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.1792, "grad_norm": 0.004909490700811148, "learning_rate": 8.88888888888889e-07, "loss": 0.1241, "num_tokens": 36366983.0, "reward": 0.9878915548324585, "reward_std": 0.16598960757255554, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.5929707288742065, "rewards/format_reward_step": 0.97265625, "step": 168 }, { "aux_distill/final_loss": 0.0002530997130634205, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08274583891034126, "aux_distill/mean_u": 0.26228293206042697, "aux_distill/n_active_final_tok": 30.875, "aux_distill/n_active_tok": 229.125, "aux_distill/step_loss": 0.8266990734264255, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4402116402116403, "calib/avg_num_step_conf": 7.859375, "calib/ece": 0.3647327935222672, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.044534412955465584, "calib/gap": -0.037627314814814766, "calib/mean_conf": 0.19575303643724695, "calib/mu_c": 0.17518750000000002, "calib/mu_w": 0.2128148148148148, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.053522267206477736, "calib/std_conf": 0.25050771065591476, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22687836107554418, "calib/step_q_c_n": 781.0, "calib/step_q_gap": 0.0034665007993459895, "calib/step_q_w": 0.2234118602761982, "calib/step_q_w_n": 1231.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2956.0, "completions/max_terminated_length": 2956.0, "completions/mean_length": 427.90625, "completions/mean_terminated_length": 438.176025390625, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.18026666666666666, "grad_norm": 0.005392855033278465, "learning_rate": 8.611111111111112e-07, "loss": 0.0505, "num_tokens": 36580711.0, "reward": 0.9927282929420471, "reward_std": 0.17945927381515503, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.5831128358840942, "rewards/format_reward_step": 0.96484375, "step": 169 }, { "aux_distill/final_loss": 0.000465752248373974, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08292529941536486, "aux_distill/mean_u": 0.2634956625427439, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 250.5, "aux_distill/step_loss": 0.827855721116066, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.45680815160955346, "calib/avg_num_step_conf": 8.375, "calib/ece": 0.3522310756972111, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0398406374501992, "calib/gap": -0.0643068535825545, "calib/mean_conf": 0.2117529880478088, "calib/mu_c": 0.17485981308411214, "calib/mu_w": 0.23916666666666664, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.06884462151394423, "calib/std_conf": 0.25651222321441575, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2737416879795396, "calib/step_q_c_n": 782.0, "calib/step_q_gap": 0.017442495615369313, "calib/step_q_w": 0.2562991923641703, "calib/step_q_w_n": 1362.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2360.0, "completions/max_terminated_length": 2360.0, "completions/mean_length": 456.97265625, "completions/mean_terminated_length": 462.3913269042969, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.18133333333333335, "grad_norm": 0.00585015956312418, "learning_rate": 8.333333333333333e-07, "loss": 0.0625, "num_tokens": 36801848.0, "reward": 0.9958788752555847, "reward_std": 0.18888312578201294, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.5972265601158142, "rewards/format_reward_step": 0.9765625, "step": 170 }, { "aux_distill/final_loss": 0.01088693686347142, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08538034278899431, "aux_distill/mean_u": 0.26036624686611903, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 242.375, "aux_distill/step_loss": 0.8211425989866257, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6052903137039075, "calib/avg_num_step_conf": 7.57421875, "calib/ece": 0.27348, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.032, "calib/gap": 0.06020776004402861, "calib/mean_conf": 0.18684, "calib/mu_c": 0.2248913043478261, "calib/mu_w": 0.16468354430379747, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04616000000000001, "calib/std_conf": 0.24582598398053854, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.26345521023765994, "calib/step_q_c_n": 547.0, "calib/step_q_gap": 0.050797164260648425, "calib/step_q_w": 0.21265804597701152, "calib/step_q_w_n": 1392.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3022.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 468.95703125, "completions/mean_terminated_length": 470.7961120605469, "completions/min_length": 0.0, "completions/min_terminated_length": 92.0, "epoch": 0.1824, "grad_norm": 0.00639989972114563, "learning_rate": 8.055555555555557e-07, "loss": 0.1331, "num_tokens": 37028797.0, "reward": 1.010830283164978, "reward_std": 0.17146757245063782, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.685723066329956, "rewards/format_reward_step": 0.9765625, "step": 171 }, { "aux_distill/final_loss": 0.00034214909805996285, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08222295809537172, "aux_distill/mean_u": 0.22643506384524986, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 228.5, "aux_distill/step_loss": 0.8212031172588468, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4602171704343409, "calib/avg_num_step_conf": 7.4609375, "calib/ece": 0.38784860557768935, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.05976095617529881, "calib/gap": -0.02461772923545849, "calib/mean_conf": 0.20697211155378487, "calib/mu_c": 0.19451612903225804, "calib/mu_w": 0.21913385826771653, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.05039840637450199, "calib/std_conf": 0.26717422583647216, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2627016645326504, "calib/step_q_c_n": 781.0, "calib/step_q_gap": 0.02922070793389045, "calib/step_q_w": 0.23348095659875998, "calib/step_q_w_n": 1129.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2903.0, "completions/max_terminated_length": 2903.0, "completions/mean_length": 444.09765625, "completions/mean_terminated_length": 449.3636474609375, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.18346666666666667, "grad_norm": 0.005332083906978369, "learning_rate": 7.777777777777779e-07, "loss": 0.099, "num_tokens": 37245838.0, "reward": 1.015856385231018, "reward_std": 0.17418715357780457, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5707753896713257, "rewards/format_reward_step": 0.9765625, "step": 172 }, { "aux_distill/final_loss": 0.00040317631521702424, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08041488449089229, "aux_distill/mean_u": 0.23427894932425683, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 234.125, "aux_distill/step_loss": 0.8029392957687378, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5300831202046036, "calib/avg_num_step_conf": 7.40234375, "calib/ece": 0.36290836653386443, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0398406374501992, "calib/gap": -0.00978644501278772, "calib/mean_conf": 0.2147808764940239, "calib/mu_c": 0.20947826086956522, "calib/mu_w": 0.21926470588235294, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05976095617529881, "calib/std_conf": 0.2703580549591661, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.32210151691948663, "calib/step_q_c_n": 857.0, "calib/step_q_gap": 0.04899939745898568, "calib/step_q_w": 0.27310211946050095, "calib/step_q_w_n": 1038.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2371.0, "completions/max_terminated_length": 2371.0, "completions/mean_length": 445.33984375, "completions/mean_terminated_length": 450.6205749511719, "completions/min_length": 0.0, "completions/min_terminated_length": 129.0, "epoch": 0.18453333333333333, "grad_norm": 0.00578544195741415, "learning_rate": 7.5e-07, "loss": 0.0811, "num_tokens": 37463005.0, "reward": 1.0161224603652954, "reward_std": 0.1734548807144165, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6025574207305908, "rewards/format_reward_step": 0.98046875, "step": 173 }, { "aux_distill/final_loss": 0.004824105270472501, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08477919397410005, "aux_distill/mean_u": 0.27541401115578373, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 229.75, "aux_distill/step_loss": 0.8333196099847555, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5181578947368422, "calib/avg_num_step_conf": 7.1953125, "calib/ece": 0.3273809523809524, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.023809523809523808, "calib/gap": -0.027063157894736817, "calib/mean_conf": 0.17952380952380953, "calib/mu_c": 0.1632, "calib/mu_w": 0.19026315789473683, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.05503968253968254, "calib/std_conf": 0.2326549906432308, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.23540055248618785, "calib/step_q_c_n": 724.0, "calib/step_q_gap": -0.008065816029017875, "calib/step_q_w": 0.24346636851520573, "calib/step_q_w_n": 1118.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3013.0, "completions/max_terminated_length": 3013.0, "completions/mean_length": 474.96875, "completions/mean_terminated_length": 476.8313903808594, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.1856, "grad_norm": 0.005676957778632641, "learning_rate": 7.222222222222222e-07, "loss": 0.1244, "num_tokens": 37688829.0, "reward": 1.003028154373169, "reward_std": 0.16048721969127655, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.6349625587463379, "rewards/format_reward_step": 0.98046875, "step": 174 }, { "aux_distill/final_loss": 0.0006413969849745627, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08384919434320182, "aux_distill/mean_u": 0.27767284051410174, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 230.5, "aux_distill/step_loss": 0.8365677306428552, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.584026910805937, "calib/avg_num_step_conf": 7.5234375, "calib/ece": 0.257093625498008, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.05179282868525897, "calib/gap": 0.05811516853932583, "calib/mean_conf": 0.20035657370517929, "calib/mu_c": 0.23786516853932585, "calib/mu_w": 0.17975000000000002, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.051434262948207174, "calib/std_conf": 0.25878976174392915, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.30354651162790697, "calib/step_q_c_n": 516.0, "calib/step_q_gap": 0.08340360382648854, "calib/step_q_w": 0.22014290780141843, "calib/step_q_w_n": 1410.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1701.0, "completions/max_terminated_length": 1701.0, "completions/mean_length": 424.7421875, "completions/mean_terminated_length": 431.4841613769531, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.18666666666666668, "grad_norm": 0.005720601882785559, "learning_rate": 6.944444444444446e-07, "loss": 0.0035, "num_tokens": 37903387.0, "reward": 1.0067589282989502, "reward_std": 0.1756170243024826, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6892991065979004, "rewards/format_reward_step": 0.9765625, "step": 175 }, { "aux_distill/final_loss": 0.005468177675879815, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09025713754817843, "aux_distill/mean_u": 0.3034085317416427, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 235.75, "aux_distill/step_loss": 0.8861668314784765, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4952225214826216, "calib/avg_num_step_conf": 7.42578125, "calib/ece": 0.35828685258964144, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0199203187250996, "calib/gap": -0.019343337180967035, "calib/mean_conf": 0.16505976095617533, "calib/mu_c": 0.15442477876106195, "calib/mu_w": 0.17376811594202898, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.036573705179282864, "calib/std_conf": 0.21698104369059154, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.21498727735368958, "calib/step_q_c_n": 786.0, "calib/step_q_gap": -0.026922677803261102, "calib/step_q_w": 0.24190995515695068, "calib/step_q_w_n": 1115.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2852.0, "completions/max_terminated_length": 2852.0, "completions/mean_length": 455.3671875, "completions/mean_terminated_length": 457.1529541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.18773333333333334, "grad_norm": 0.00540440808981657, "learning_rate": 6.666666666666667e-07, "loss": 0.1231, "num_tokens": 38124025.0, "reward": 1.0089466571807861, "reward_std": 0.1375408172607422, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5999245643615723, "rewards/format_reward_step": 0.9765625, "step": 176 }, { "aux_distill/final_loss": 0.0003030462994502159, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.0822085008258, "aux_distill/mean_u": 0.2613613272395983, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 241.5, "aux_distill/step_loss": 0.8211758583784103, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6229437229437229, "calib/avg_num_step_conf": 8.3203125, "calib/ece": 0.2645927419354839, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.028225806451612902, "calib/gap": 0.10634052614052619, "calib/mean_conf": 0.18935887096774193, "calib/mu_c": 0.2506761904761905, "calib/mu_w": 0.1443356643356643, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.015282258064516129, "calib/std_conf": 0.24182821032378066, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3088253731343284, "calib/step_q_c_n": 670.0, "calib/step_q_gap": 0.12053085258638321, "calib/step_q_w": 0.18829452054794518, "calib/step_q_w_n": 1460.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2960.0, "completions/max_terminated_length": 2960.0, "completions/mean_length": 433.6796875, "completions/mean_terminated_length": 442.3187255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.1888, "grad_norm": 0.005008679814636707, "learning_rate": 6.388888888888889e-07, "loss": 0.0994, "num_tokens": 38338879.0, "reward": 1.0278247594833374, "reward_std": 0.1698625236749649, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.67283695936203, "rewards/format_reward_step": 0.96875, "step": 177 }, { "aux_distill/final_loss": 0.01449744992100932, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.0894948864588514, "aux_distill/mean_u": 0.2893229335467064, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 244.5, "aux_distill/step_loss": 0.8514564950019121, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5032993785636491, "calib/avg_num_step_conf": 8.0078125, "calib/ece": 0.36684000000000005, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.036, "calib/gap": 0.03321224934332756, "calib/mean_conf": 0.17715999999999998, "calib/mu_c": 0.19429752066115702, "calib/mu_w": 0.16108527131782946, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.030000000000000006, "calib/std_conf": 0.24741126570954688, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.29893296853625173, "calib/step_q_c_n": 731.0, "calib/step_q_gap": 0.04867413608742682, "calib/step_q_w": 0.2502588324488249, "calib/step_q_w_n": 1319.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2712.0, "completions/max_terminated_length": 2712.0, "completions/mean_length": 450.5703125, "completions/mean_terminated_length": 455.9130554199219, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.18986666666666666, "grad_norm": 0.0061003523878753185, "learning_rate": 6.111111111111112e-07, "loss": 0.1086, "num_tokens": 38560297.0, "reward": 1.0192785263061523, "reward_std": 0.18458163738250732, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5932445526123047, "rewards/format_reward_step": 0.97265625, "step": 178 }, { "aux_distill/final_loss": 0.006415818309505994, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08816517679952085, "aux_distill/mean_u": 0.29011263899831896, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 244.625, "aux_distill/step_loss": 0.8624042980372906, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4635210908627502, "calib/avg_num_step_conf": 7.90234375, "calib/ece": 0.3507171314741036, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.03187250996015936, "calib/gap": -0.01652414935644192, "calib/mean_conf": 0.20290836653386454, "calib/mu_c": 0.1941525423728814, "calib/mu_w": 0.21067669172932332, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04175298804780876, "calib/std_conf": 0.2431465127511987, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.272459649122807, "calib/step_q_c_n": 855.0, "calib/step_q_gap": -0.027865693342946363, "calib/step_q_w": 0.3003253424657534, "calib/step_q_w_n": 1168.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2511.0, "completions/max_terminated_length": 2511.0, "completions/mean_length": 423.0, "completions/mean_terminated_length": 426.3307189941406, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.19093333333333334, "grad_norm": 0.005853515118360519, "learning_rate": 5.833333333333334e-07, "loss": 0.1235, "num_tokens": 38774849.0, "reward": 1.02079439163208, "reward_std": 0.1717761904001236, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6001824140548706, "rewards/format_reward_step": 0.98046875, "step": 179 }, { "aux_distill/final_loss": 0.014565010391379474, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08634517737664282, "aux_distill/mean_u": 0.3125829259806991, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 254.5, "aux_distill/step_loss": 0.8197567202150822, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.47761194029850745, "calib/avg_num_step_conf": 8.05078125, "calib/ece": 0.4042125984251968, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.01968503937007874, "calib/gap": -0.00991293532338311, "calib/mean_conf": 0.17893700787401576, "calib/mu_c": 0.17425373134328356, "calib/mu_w": 0.18416666666666667, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02779527559055117, "calib/std_conf": 0.2301370097921813, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25770000000000004, "calib/step_q_c_n": 1100.0, "calib/step_q_gap": 0.03146378772112385, "calib/step_q_w": 0.2262362122788762, "calib/step_q_w_n": 961.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1812.0, "completions/max_terminated_length": 1812.0, "completions/mean_length": 474.96484375, "completions/mean_terminated_length": 476.8274841308594, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.192, "grad_norm": 0.006038557272404432, "learning_rate": 5.555555555555555e-07, "loss": 0.0336, "num_tokens": 39000296.0, "reward": 1.041239619255066, "reward_std": 0.1422034502029419, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5668542981147766, "rewards/format_reward_step": 0.9921875, "step": 180 }, { "aux_distill/final_loss": 0.004080855029542363, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.0861173152225092, "aux_distill/mean_u": 0.26857485475773873, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 253.0, "aux_distill/step_loss": 0.8489305796101689, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.47044448847500164, "calib/avg_num_step_conf": 7.98046875, "calib/ece": 0.3474572, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.036, "calib/gap": -0.03863629218677764, "calib/mean_conf": 0.1844628, "calib/mu_c": 0.16174466019417474, "calib/mu_w": 0.20038095238095238, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05996000000000001, "calib/std_conf": 0.24661849844681155, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2557909502262444, "calib/step_q_c_n": 663.0, "calib/step_q_gap": -0.005140933831726646, "calib/step_q_w": 0.260931884057971, "calib/step_q_w_n": 1380.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3021.0, "completions/max_terminated_length": 3021.0, "completions/mean_length": 460.7109375, "completions/mean_terminated_length": 462.5176696777344, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.19306666666666666, "grad_norm": 0.0069434866309165955, "learning_rate": 5.277777777777779e-07, "loss": 0.1898, "num_tokens": 39224502.0, "reward": 0.995327353477478, "reward_std": 0.17556922137737274, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.6117483973503113, "rewards/format_reward_step": 0.9765625, "step": 181 }, { "aux_distill/final_loss": 0.0004914569452694195, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08198971545789391, "aux_distill/mean_u": 0.2469632960280557, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 266.625, "aux_distill/step_loss": 0.8184227719902992, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5324568089430894, "calib/avg_num_step_conf": 8.7109375, "calib/ece": 0.38900398406374503, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.05179282868525897, "calib/gap": 0.006344004065040598, "calib/mean_conf": 0.19282868525896416, "calib/mu_c": 0.1959375, "calib/mu_w": 0.1895934959349594, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03593625498007968, "calib/std_conf": 0.25463740045606115, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2811877133105802, "calib/step_q_c_n": 879.0, "calib/step_q_gap": 0.025394967196590512, "calib/step_q_w": 0.25579274611398967, "calib/step_q_w_n": 1351.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2984.0, "completions/max_terminated_length": 2984.0, "completions/mean_length": 477.7734375, "completions/mean_terminated_length": 483.4387512207031, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.19413333333333332, "grad_norm": 0.005205181427299976, "learning_rate": 5.000000000000001e-07, "loss": 0.1092, "num_tokens": 39452972.0, "reward": 1.028422236442566, "reward_std": 0.17331251502037048, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5763757824897766, "rewards/format_reward_step": 0.98046875, "step": 182 }, { "aux_distill/final_loss": 0.012090174134300469, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08561442897189409, "aux_distill/mean_u": 0.25096491237758906, "aux_distill/n_active_final_tok": 30.375, "aux_distill/n_active_tok": 238.125, "aux_distill/step_loss": 0.8198737557977438, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5145338532435306, "calib/avg_num_step_conf": 7.56640625, "calib/ece": 0.2773170731707317, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.02032520325203252, "calib/gap": 0.007271180432470797, "calib/mean_conf": 0.18552845528455283, "calib/mu_c": 0.19010989010989016, "calib/mu_w": 0.18283870967741936, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.04646341463414634, "calib/std_conf": 0.24589211479249604, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.2803473861720068, "calib/step_q_c_n": 593.0, "calib/step_q_gap": 0.024075064743435348, "calib/step_q_w": 0.2562723214285714, "calib/step_q_w_n": 1344.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3024.0, "completions/max_terminated_length": 3024.0, "completions/mean_length": 499.11328125, "completions/mean_terminated_length": 505.0316467285156, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.1952, "grad_norm": 0.005737299099564552, "learning_rate": 4.7222222222222226e-07, "loss": 0.1189, "num_tokens": 39687425.0, "reward": 0.9754921793937683, "reward_std": 0.20419049263000488, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.6423906087875366, "rewards/format_reward_step": 0.94921875, "step": 183 }, { "aux_distill/final_loss": 0.021152566654563998, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09282559237908572, "aux_distill/mean_u": 0.30204077793520356, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 220.125, "aux_distill/step_loss": 0.8647981937974691, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.49422831632653064, "calib/avg_num_step_conf": 7.43359375, "calib/ece": 0.353554761904762, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.04365079365079365, "calib/gap": -0.01856571428571427, "calib/mean_conf": 0.19049285714285713, "calib/mu_c": 0.18017857142857144, "calib/mu_w": 0.1987442857142857, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.049801587301587305, "calib/std_conf": 0.24813894938712833, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24649655172413795, "calib/step_q_c_n": 725.0, "calib/step_q_gap": 0.02393563413705213, "calib/step_q_w": 0.22256091758708582, "calib/step_q_w_n": 1177.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2555.0, "completions/max_terminated_length": 2555.0, "completions/mean_length": 429.87890625, "completions/mean_terminated_length": 434.9762878417969, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.19626666666666667, "grad_norm": 0.0066966889426112175, "learning_rate": 4.444444444444445e-07, "loss": 0.0652, "num_tokens": 39902754.0, "reward": 1.0150374174118042, "reward_std": 0.15881051123142242, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6081998348236084, "rewards/format_reward_step": 0.984375, "step": 184 }, { "aux_distill/final_loss": 0.011273953358568178, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08622706448659301, "aux_distill/mean_u": 0.2745509519465374, "aux_distill/n_active_final_tok": 30.25, "aux_distill/n_active_tok": 282.75, "aux_distill/step_loss": 0.8284487714990973, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5679812652728754, "calib/avg_num_step_conf": 9.86328125, "calib/ece": 0.3407572016460905, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.03292181069958848, "calib/gap": 0.0603627477599783, "calib/mean_conf": 0.16500411522633743, "calib/mu_c": 0.19655172413793104, "calib/mu_w": 0.13618897637795274, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.014197530864197531, "calib/std_conf": 0.22225491422383706, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.29534482758620695, "calib/step_q_c_n": 812.0, "calib/step_q_gap": 0.08387897819916668, "calib/step_q_w": 0.21146584938704027, "calib/step_q_w_n": 1713.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2871.0, "completions/max_terminated_length": 2871.0, "completions/mean_length": 488.78515625, "completions/mean_terminated_length": 504.5523986816406, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.19733333333333333, "grad_norm": 0.0053436290472745895, "learning_rate": 4.1666666666666667e-07, "loss": 0.0342, "num_tokens": 40134803.0, "reward": 0.9941251277923584, "reward_std": 0.17589130997657776, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.5937190055847168, "rewards/format_reward_step": 0.94140625, "step": 185 }, { "aux_distill/final_loss": 0.0003723709241967299, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08567919465713203, "aux_distill/mean_u": 0.3135761017896143, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 258.25, "aux_distill/step_loss": 0.8556748200207949, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5021263091082196, "calib/avg_num_step_conf": 8.109375, "calib/ece": 0.37051587301587297, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.051587301587301584, "calib/gap": -0.011473817835607775, "calib/mean_conf": 0.18615079365079365, "calib/mu_c": 0.17991304347826084, "calib/mu_w": 0.19138686131386862, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05015873015873015, "calib/std_conf": 0.2517948764390519, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2676820388349515, "calib/step_q_c_n": 824.0, "calib/step_q_gap": 0.021260313595334895, "calib/step_q_w": 0.2464217252396166, "calib/step_q_w_n": 1252.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2271.0, "completions/max_terminated_length": 2271.0, "completions/mean_length": 475.16015625, "completions/mean_terminated_length": 480.79449462890625, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.1984, "grad_norm": 0.004991094581782818, "learning_rate": 3.8888888888888895e-07, "loss": 0.0461, "num_tokens": 40361484.0, "reward": 1.0169349908828735, "reward_std": 0.15747611224651337, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6002761721611023, "rewards/format_reward_step": 0.984375, "step": 186 }, { "aux_distill/final_loss": 0.023335972212407796, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08891357877291739, "aux_distill/mean_u": 0.2898039838522789, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 323.375, "aux_distill/step_loss": 0.8191278586164117, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5362859468258274, "calib/avg_num_step_conf": 10.30859375, "calib/ece": 0.2736546184738956, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.040160642570281124, "calib/gap": 0.03664609332609872, "calib/mean_conf": 0.17381526104417672, "calib/mu_c": 0.19618556701030926, "calib/mu_w": 0.15953947368421054, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.02895582329317269, "calib/std_conf": 0.23625685006968905, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.2699151193633952, "calib/step_q_c_n": 754.0, "calib/step_q_gap": 0.04861559681697611, "calib/step_q_w": 0.22129952254641908, "calib/step_q_w_n": 1885.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2957.0, "completions/max_terminated_length": 2957.0, "completions/mean_length": 542.12109375, "completions/mean_terminated_length": 546.3897705078125, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.19946666666666665, "grad_norm": 0.006230460945516825, "learning_rate": 3.611111111111111e-07, "loss": 0.1089, "num_tokens": 40601811.0, "reward": 1.0012633800506592, "reward_std": 0.17673474550247192, "rewards/accuracy_reward_step": 0.37890625, "rewards/final_brier_reward_step": 0.6548705697059631, "rewards/format_reward_step": 0.96875, "step": 187 }, { "aux_distill/final_loss": 0.00041703841679918696, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.0835479978704825, "aux_distill/mean_u": 0.2888137193823023, "aux_distill/n_active_final_tok": 30.875, "aux_distill/n_active_tok": 249.625, "aux_distill/step_loss": 0.8342288453131914, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5186848958333333, "calib/avg_num_step_conf": 8.39453125, "calib/ece": 0.37031733870967737, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.04838709677419355, "calib/gap": 0.022301666666666692, "calib/mean_conf": 0.20202137096774195, "calib/mu_c": 0.21281250000000002, "calib/mu_w": 0.19051083333333332, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.028104838709677418, "calib/std_conf": 0.25268674327864055, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.25762168141592917, "calib/step_q_c_n": 904.0, "calib/step_q_gap": -0.0039044230017415416, "calib/step_q_w": 0.2615261044176707, "calib/step_q_w_n": 1245.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2887.0, "completions/max_terminated_length": 2887.0, "completions/mean_length": 483.0390625, "completions/mean_terminated_length": 492.661376953125, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.20053333333333334, "grad_norm": 0.005076941102743149, "learning_rate": 3.3333333333333335e-07, "loss": 0.11, "num_tokens": 40829541.0, "reward": 1.0205557346343994, "reward_std": 0.17612425982952118, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5762676000595093, "rewards/format_reward_step": 0.96484375, "step": 188 }, { "aux_distill/final_loss": 0.0004297056702853297, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.0847643711604178, "aux_distill/mean_u": 0.2911292515522893, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 227.25, "aux_distill/step_loss": 0.8463545646518469, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5033221559860905, "calib/avg_num_step_conf": 7.1015625, "calib/ece": 0.4162, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.047244094488188976, "calib/gap": -0.016480104321907607, "calib/mean_conf": 0.1904929133858268, "calib/mu_c": 0.18257727272727273, "calib/mu_w": 0.19905737704918033, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04350393700787401, "calib/std_conf": 0.25277430198707995, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26531559633027524, "calib/step_q_c_n": 872.0, "calib/step_q_gap": 0.002392763349302718, "calib/step_q_w": 0.2629228329809725, "calib/step_q_w_n": 946.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2726.0, "completions/max_terminated_length": 2726.0, "completions/mean_length": 440.05859375, "completions/mean_terminated_length": 441.7843322753906, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.2016, "grad_norm": 0.0055082193575799465, "learning_rate": 3.055555555555556e-07, "loss": 0.0893, "num_tokens": 41049964.0, "reward": 1.0366290807724, "reward_std": 0.14958468079566956, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5654455423355103, "rewards/format_reward_step": 0.9921875, "step": 189 }, { "aux_distill/final_loss": 0.003996539462718829, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08100927027408034, "aux_distill/mean_u": 0.27667689884397945, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 267.125, "aux_distill/step_loss": 0.798103079199791, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5777813095994914, "calib/avg_num_step_conf": 8.4296875, "calib/ece": 0.3763346613545816, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.02390438247011952, "calib/gap": 0.04955626191989826, "calib/mean_conf": 0.18318725099601596, "calib/mu_c": 0.20707692307692308, "calib/mu_w": 0.15752066115702482, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02079681274900398, "calib/std_conf": 0.23040454807515817, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2715440776699029, "calib/step_q_c_n": 1030.0, "calib/step_q_gap": -0.02017578048612545, "calib/step_q_w": 0.29171985815602836, "calib/step_q_w_n": 1128.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2485.0, "completions/max_terminated_length": 2485.0, "completions/mean_length": 512.62890625, "completions/mean_terminated_length": 516.6653442382812, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.20266666666666666, "grad_norm": 0.005521820392459631, "learning_rate": 2.7777777777777776e-07, "loss": 0.1038, "num_tokens": 41286805.0, "reward": 1.0431492328643799, "reward_std": 0.17249631881713867, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5980172157287598, "rewards/format_reward_step": 0.98046875, "step": 190 }, { "aux_distill/final_loss": 0.0003640562290456728, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08253474766388535, "aux_distill/mean_u": 0.24002532989861367, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 249.125, "aux_distill/step_loss": 0.8242552913725376, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5344702467343976, "calib/avg_num_step_conf": 8.171875, "calib/ece": 0.3100803212851406, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0321285140562249, "calib/gap": 0.019625280379997334, "calib/mean_conf": 0.19457831325301206, "calib/mu_c": 0.2058490566037736, "calib/mu_w": 0.18622377622377626, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.039477911646586344, "calib/std_conf": 0.2494828444005057, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26378666666666667, "calib/step_q_c_n": 750.0, "calib/step_q_gap": 0.04716967709885739, "calib/step_q_w": 0.21661698956780928, "calib/step_q_w_n": 1342.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2887.0, "completions/max_terminated_length": 2887.0, "completions/mean_length": 453.68359375, "completions/mean_terminated_length": 459.0632629394531, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.20373333333333332, "grad_norm": 0.004961589351296425, "learning_rate": 2.5000000000000004e-07, "loss": 0.1371, "num_tokens": 41507116.0, "reward": 1.0092079639434814, "reward_std": 0.18711496889591217, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6316972970962524, "rewards/format_reward_step": 0.97265625, "step": 191 }, { "aux_distill/final_loss": 0.0005855521337707614, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08320717338938266, "aux_distill/mean_u": 0.26376013196602055, "aux_distill/n_active_final_tok": 30.75, "aux_distill/n_active_tok": 248.5, "aux_distill/step_loss": 0.8303150627762079, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5571975194616703, "calib/avg_num_step_conf": 8.33203125, "calib/ece": 0.33935742971887556, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.03614457831325301, "calib/gap": 0.03959229449795487, "calib/mean_conf": 0.170281124497992, "calib/mu_c": 0.1930188679245283, "calib/mu_w": 0.15342657342657343, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.041967871485943775, "calib/std_conf": 0.24059064136409902, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.25987341772151895, "calib/step_q_c_n": 711.0, "calib/step_q_gap": 0.06210267229254565, "calib/step_q_w": 0.1977707454289733, "calib/step_q_w_n": 1422.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 3005.0, "completions/max_terminated_length": 3005.0, "completions/mean_length": 488.56640625, "completions/mean_terminated_length": 498.298828125, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.2048, "grad_norm": 0.0054542748257517815, "learning_rate": 2.2222222222222224e-07, "loss": 0.1064, "num_tokens": 41737165.0, "reward": 1.0025842189788818, "reward_std": 0.18376731872558594, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6301683187484741, "rewards/format_reward_step": 0.9609375, "step": 192 }, { "aux_distill/final_loss": 0.00048713741512074193, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08641538047231734, "aux_distill/mean_u": 0.28739638625110747, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 259.5, "aux_distill/step_loss": 0.8626923765987158, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4753813219159754, "calib/avg_num_step_conf": 8.24609375, "calib/ece": 0.3026506024096386, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.028112449799196786, "calib/gap": -0.00766189456783517, "calib/mean_conf": 0.1842570281124498, "calib/mu_c": 0.1797029702970297, "calib/mu_w": 0.18736486486486487, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.040642570281124495, "calib/std_conf": 0.24419269129671786, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.2507959479015919, "calib/step_q_c_n": 691.0, "calib/step_q_gap": -0.01978151688714047, "calib/step_q_w": 0.27057746478873235, "calib/step_q_w_n": 1420.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2651.0, "completions/max_terminated_length": 2651.0, "completions/mean_length": 477.5859375, "completions/mean_terminated_length": 483.2490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.20586666666666667, "grad_norm": 0.00497024180367589, "learning_rate": 1.9444444444444447e-07, "loss": 0.1456, "num_tokens": 41965139.0, "reward": 0.9942156076431274, "reward_std": 0.17856940627098083, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.6251500248908997, "rewards/format_reward_step": 0.96875, "step": 193 }, { "aux_distill/final_loss": 0.0007990040708136803, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.083697130670771, "aux_distill/mean_u": 0.29038782347307623, "aux_distill/n_active_final_tok": 30.75, "aux_distill/n_active_tok": 236.125, "aux_distill/step_loss": 0.8345742803066969, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5239338842975206, "calib/avg_num_step_conf": 8.125, "calib/ece": 0.3923211382113821, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.056910569105691054, "calib/gap": -0.007213818181818155, "calib/mean_conf": 0.20548373983739837, "calib/mu_c": 0.20181818181818184, "calib/mu_w": 0.209032, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.052967479674796744, "calib/std_conf": 0.2682096879821403, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2718048780487805, "calib/step_q_c_n": 820.0, "calib/step_q_gap": 0.0067818621757646524, "calib/step_q_w": 0.26502301587301585, "calib/step_q_w_n": 1260.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2707.0, "completions/max_terminated_length": 2707.0, "completions/mean_length": 435.71484375, "completions/mean_terminated_length": 447.9638366699219, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.20693333333333333, "grad_norm": 0.0060769012197852135, "learning_rate": 1.6666666666666668e-07, "loss": 0.0943, "num_tokens": 42182626.0, "reward": 1.001477837562561, "reward_std": 0.20108719170093536, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5693619251251221, "rewards/format_reward_step": 0.9609375, "step": 194 }, { "aux_distill/final_loss": 0.015865576947589943, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08864119986537844, "aux_distill/mean_u": 0.3035396885680345, "aux_distill/n_active_final_tok": 30.375, "aux_distill/n_active_tok": 236.75, "aux_distill/step_loss": 0.8388152550905943, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.4720672268907563, "calib/avg_num_step_conf": 7.91796875, "calib/ece": 0.3810040983606558, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.06967213114754098, "calib/gap": -0.027625210084033652, "calib/mean_conf": 0.23112704918032786, "calib/mu_c": 0.2169747899159664, "calib/mu_w": 0.24460000000000004, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.06221311475409836, "calib/std_conf": 0.29212207658613815, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.28189119170984456, "calib/step_q_c_n": 772.0, "calib/step_q_gap": 0.018205932745701148, "calib/step_q_w": 0.2636852589641434, "calib/step_q_w_n": 1255.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2631.0, "completions/max_terminated_length": 2631.0, "completions/mean_length": 444.05859375, "completions/mean_terminated_length": 454.7160339355469, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.208, "grad_norm": 0.006210810970515013, "learning_rate": 1.3888888888888888e-07, "loss": 0.113, "num_tokens": 42402289.0, "reward": 0.9839684963226318, "reward_std": 0.22606565058231354, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5538744926452637, "rewards/format_reward_step": 0.94921875, "step": 195 }, { "aux_distill/final_loss": 0.021385181895766436, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.09157576691359282, "aux_distill/mean_u": 0.2631465701960212, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 210.25, "aux_distill/step_loss": 0.851602103561163, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5476810515873016, "calib/avg_num_step_conf": 6.5703125, "calib/ece": 0.35964173228346463, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.031496062992125984, "calib/gap": 0.035935515873015855, "calib/mean_conf": 0.18626377952755901, "calib/mu_c": 0.20437301587301587, "calib/mu_w": 0.16843750000000002, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.02492125984251968, "calib/std_conf": 0.24281806259568714, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.276373417721519, "calib/step_q_c_n": 790.0, "calib/step_q_gap": -0.003379945507180515, "calib/step_q_w": 0.27975336322869954, "calib/step_q_w_n": 892.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1514.0, "completions/max_terminated_length": 1514.0, "completions/mean_length": 385.05078125, "completions/mean_terminated_length": 386.5608215332031, "completions/min_length": 0.0, "completions/min_terminated_length": 15.0, "epoch": 0.20906666666666668, "grad_norm": 0.006784021854400635, "learning_rate": 1.1111111111111112e-07, "loss": 0.0949, "num_tokens": 42603406.0, "reward": 1.0427067279815674, "reward_std": 0.13762742280960083, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.6049444675445557, "rewards/format_reward_step": 0.98828125, "step": 196 }, { "aux_distill/final_loss": 0.0042866374697041465, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08987042051739991, "aux_distill/mean_u": 0.32845601318818596, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 253.375, "aux_distill/step_loss": 0.8858442883938551, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.494430693069307, "calib/avg_num_step_conf": 7.91796875, "calib/ece": 0.2721739130434783, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.03557312252964427, "calib/gap": 0.029678217821782177, "calib/mean_conf": 0.18434782608695652, "calib/mu_c": 0.2021782178217822, "calib/mu_w": 0.17250000000000001, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.028656126482213447, "calib/std_conf": 0.241415599465392, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.25859353023909987, "calib/step_q_c_n": 711.0, "calib/step_q_gap": -0.007384433286735992, "calib/step_q_w": 0.26597796352583586, "calib/step_q_w_n": 1316.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2605.0, "completions/max_terminated_length": 2605.0, "completions/mean_length": 461.1953125, "completions/mean_terminated_length": 463.0039367675781, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.21013333333333334, "grad_norm": 0.006538861431181431, "learning_rate": 8.333333333333334e-08, "loss": 0.1229, "num_tokens": 42826528.0, "reward": 1.0224547386169434, "reward_std": 0.15360164642333984, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.6620968580245972, "rewards/format_reward_step": 0.98828125, "step": 197 }, { "aux_distill/final_loss": 0.011154031611567916, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08801236690487713, "aux_distill/mean_u": 0.2723190910741564, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 240.0, "aux_distill/step_loss": 0.8466615602374077, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5545843045843046, "calib/avg_num_step_conf": 7.84765625, "calib/ece": 0.335863453815261, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0642570281124498, "calib/gap": 0.05479020979020982, "calib/mean_conf": 0.21377510040160644, "calib/mu_c": 0.2428205128205128, "calib/mu_w": 0.188030303030303, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03987951807228916, "calib/std_conf": 0.2722408233274982, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.33030226700251886, "calib/step_q_c_n": 794.0, "calib/step_q_gap": 0.08028580609716907, "calib/step_q_w": 0.2500164609053498, "calib/step_q_w_n": 1215.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2260.0, "completions/max_terminated_length": 2260.0, "completions/mean_length": 423.33984375, "completions/mean_terminated_length": 430.0595397949219, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.2112, "grad_norm": 0.005951710045337677, "learning_rate": 5.555555555555556e-08, "loss": 0.1031, "num_tokens": 43040287.0, "reward": 1.0273165702819824, "reward_std": 0.18940387666225433, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6210394501686096, "rewards/format_reward_step": 0.97265625, "step": 198 }, { "aux_distill/final_loss": 0.0005417467523329833, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08579555188771337, "aux_distill/mean_u": 0.3156596461552704, "aux_distill/n_active_final_tok": 30.75, "aux_distill/n_active_tok": 248.5, "aux_distill/step_loss": 0.8563302643597126, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5573781291172596, "calib/avg_num_step_conf": 8.41015625, "calib/ece": 0.34425101214574894, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.024291497975708502, "calib/gap": 0.036978919631093565, "calib/mean_conf": 0.1773684210526316, "calib/mu_c": 0.19713043478260872, "calib/mu_w": 0.16015151515151516, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.028016194331983796, "calib/std_conf": 0.2313697067228225, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.26884041184041185, "calib/step_q_c_n": 777.0, "calib/step_q_gap": 9.18653287839688e-05, "calib/step_q_w": 0.2687485465116279, "calib/step_q_w_n": 1376.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2378.0, "completions/max_terminated_length": 2378.0, "completions/mean_length": 477.7109375, "completions/mean_terminated_length": 493.1209411621094, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.21226666666666666, "grad_norm": 0.005312070716172457, "learning_rate": 2.777777777777778e-08, "loss": 0.0383, "num_tokens": 43266781.0, "reward": 1.0092904567718506, "reward_std": 0.1893027126789093, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6084246039390564, "rewards/format_reward_step": 0.9609375, "step": 199 }, { "aux_distill/final_loss": 0.00040837999495124677, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.30000000000000004, "aux_distill/loss": 0.08290134533308446, "aux_distill/mean_u": 0.26329027914929476, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 234.5, "aux_distill/step_loss": 0.8277882859110832, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4896712158808933, "calib/avg_num_step_conf": 7.453125, "calib/ece": 0.3749606299212598, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.027559055118110236, "calib/gap": -0.007093052109181153, "calib/mean_conf": 0.18661417322834645, "calib/mu_c": 0.18298387096774194, "calib/mu_w": 0.1900769230769231, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.036692913385826774, "calib/std_conf": 0.2402990933967679, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.27685941043083895, "calib/step_q_c_n": 882.0, "calib/step_q_gap": 0.005134069300234689, "calib/step_q_w": 0.27172534113060426, "calib/step_q_w_n": 1026.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2515.0, "completions/max_terminated_length": 2515.0, "completions/mean_length": 470.5, "completions/mean_terminated_length": 472.3451232910156, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.21333333333333335, "grad_norm": 0.005917710717767477, "learning_rate": 0.0, "loss": 0.0826, "num_tokens": 43495277.0, "reward": 1.030428409576416, "reward_std": 0.15025684237480164, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5921066403388977, "rewards/format_reward_step": 0.984375, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.11930906091816723, "train_runtime": 18357.8253, "train_samples_per_second": 2.789, "train_steps_per_second": 0.011 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 43495277, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }