{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_distill/lambda": 0.1, "aux_distill/loss": 0.9022029798764449, "aux_distill/mean_u": 0.3108363672915008, "aux_distill/n_active_tok": 54.0, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.012647395953536034, "learning_rate": 2.5000000000000004e-07, "loss": 0.0686, "num_tokens": 264685.0, "reward": 0.037574999034404755, "reward_std": 0.07449960708618164, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step": 0.04296875, "step": 1 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 0.5604067397745032, "aux_distill/mean_u": 0.29356464889172007, "aux_distill/n_active_tok": 58.36842105263158, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.008666743524372578, "learning_rate": 5.000000000000001e-07, "loss": 0.099, "num_tokens": 533467.0, "reward": 0.07537207007408142, "reward_std": 0.14035090804100037, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step": 0.08984375, "step": 2 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 0.6924514323472977, "aux_distill/mean_u": 0.24409276830107252, "aux_distill/n_active_tok": 51.2, "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.5520833333333334, "calib/avg_num_step_conf": 0.26953125, "calib/ece": 0.5471428571428572, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.8571428571428571, "calib/gap": 0.0029166666666665675, "calib/mean_conf": 0.9600000000000001, "calib/mu_c": 0.9616666666666666, "calib/mu_w": 0.95875, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.0703125, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.5392857142857143, "calib/std_conf": 0.03229329872987803, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3035.0, "completions/max_terminated_length": 3035.0, "completions/mean_length": 672.12890625, "completions/mean_terminated_length": 732.1914672851562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.0068152653984725475, "learning_rate": 7.5e-07, "loss": 0.0357, "num_tokens": 810788.0, "reward": 0.0320499986410141, "reward_std": 0.07596391439437866, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.013318749144673347, "rewards/format_reward_step": 0.02734375, "step": 3 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 0.775890588760376, "aux_distill/mean_u": 0.24166433444907634, "aux_distill/n_active_tok": 37.333333333333336, "calib/answer_extract_rate": 0.0703125, "calib/avg_num_step_conf": 0.16796875, "calib/ece": 0.9345454545454546, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.8181818181818182, "calib/mean_conf": 0.9345454545454543, "calib/mu_c": NaN, "calib/mu_w": 0.9345454545454543, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.04296875, "calib/pce": 0.9345454545454546, "calib/std_conf": 0.0909908723058264, "calib/step_conf_rate": 0.04296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3033.0, "completions/max_terminated_length": 3033.0, "completions/mean_length": 621.32421875, "completions/mean_terminated_length": 673.9788208007812, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.004266666666666667, "grad_norm": 0.006891297642141581, "learning_rate": 1.0000000000000002e-06, "loss": 0.0315, "num_tokens": 1076015.0, "reward": 0.014284765347838402, "reward_std": 0.02579544484615326, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.001225781161338091, "rewards/format_reward_step": 0.02734375, "step": 4 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 0.6852438772718111, "aux_distill/mean_u": 0.16246693256067343, "aux_distill/n_active_tok": 26.666666666666668, "calib/answer_extract_rate": 0.0546875, "calib/auroc": 0.5357142857142857, "calib/avg_num_step_conf": 0.08203125, "calib/ece": 0.6766666666666665, "calib/final_conf_rate": 0.03515625, "calib/format_rate": 0.015625, "calib/frac_conf_gt_0.9": 0.5555555555555556, "calib/gap": 0.0014285714285714457, "calib/mean_conf": 0.8988888888888888, "calib/mu_c": 0.9, "calib/mu_w": 0.8985714285714286, "calib/nonempty_final_conf_rate": 0.03515625, "calib/nonempty_reasoning_rate": 0.06640625, "calib/nonempty_step_conf_rate": 0.03125, "calib/pce": 0.6766666666666665, "calib/std_conf": 0.1257373315416304, "calib/step_conf_rate": 0.03125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 3022.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 677.96484375, "completions/mean_terminated_length": 754.6043090820312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.006530698388814926, "learning_rate": 1.25e-06, "loss": 0.0432, "num_tokens": 1356262.0, "reward": 0.017167968675494194, "reward_std": 0.048558346927165985, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.010898437350988388, "rewards/format_reward_step": 0.015625, "step": 5 }, { "aux_distill/lambda": 0.1, "aux_distill/loss": 0.6688954555071317, "aux_distill/mean_u": 0.35205855050102514, "aux_distill/n_active_tok": 54.76923076923077, "calib/answer_extract_rate": 0.08984375, "calib/auroc": 0.34615384615384615, "calib/avg_num_step_conf": 0.34765625, "calib/ece": 0.725, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 0.6875, "calib/gap": -0.021538461538461617, "calib/mean_conf": 0.8775000000000001, "calib/mu_c": 0.86, "calib/mu_w": 0.8815384615384616, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.10546875, "calib/nonempty_step_conf_rate": 0.07421875, "calib/pce": 0.7075, "calib/std_conf": 0.14813422967025547, "calib/step_conf_rate": 0.07421875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3020.0, "completions/max_terminated_length": 3020.0, "completions/mean_length": 575.47265625, "completions/mean_terminated_length": 626.8978271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.0064, "grad_norm": 0.006988861598074436, "learning_rate": 1.5e-06, "loss": 0.0504, "num_tokens": 1609535.0, "reward": 0.03744082152843475, "reward_std": 0.07511966675519943, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.01628788933157921, "rewards/format_reward_step": 0.046875, "step": 6 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 0.6855257414281368, "aux_distill/mean_u": 0.32664575830852394, "aux_distill/n_active_tok": 36.5625, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.7727272727272727, "calib/avg_num_step_conf": 0.30078125, "calib/ece": 0.7469230769230768, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.10545454545454536, "calib/mean_conf": 0.9007692307692309, "calib/mu_c": 0.99, "calib/mu_w": 0.8845454545454546, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.11328125, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.7469230769230768, "calib/std_conf": 0.16202856056223489, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 3070.0, "completions/max_terminated_length": 3070.0, "completions/mean_length": 662.6015625, "completions/mean_terminated_length": 734.3117065429688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.007448374293744564, "learning_rate": 1.75e-06, "loss": 0.0134, "num_tokens": 1886585.0, "reward": 0.030934961512684822, "reward_std": 0.06625574827194214, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.01499492209404707, "rewards/format_reward_step": 0.0390625, "step": 7 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 0.7116753607988358, "aux_distill/mean_u": 0.26608393808950553, "aux_distill/n_active_tok": 65.91666666666667, "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.16666666666666669, "calib/avg_num_step_conf": 0.375, "calib/ece": 0.7890909090909093, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.013333333333333308, "calib/mean_conf": 0.970909090909091, "calib/mu_c": 0.96, "calib/mu_w": 0.9733333333333333, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.06640625, "calib/pce": 0.7890909090909093, "calib/std_conf": 0.011642044068059734, "calib/step_conf_rate": 0.06640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2986.0, "completions/max_terminated_length": 2986.0, "completions/mean_length": 660.27734375, "completions/mean_terminated_length": 719.2808227539062, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.005775726865977049, "learning_rate": 2.0000000000000003e-06, "loss": 0.0424, "num_tokens": 2162128.0, "reward": 0.025961915031075478, "reward_std": 0.061920247972011566, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.008955078199505806, "rewards/format_reward_step": 0.03125, "step": 8 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 0.6978093602440574, "aux_distill/mean_u": 0.19239018482123108, "aux_distill/n_active_tok": 25.454545454545453, "calib/answer_extract_rate": 0.0703125, "calib/auroc": 0.65, "calib/avg_num_step_conf": 0.140625, "calib/ece": 0.7108333333333334, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": 0.08700000000000008, "calib/mean_conf": 0.8775, "calib/mu_c": 0.95, "calib/mu_w": 0.8629999999999999, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.078125, "calib/nonempty_step_conf_rate": 0.05078125, "calib/pce": 0.7108333333333334, "calib/std_conf": 0.15562374497485915, "calib/step_conf_rate": 0.05078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 2826.0, "completions/max_terminated_length": 2826.0, "completions/mean_length": 633.48828125, "completions/mean_terminated_length": 717.5796508789062, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0096, "grad_norm": 0.0072958157397806644, "learning_rate": 2.25e-06, "loss": 0.0397, "num_tokens": 2431837.0, "reward": 0.030364451929926872, "reward_std": 0.05786587670445442, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.013853906653821468, "rewards/format_reward_step": 0.0390625, "step": 9 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 0.6190562213168425, "aux_distill/mean_u": 0.2795340197168761, "aux_distill/n_active_tok": 38.35294117647059, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.4210526315789474, "calib/avg_num_step_conf": 0.31640625, "calib/ece": 0.8112499999999999, "calib/final_conf_rate": 0.078125, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 0.65, "calib/gap": 0.09342105263157874, "calib/mean_conf": 0.8612500000000001, "calib/mu_c": 0.95, "calib/mu_w": 0.8565789473684212, "calib/nonempty_final_conf_rate": 0.078125, "calib/nonempty_reasoning_rate": 0.11328125, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.8112499999999999, "calib/std_conf": 0.23361225888210574, "calib/step_conf_rate": 0.09375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3041.0, "completions/max_terminated_length": 3041.0, "completions/mean_length": 744.4375, "completions/mean_terminated_length": 807.5254516601562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.007861468009650707, "learning_rate": 2.5e-06, "loss": 0.0948, "num_tokens": 2729213.0, "reward": 0.035783737897872925, "reward_std": 0.09035970270633698, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.016879979521036148, "rewards/format_reward_step": 0.05078125, "step": 10 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 0.6934706956148148, "aux_distill/mean_u": 0.29880514015963444, "aux_distill/n_active_tok": 48.1, "calib/answer_extract_rate": 0.1015625, "calib/auroc": 0.5069444444444444, "calib/avg_num_step_conf": 0.46875, "calib/ece": 0.6405555555555555, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.06640625, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": -0.13750000000000007, "calib/mean_conf": 0.785, "calib/mu_c": 0.6933333333333334, "calib/mu_w": 0.8308333333333334, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.125, "calib/pce": 0.5461111111111112, "calib/std_conf": 0.3197785692214744, "calib/step_conf_rate": 0.125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2792.0, "completions/max_terminated_length": 2792.0, "completions/mean_length": 662.73828125, "completions/mean_terminated_length": 721.961669921875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.01090265903621912, "learning_rate": 2.7500000000000004e-06, "loss": 0.0945, "num_tokens": 3003354.0, "reward": 0.060478322207927704, "reward_std": 0.11573203653097153, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.02720664069056511, "rewards/format_reward_step": 0.06640625, "step": 11 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 0.6174011901021004, "aux_distill/mean_u": 0.3561512000879931, "aux_distill/n_active_tok": 80.75, "calib/answer_extract_rate": 0.19921875, "calib/auroc": 0.5722222222222222, "calib/avg_num_step_conf": 0.94921875, "calib/ece": 0.6661538461538461, "calib/final_conf_rate": 0.15234375, "calib/format_rate": 0.109375, "calib/frac_conf_gt_0.9": 0.7435897435897436, "calib/gap": 0.07766666666666666, "calib/mean_conf": 0.8969230769230768, "calib/mu_c": 0.9566666666666666, "calib/mu_w": 0.8789999999999999, "calib/nonempty_final_conf_rate": 0.15234375, "calib/nonempty_reasoning_rate": 0.23828125, "calib/nonempty_step_conf_rate": 0.171875, "calib/pce": 0.6661538461538461, "calib/std_conf": 0.16687403273364296, "calib/step_conf_rate": 0.171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2993.0, "completions/max_terminated_length": 2993.0, "completions/mean_length": 604.328125, "completions/mean_terminated_length": 658.3319091796875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.009621622040867805, "learning_rate": 3e-06, "loss": 0.0646, "num_tokens": 3262238.0, "reward": 0.0920531302690506, "reward_std": 0.18517741560935974, "rewards/accuracy_reward_step": 0.03515625, "rewards/final_brier_reward_step": 0.0395749993622303, "rewards/format_reward_step": 0.109375, "step": 12 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 0.5915297634071774, "aux_distill/mean_u": 0.24548161004521343, "aux_distill/n_active_tok": 61.77777777777778, "calib/answer_extract_rate": 0.1328125, "calib/auroc": 0.7301587301587302, "calib/avg_num_step_conf": 0.54296875, "calib/ece": 0.7495833333333334, "calib/final_conf_rate": 0.09375, "calib/format_rate": 0.078125, "calib/frac_conf_gt_0.9": 0.7083333333333334, "calib/gap": 0.08238095238095222, "calib/mean_conf": 0.8745833333333334, "calib/mu_c": 0.9466666666666667, "calib/mu_w": 0.8642857142857144, "calib/nonempty_final_conf_rate": 0.09375, "calib/nonempty_reasoning_rate": 0.16796875, "calib/nonempty_step_conf_rate": 0.125, "calib/pce": 0.7495833333333334, "calib/std_conf": 0.19004339782153853, "calib/step_conf_rate": 0.125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3045.0, "completions/max_terminated_length": 3045.0, "completions/mean_length": 630.265625, "completions/mean_terminated_length": 692.480712890625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.008549676276743412, "learning_rate": 3.2500000000000002e-06, "loss": 0.069, "num_tokens": 3528178.0, "reward": 0.05812598019838333, "reward_std": 0.10661930590867996, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.026408202946186066, "rewards/format_reward_step": 0.078125, "step": 13 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5545808651617595, "aux_distill/mean_u": 0.287806905984641, "aux_distill/n_active_tok": 61.285714285714285, "calib/answer_extract_rate": 0.234375, "calib/auroc": 0.6428571428571429, "calib/avg_num_step_conf": 0.87109375, "calib/ece": 0.6372045454545454, "calib/final_conf_rate": 0.171875, "calib/format_rate": 0.1328125, "calib/frac_conf_gt_0.9": 0.5, "calib/gap": 0.15560231660231671, "calib/mean_conf": 0.7962954545454545, "calib/mu_c": 0.9271428571428572, "calib/mu_w": 0.7715405405405404, "calib/nonempty_final_conf_rate": 0.171875, "calib/nonempty_reasoning_rate": 0.296875, "calib/nonempty_step_conf_rate": 0.20703125, "calib/pce": 0.6372045454545454, "calib/std_conf": 0.2955989743143614, "calib/step_conf_rate": 0.20703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 2701.0, "completions/max_terminated_length": 2701.0, "completions/mean_length": 581.75390625, "completions/mean_terminated_length": 615.4090576171875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.014933333333333333, "grad_norm": 0.010811959393322468, "learning_rate": 3.5e-06, "loss": 0.1161, "num_tokens": 3782507.0, "reward": 0.10872349888086319, "reward_std": 0.18449640274047852, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.05729074776172638, "rewards/format_reward_step": 0.1328125, "step": 14 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.534116182861657, "aux_distill/mean_u": 0.3509062857245206, "aux_distill/n_active_tok": 85.51724137931035, "calib/answer_extract_rate": 0.2109375, "calib/auroc": 0.6041666666666666, "calib/avg_num_step_conf": 1.20703125, "calib/ece": 0.655909090909091, "calib/final_conf_rate": 0.171875, "calib/format_rate": 0.1484375, "calib/frac_conf_gt_0.9": 0.5454545454545454, "calib/gap": 0.125, "calib/mean_conf": 0.8377272727272728, "calib/mu_c": 0.94, "calib/mu_w": 0.815, "calib/nonempty_final_conf_rate": 0.171875, "calib/nonempty_reasoning_rate": 0.28515625, "calib/nonempty_step_conf_rate": 0.23828125, "calib/pce": 0.655909090909091, "calib/std_conf": 0.21094876194301132, "calib/step_conf_rate": 0.23828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 3063.0, "completions/max_terminated_length": 3063.0, "completions/mean_length": 663.78125, "completions/mean_terminated_length": 710.9957885742188, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.016, "grad_norm": 0.009891645051538944, "learning_rate": 3.7500000000000005e-06, "loss": 0.1809, "num_tokens": 4060315.0, "reward": 0.12209217995405197, "reward_std": 0.2036409080028534, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.06449687480926514, "rewards/format_reward_step": 0.1484375, "step": 15 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 0.551109317690134, "aux_distill/mean_u": 0.41506540252915974, "aux_distill/n_active_tok": 92.29166666666667, "calib/answer_extract_rate": 0.23828125, "calib/auroc": 0.5151515151515151, "calib/avg_num_step_conf": 1.04296875, "calib/ece": 0.6365238095238095, "calib/final_conf_rate": 0.1640625, "calib/format_rate": 0.140625, "calib/frac_conf_gt_0.9": 0.5714285714285714, "calib/gap": 0.013373737373737482, "calib/mean_conf": 0.8110476190476189, "calib/mu_c": 0.8215555555555556, "calib/mu_w": 0.8081818181818181, "calib/nonempty_final_conf_rate": 0.1640625, "calib/nonempty_reasoning_rate": 0.29296875, "calib/nonempty_step_conf_rate": 0.21484375, "calib/pce": 0.6166428571428573, "calib/std_conf": 0.2606996038483987, "calib/step_conf_rate": 0.21484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 2850.0, "completions/max_terminated_length": 2850.0, "completions/mean_length": 599.62109375, "completions/mean_terminated_length": 644.9706420898438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.017066666666666667, "grad_norm": 0.009639227762818336, "learning_rate": 4.000000000000001e-06, "loss": 0.1095, "num_tokens": 4322666.0, "reward": 0.11668315529823303, "reward_std": 0.21993079781532288, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.05367880314588547, "rewards/format_reward_step": 0.140625, "step": 16 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5508362718166844, "aux_distill/mean_u": 0.3202794545006914, "aux_distill/n_active_tok": 119.35483870967742, "calib/answer_extract_rate": 0.359375, "calib/auroc": 0.5582089552238806, "calib/avg_num_step_conf": 1.7890625, "calib/ece": 0.5943902439024391, "calib/final_conf_rate": 0.3203125, "calib/format_rate": 0.26171875, "calib/frac_conf_gt_0.9": 0.5, "calib/gap": 0.041472636815920505, "calib/mean_conf": 0.748780487804878, "calib/mu_c": 0.7826666666666667, "calib/mu_w": 0.7411940298507462, "calib/nonempty_final_conf_rate": 0.3203125, "calib/nonempty_reasoning_rate": 0.42578125, "calib/nonempty_step_conf_rate": 0.3515625, "calib/pce": 0.5801219512195123, "calib/std_conf": 0.3001153853059237, "calib/step_conf_rate": 0.3515625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2843.0, "completions/max_terminated_length": 2843.0, "completions/mean_length": 535.5625, "completions/mean_terminated_length": 555.0769653320312, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.018133333333333335, "grad_norm": 0.012681329622864723, "learning_rate": 4.25e-06, "loss": 0.1619, "num_tokens": 4563298.0, "reward": 0.22102656960487366, "reward_std": 0.3276210427284241, "rewards/accuracy_reward_step": 0.0625, "rewards/final_brier_reward_step": 0.11783437430858612, "rewards/format_reward_step": 0.26171875, "step": 17 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5232953188880798, "aux_distill/mean_u": 0.3262361357347477, "aux_distill/n_active_tok": 98.48387096774194, "calib/answer_extract_rate": 0.33203125, "calib/auroc": 0.6807628524046434, "calib/avg_num_step_conf": 1.47265625, "calib/ece": 0.5537157894736842, "calib/final_conf_rate": 0.296875, "calib/format_rate": 0.20703125, "calib/frac_conf_gt_0.9": 0.39473684210526316, "calib/gap": 0.20301558872305125, "calib/mean_conf": 0.6721368421052633, "calib/mu_c": 0.8511111111111112, "calib/mu_w": 0.6480955223880599, "calib/nonempty_final_conf_rate": 0.296875, "calib/nonempty_reasoning_rate": 0.4140625, "calib/nonempty_step_conf_rate": 0.328125, "calib/pce": 0.5537157894736842, "calib/std_conf": 0.3439237510515901, "calib/step_conf_rate": 0.328125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2970.0, "completions/max_terminated_length": 2970.0, "completions/mean_length": 554.59765625, "completions/mean_terminated_length": 572.4879150390625, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.0192, "grad_norm": 0.009989089332520962, "learning_rate": 4.5e-06, "loss": 0.1477, "num_tokens": 4815995.0, "reward": 0.17580628395080566, "reward_std": 0.2827049195766449, "rewards/accuracy_reward_step": 0.03515625, "rewards/final_brier_reward_step": 0.10942507535219193, "rewards/format_reward_step": 0.20703125, "step": 18 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5288848485797644, "aux_distill/mean_u": 0.3808616730169923, "aux_distill/n_active_tok": 209.78125, "calib/answer_extract_rate": 0.62109375, "calib/auroc": 0.5075464396284829, "calib/avg_num_step_conf": 3.19140625, "calib/ece": 0.45987161290322576, "calib/final_conf_rate": 0.60546875, "calib/format_rate": 0.4921875, "calib/frac_conf_gt_0.9": 0.24516129032258063, "calib/gap": 0.028739899380805012, "calib/mean_conf": 0.5408251612903225, "calib/mu_c": 0.5660421052631579, "calib/mu_w": 0.5373022058823529, "calib/nonempty_final_conf_rate": 0.60546875, "calib/nonempty_reasoning_rate": 0.76953125, "calib/nonempty_step_conf_rate": 0.671875, "calib/pce": 0.43905806451612905, "calib/std_conf": 0.3514235209036652, "calib/step_conf_rate": 0.671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 3024.0, "completions/max_terminated_length": 3024.0, "completions/mean_length": 405.7265625, "completions/mean_terminated_length": 420.5101318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 22.0, "epoch": 0.020266666666666665, "grad_norm": 0.015777073800563812, "learning_rate": 4.75e-06, "loss": 0.1692, "num_tokens": 5024621.0, "reward": 0.4331209063529968, "reward_std": 0.4471513032913208, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.29592931270599365, "rewards/format_reward_step": 0.4921875, "step": 19 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5358412060886621, "aux_distill/mean_u": 0.3330040179088781, "aux_distill/n_active_tok": 235.40625, "calib/answer_extract_rate": 0.7734375, "calib/auroc": 0.5045928281222398, "calib/avg_num_step_conf": 3.58203125, "calib/ece": 0.3642105263157895, "calib/final_conf_rate": 0.7421875, "calib/format_rate": 0.640625, "calib/frac_conf_gt_0.9": 0.15789473684210525, "calib/gap": 0.015520226108461432, "calib/mean_conf": 0.46642105263157896, "calib/mu_c": 0.47891891891891897, "calib/mu_w": 0.46339869281045754, "calib/nonempty_final_conf_rate": 0.7421875, "calib/nonempty_reasoning_rate": 0.90625, "calib/nonempty_step_conf_rate": 0.83203125, "calib/pce": 0.31794736842105265, "calib/std_conf": 0.3506382631099845, "calib/step_conf_rate": 0.83203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2922.0, "completions/max_terminated_length": 2922.0, "completions/mean_length": 315.515625, "completions/mean_terminated_length": 321.8008117675781, "completions/min_length": 0.0, "completions/min_terminated_length": 15.0, "epoch": 0.021333333333333333, "grad_norm": 0.015699947252869606, "learning_rate": 5e-06, "loss": 0.1381, "num_tokens": 5210265.0, "reward": 0.6040115356445312, "reward_std": 0.46912193298339844, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.4150542914867401, "rewards/format_reward_step": 0.640625, "step": 20 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5463973861187696, "aux_distill/mean_u": 0.32214415168677285, "aux_distill/n_active_tok": 264.90625, "calib/answer_extract_rate": 0.828125, "calib/auroc": 0.4245585874799358, "calib/avg_num_step_conf": 3.9140625, "calib/ece": 0.319245145631068, "calib/final_conf_rate": 0.8046875, "calib/format_rate": 0.75, "calib/frac_conf_gt_0.9": 0.10679611650485436, "calib/gap": -0.08127889245585873, "calib/mean_conf": 0.3720169902912622, "calib/mu_c": 0.30178571428571427, "calib/mu_w": 0.383064606741573, "calib/nonempty_final_conf_rate": 0.8046875, "calib/nonempty_reasoning_rate": 0.9453125, "calib/nonempty_step_conf_rate": 0.89453125, "calib/pce": 0.2776699029126214, "calib/std_conf": 0.3370346662655462, "calib/step_conf_rate": 0.89453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2032.0, "completions/max_terminated_length": 2032.0, "completions/mean_length": 318.78515625, "completions/mean_terminated_length": 321.2952880859375, "completions/min_length": 0.0, "completions/min_terminated_length": 25.0, "epoch": 0.0224, "grad_norm": 0.016127225011587143, "learning_rate": 4.9722222222222224e-06, "loss": 0.2055, "num_tokens": 5394834.0, "reward": 0.6895323991775513, "reward_std": 0.4213147759437561, "rewards/accuracy_reward_step": 0.11328125, "rewards/final_brier_reward_step": 0.5157834887504578, "rewards/format_reward_step": 0.75, "step": 21 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.500660709105432, "aux_distill/mean_u": 0.3673933193010618, "aux_distill/n_active_tok": 297.125, "calib/answer_extract_rate": 0.890625, "calib/auroc": 0.39062499999999994, "calib/avg_num_step_conf": 4.60546875, "calib/ece": 0.3734529680365296, "calib/final_conf_rate": 0.85546875, "calib/format_rate": 0.8203125, "calib/frac_conf_gt_0.9": 0.1689497716894977, "calib/gap": -0.13653755787037036, "calib/mean_conf": 0.4323004566210046, "calib/mu_c": 0.3125962962962963, "calib/mu_w": 0.4491338541666667, "calib/nonempty_final_conf_rate": 0.85546875, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.94921875, "calib/pce": 0.3412328767123287, "calib/std_conf": 0.34979617502728416, "calib/step_conf_rate": 0.94921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2701.0, "completions/max_terminated_length": 2701.0, "completions/mean_length": 326.50390625, "completions/mean_terminated_length": 327.7843322753906, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.023466666666666667, "grad_norm": 0.012950566597282887, "learning_rate": 4.944444444444445e-06, "loss": 0.2034, "num_tokens": 5580235.0, "reward": 0.727470338344574, "reward_std": 0.36542022228240967, "rewards/accuracy_reward_step": 0.10546875, "rewards/final_brier_reward_step": 0.529159426689148, "rewards/format_reward_step": 0.8203125, "step": 22 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5794403096660972, "aux_distill/mean_u": 0.3093092033622721, "aux_distill/n_active_tok": 255.09375, "calib/answer_extract_rate": 0.8203125, "calib/auroc": 0.4833436788744051, "calib/avg_num_step_conf": 3.78515625, "calib/ece": 0.2792660194174757, "calib/final_conf_rate": 0.8046875, "calib/format_rate": 0.75390625, "calib/frac_conf_gt_0.9": 0.07766990291262135, "calib/gap": -0.027863107800538023, "calib/mean_conf": 0.3438407766990291, "calib/mu_c": 0.3196296296296296, "calib/mu_w": 0.3474927374301676, "calib/nonempty_final_conf_rate": 0.8046875, "calib/nonempty_reasoning_rate": 0.94921875, "calib/nonempty_step_conf_rate": 0.9140625, "calib/pce": 0.24601941747572814, "calib/std_conf": 0.3269325359987017, "calib/step_conf_rate": 0.9140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2109.0, "completions/max_terminated_length": 2109.0, "completions/mean_length": 303.39453125, "completions/mean_terminated_length": 305.7834777832031, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.024533333333333334, "grad_norm": 0.01320746261626482, "learning_rate": 4.9166666666666665e-06, "loss": 0.2314, "num_tokens": 5761840.0, "reward": 0.7033728361129761, "reward_std": 0.4077947735786438, "rewards/accuracy_reward_step": 0.10546875, "rewards/final_brier_reward_step": 0.5473706722259521, "rewards/format_reward_step": 0.75390625, "step": 23 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5133433574810624, "aux_distill/mean_u": 0.3777117190213353, "aux_distill/n_active_tok": 289.9375, "calib/answer_extract_rate": 0.91796875, "calib/auroc": 0.5641506602641055, "calib/avg_num_step_conf": 4.4375, "calib/ece": 0.2902073913043478, "calib/final_conf_rate": 0.8984375, "calib/format_rate": 0.84765625, "calib/frac_conf_gt_0.9": 0.1, "calib/gap": 0.06962457983193282, "calib/mean_conf": 0.3789030434782609, "calib/mu_c": 0.43823529411764706, "calib/mu_w": 0.36861071428571424, "calib/nonempty_final_conf_rate": 0.8984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.94921875, "calib/pce": 0.26064217391304345, "calib/std_conf": 0.3273176261401784, "calib/step_conf_rate": 0.94921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2679.0, "completions/max_terminated_length": 2679.0, "completions/mean_length": 305.03125, "completions/mean_terminated_length": 306.22747802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.0256, "grad_norm": 0.01090239454060793, "learning_rate": 4.888888888888889e-06, "loss": 0.1741, "num_tokens": 5944440.0, "reward": 0.802044153213501, "reward_std": 0.35184115171432495, "rewards/accuracy_reward_step": 0.1328125, "rewards/final_brier_reward_step": 0.623619556427002, "rewards/format_reward_step": 0.84765625, "step": 24 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5091779874637723, "aux_distill/mean_u": 0.3399544439281606, "aux_distill/n_active_tok": 275.5, "calib/answer_extract_rate": 0.93359375, "calib/auroc": 0.6165458141067898, "calib/avg_num_step_conf": 4.28515625, "calib/ece": 0.220200826446281, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 0.07024793388429752, "calib/gap": 0.10513278839815426, "calib/mean_conf": 0.3250223140495867, "calib/mu_c": 0.4140810810810811, "calib/mu_w": 0.30894829268292684, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.19616528925619836, "calib/std_conf": 0.3018464867580852, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2797.0, "completions/max_terminated_length": 2797.0, "completions/mean_length": 254.38671875, "completions/mean_terminated_length": 256.3897705078125, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.02666666666666667, "grad_norm": 0.0111892344430089, "learning_rate": 4.861111111111111e-06, "loss": 0.1203, "num_tokens": 6112787.0, "reward": 0.8762689828872681, "reward_std": 0.3159981966018677, "rewards/accuracy_reward_step": 0.14453125, "rewards/final_brier_reward_step": 0.7056629061698914, "rewards/format_reward_step": 0.90234375, "step": 25 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5155609296634793, "aux_distill/mean_u": 0.4098851851699954, "aux_distill/n_active_tok": 251.9375, "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.4148527528809219, "calib/avg_num_step_conf": 3.87890625, "calib/ece": 0.2528608510638298, "calib/final_conf_rate": 0.91796875, "calib/format_rate": 0.90625, "calib/frac_conf_gt_0.9": 0.05531914893617021, "calib/gap": -0.08365330772513868, "calib/mean_conf": 0.2944582978723404, "calib/mu_c": 0.21863636363636363, "calib/mu_w": 0.3022896713615023, "calib/nonempty_final_conf_rate": 0.91796875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.22685106382978726, "calib/std_conf": 0.3056578070766229, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2999.0, "completions/max_terminated_length": 2999.0, "completions/mean_length": 243.84765625, "completions/mean_terminated_length": 243.84765625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "epoch": 0.027733333333333332, "grad_norm": 0.011879602447152138, "learning_rate": 4.833333333333333e-06, "loss": 0.1231, "num_tokens": 6280452.0, "reward": 0.8439472913742065, "reward_std": 0.28856855630874634, "rewards/accuracy_reward_step": 0.0859375, "rewards/final_brier_reward_step": 0.6957069635391235, "rewards/format_reward_step": 0.90625, "step": 26 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4900315869599581, "aux_distill/mean_u": 0.3588746115217724, "aux_distill/n_active_tok": 251.34375, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5614362806143629, "calib/avg_num_step_conf": 3.9296875, "calib/ece": 0.2168344398340249, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.03734439834024896, "calib/gap": 0.07003034454130347, "calib/mean_conf": 0.2768170124481327, "calib/mu_c": 0.34045454545454545, "calib/mu_w": 0.270424200913242, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.20118257261410788, "calib/std_conf": 0.27174081454851323, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1898.0, "completions/max_terminated_length": 1898.0, "completions/mean_length": 226.91015625, "completions/mean_terminated_length": 228.6968536376953, "completions/min_length": 0.0, "completions/min_terminated_length": 11.0, "epoch": 0.0288, "grad_norm": 0.01075553335249424, "learning_rate": 4.805555555555556e-06, "loss": 0.0758, "num_tokens": 6443757.0, "reward": 0.8853782415390015, "reward_std": 0.24783457815647125, "rewards/accuracy_reward_step": 0.0859375, "rewards/final_brier_reward_step": 0.7590377330780029, "rewards/format_reward_step": 0.92578125, "step": 27 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5010905349627137, "aux_distill/mean_u": 0.35061370361301236, "aux_distill/n_active_tok": 253.0625, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.40372168284789645, "calib/avg_num_step_conf": 3.90625, "calib/ece": 0.23082326530612246, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.0326530612244898, "calib/gap": -0.06636441374159821, "calib/mean_conf": 0.2655848979591837, "calib/mu_c": 0.20978461538461537, "calib/mu_w": 0.2761490291262136, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.1686122448979592, "calib/std_conf": 0.2550969035094147, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2758.0, "completions/max_terminated_length": 2758.0, "completions/mean_length": 226.02734375, "completions/mean_terminated_length": 226.02734375, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "epoch": 0.029866666666666666, "grad_norm": 0.010541618801653385, "learning_rate": 4.777777777777778e-06, "loss": 0.0724, "num_tokens": 6608564.0, "reward": 0.8957502245903015, "reward_std": 0.2513200044631958, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.7133753895759583, "rewards/format_reward_step": 0.92578125, "step": 28 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5015281261876225, "aux_distill/mean_u": 0.37213500914810527, "aux_distill/n_active_tok": 242.53125, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5502812939521801, "calib/avg_num_step_conf": 3.734375, "calib/ece": 0.21642570281124499, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.028112449799196786, "calib/gap": 0.0666518987341772, "calib/mean_conf": 0.2557269076305221, "calib/mu_c": 0.31916666666666665, "calib/mu_w": 0.25251476793248945, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.21197991967871488, "calib/std_conf": 0.24414579528695696, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1377.0, "completions/max_terminated_length": 1377.0, "completions/mean_length": 211.65625, "completions/mean_terminated_length": 212.4862823486328, "completions/min_length": 0.0, "completions/min_terminated_length": 44.0, "epoch": 0.030933333333333334, "grad_norm": 0.011848612688481808, "learning_rate": 4.75e-06, "loss": 0.0688, "num_tokens": 6769876.0, "reward": 0.8962675333023071, "reward_std": 0.19631339609622955, "rewards/accuracy_reward_step": 0.046875, "rewards/final_brier_reward_step": 0.8042538166046143, "rewards/format_reward_step": 0.94140625, "step": 29 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.48358502611517906, "aux_distill/mean_u": 0.31133066449118657, "aux_distill/n_active_tok": 217.15625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.47853773584905657, "calib/avg_num_step_conf": 3.36328125, "calib/ece": 0.17899841269841268, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.023809523809523808, "calib/gap": -0.0208839622641509, "calib/mean_conf": 0.2563190476190476, "calib/mu_c": 0.23875000000000002, "calib/mu_w": 0.2596339622641509, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.13829365079365077, "calib/std_conf": 0.23551776218335396, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 627.0, "completions/max_terminated_length": 627.0, "completions/mean_length": 172.63671875, "completions/mean_terminated_length": 173.31373596191406, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.032, "grad_norm": 0.01241193525493145, "learning_rate": 4.722222222222222e-06, "loss": 0.0386, "num_tokens": 6921055.0, "reward": 0.9601128101348877, "reward_std": 0.15688207745552063, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.7796005010604858, "rewards/format_reward_step": 0.98046875, "step": 30 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.49283486418426037, "aux_distill/mean_u": 0.31671603956800987, "aux_distill/n_active_tok": 202.46875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5469432314410481, "calib/avg_num_step_conf": 3.1171875, "calib/ece": 0.18115298804780877, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.01593625498007968, "calib/gap": 0.05664088924176261, "calib/mean_conf": 0.2451418326693227, "calib/mu_c": 0.2968181818181818, "calib/mu_w": 0.2401772925764192, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.16932270916334663, "calib/std_conf": 0.22780424523358234, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 460.0, "completions/max_terminated_length": 460.0, "completions/mean_length": 159.74609375, "completions/mean_terminated_length": 160.37255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.03306666666666667, "grad_norm": 0.012938912026584148, "learning_rate": 4.694444444444445e-06, "loss": 0.0724, "num_tokens": 7067862.0, "reward": 0.9331058263778687, "reward_std": 0.1786496937274933, "rewards/accuracy_reward_step": 0.0859375, "rewards/final_brier_reward_step": 0.8193366527557373, "rewards/format_reward_step": 0.9609375, "step": 31 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5122799929231405, "aux_distill/mean_u": 0.3455168102658182, "aux_distill/n_active_tok": 202.125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5427272727272726, "calib/avg_num_step_conf": 3.1328125, "calib/ece": 0.17164000000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.016, "calib/gap": 0.05277272727272722, "calib/mean_conf": 0.24356, "calib/mu_c": 0.29, "calib/mu_w": 0.23722727272727276, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14759999999999998, "calib/std_conf": 0.2167369982259605, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 483.0, "completions/max_terminated_length": 483.0, "completions/mean_length": 157.83203125, "completions/mean_terminated_length": 158.45098876953125, "completions/min_length": 0.0, "completions/min_terminated_length": 43.0, "epoch": 0.034133333333333335, "grad_norm": 0.013937647454440594, "learning_rate": 4.666666666666667e-06, "loss": 0.0398, "num_tokens": 7214971.0, "reward": 0.9550351500511169, "reward_std": 0.15868309140205383, "rewards/accuracy_reward_step": 0.1171875, "rewards/final_brier_reward_step": 0.8202266097068787, "rewards/format_reward_step": 0.97265625, "step": 32 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5047999192029238, "aux_distill/mean_u": 0.3599142013757496, "aux_distill/n_active_tok": 199.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5623517786561265, "calib/avg_num_step_conf": 3.125, "calib/ece": 0.13408730158730156, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.034723320158102755, "calib/mean_conf": 0.19694444444444445, "calib/mu_c": 0.22863636363636364, "calib/mu_w": 0.19391304347826088, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12186507936507934, "calib/std_conf": 0.19079145123920305, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1433.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 167.23828125, "completions/mean_terminated_length": 167.23828125, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.0352, "grad_norm": 0.013600134290754795, "learning_rate": 4.638888888888889e-06, "loss": 0.0804, "num_tokens": 7364656.0, "reward": 0.9592347145080566, "reward_std": 0.12692934274673462, "rewards/accuracy_reward_step": 0.0859375, "rewards/final_brier_reward_step": 0.8559695482254028, "rewards/format_reward_step": 0.9765625, "step": 33 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.47679288033396006, "aux_distill/mean_u": 0.33165973203230903, "aux_distill/n_active_tok": 215.75, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5446511627906977, "calib/avg_num_step_conf": 3.375, "calib/ece": 0.11831999999999998, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01176079734219268, "calib/mean_conf": 0.18560000000000001, "calib/mu_c": 0.1957142857142857, "calib/mu_w": 0.18395348837209302, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.08195999999999999, "calib/std_conf": 0.17300127167162674, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3003.0, "completions/max_terminated_length": 3003.0, "completions/mean_length": 180.69921875, "completions/mean_terminated_length": 180.69921875, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 0.03626666666666667, "grad_norm": 0.01341802254319191, "learning_rate": 4.611111111111112e-06, "loss": 0.0663, "num_tokens": 7516027.0, "reward": 0.9679831862449646, "reward_std": 0.1320965737104416, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.826591432094574, "rewards/format_reward_step": 0.97265625, "step": 34 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5014631142839789, "aux_distill/mean_u": 0.3500040735053896, "aux_distill/n_active_tok": 245.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5164835164835164, "calib/avg_num_step_conf": 3.859375, "calib/ece": 0.12572549019607843, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.025903540903540884, "calib/mean_conf": 0.13670588235294118, "calib/mu_c": 0.16047619047619047, "calib/mu_w": 0.13457264957264958, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0900392156862745, "calib/std_conf": 0.15651139081739068, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 488.0, "completions/max_terminated_length": 488.0, "completions/mean_length": 173.75390625, "completions/mean_terminated_length": 174.435302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 55.0, "epoch": 0.037333333333333336, "grad_norm": 0.012046094983816147, "learning_rate": 4.583333333333333e-06, "loss": 0.0359, "num_tokens": 7669764.0, "reward": 0.9877500534057617, "reward_std": 0.06010032445192337, "rewards/accuracy_reward_step": 0.08203125, "rewards/final_brier_reward_step": 0.8973749876022339, "rewards/format_reward_step": 0.99609375, "step": 35 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4936283128336072, "aux_distill/mean_u": 0.33273859315272475, "aux_distill/n_active_tok": 251.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5374149659863946, "calib/avg_num_step_conf": 3.94921875, "calib/ece": 0.15101562500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01018436360051267, "calib/mean_conf": 0.10523437500000002, "calib/mu_c": 0.11346938775510206, "calib/mu_w": 0.10328502415458939, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.032421874999999996, "calib/std_conf": 0.12121755264135377, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 448.0, "completions/max_terminated_length": 448.0, "completions/mean_length": 178.6484375, "completions/mean_terminated_length": 179.34902954101562, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.0384, "grad_norm": 0.011449523270130157, "learning_rate": 4.555555555555556e-06, "loss": 0.0791, "num_tokens": 7818210.0, "reward": 1.0088348388671875, "reward_std": 0.05037228763103485, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.8262632489204407, "rewards/format_reward_step": 1.0, "step": 36 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4734587259590626, "aux_distill/mean_u": 0.29364758001900115, "aux_distill/n_active_tok": 254.03125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5375, "calib/avg_num_step_conf": 3.97265625, "calib/ece": 0.08688976377952756, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0009583333333333666, "calib/mean_conf": 0.08051181102362205, "calib/mu_c": 0.07966666666666665, "calib/mu_w": 0.08062500000000002, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.02464566929133858, "calib/std_conf": 0.10341479555286022, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 788.0, "completions/max_terminated_length": 788.0, "completions/mean_length": 185.39453125, "completions/mean_terminated_length": 186.12158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 40.0, "epoch": 0.039466666666666664, "grad_norm": 0.012011139653623104, "learning_rate": 4.527777777777778e-06, "loss": 0.0557, "num_tokens": 7972767.0, "reward": 0.9930021166801453, "reward_std": 0.04802718758583069, "rewards/accuracy_reward_step": 0.1171875, "rewards/final_brier_reward_step": 0.8766292333602905, "rewards/format_reward_step": 0.9921875, "step": 37 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5167172281071544, "aux_distill/mean_u": 0.35844834511413254, "aux_distill/n_active_tok": 270.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5392903087478559, "calib/avg_num_step_conf": 4.2265625, "calib/ece": 0.12679687499999998, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02404802744425384, "calib/mean_conf": 0.06781250000000001, "calib/mu_c": 0.08772727272727271, "calib/mu_w": 0.06367924528301887, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011367187500000002, "calib/std_conf": 0.09589575639072878, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 421.0, "completions/max_terminated_length": 421.0, "completions/mean_length": 186.63671875, "completions/mean_terminated_length": 187.36863708496094, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.04053333333333333, "grad_norm": 0.011758743785321712, "learning_rate": 4.5e-06, "loss": 0.0434, "num_tokens": 8127434.0, "reward": 1.0042815208435059, "reward_std": 0.03750614821910858, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.8405945301055908, "rewards/format_reward_step": 0.99609375, "step": 38 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4876714339479804, "aux_distill/mean_u": 0.30279994241838204, "aux_distill/n_active_tok": 263.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.41560473954840155, "calib/avg_num_step_conf": 4.13671875, "calib/ece": 0.14603921568627454, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.018843058350100608, "calib/mean_conf": 0.04454901960784314, "calib/mu_c": 0.02880952380952381, "calib/mu_w": 0.04765258215962442, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.012941176470588234, "calib/std_conf": 0.08763388943479686, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 556.0, "completions/max_terminated_length": 556.0, "completions/mean_length": 185.75, "completions/mean_terminated_length": 186.4784393310547, "completions/min_length": 0.0, "completions/min_terminated_length": 67.0, "epoch": 0.0416, "grad_norm": 0.011680186726152897, "learning_rate": 4.472222222222223e-06, "loss": 0.0806, "num_tokens": 8281074.0, "reward": 0.9979601502418518, "reward_std": 0.02412526123225689, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.8318578004837036, "rewards/format_reward_step": 0.99609375, "step": 39 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4841670235618949, "aux_distill/mean_u": 0.30947448492696666, "aux_distill/n_active_tok": 258.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.45479855879462827, "calib/avg_num_step_conf": 4.046875, "calib/ece": 0.15710937500000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.015516977836008292, "calib/mean_conf": 0.03546875000000001, "calib/mu_c": 0.022558139534883725, "calib/mu_w": 0.03807511737089202, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.012304687500000001, "calib/std_conf": 0.06960108852192974, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 494.0, "completions/max_terminated_length": 494.0, "completions/mean_length": 179.8046875, "completions/mean_terminated_length": 180.5098114013672, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.042666666666666665, "grad_norm": 0.012068229727447033, "learning_rate": 4.444444444444444e-06, "loss": 0.0551, "num_tokens": 8433864.0, "reward": 1.0007379055023193, "reward_std": 0.01349399983882904, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.8335070610046387, "rewards/format_reward_step": 1.0, "step": 40 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.49721950199455023, "aux_distill/mean_u": 0.3209028292175142, "aux_distill/n_active_tok": 253.8125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.46456508135168956, "calib/avg_num_step_conf": 3.984375, "calib/ece": 0.24701093749999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 8.498122653316162e-05, "calib/mean_conf": 0.0186140625, "calib/mu_c": 0.01867647058823529, "calib/mu_w": 0.01859148936170213, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.025049272489357725, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 410.0, "completions/max_terminated_length": 410.0, "completions/mean_length": 171.5859375, "completions/mean_terminated_length": 172.2588348388672, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.04373333333333333, "grad_norm": 0.013531737960875034, "learning_rate": 4.416666666666667e-06, "loss": 0.0318, "num_tokens": 8585038.0, "reward": 1.0044739246368408, "reward_std": 0.00973083171993494, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.7433229684829712, "rewards/format_reward_step": 1.0, "step": 41 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.48902952019125223, "aux_distill/mean_u": 0.32963415527338386, "aux_distill/n_active_tok": 246.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5018560978272737, "calib/avg_num_step_conf": 3.85546875, "calib/ece": 0.1583203125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0009378753138988959, "calib/mean_conf": 0.0156640625, "calib/mu_c": 0.014883720930232559, "calib/mu_w": 0.015821596244131455, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0030078125, "calib/std_conf": 0.026449178077892963, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 405.0, "completions/max_terminated_length": 405.0, "completions/mean_length": 169.76953125, "completions/mean_terminated_length": 170.435302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.0448, "grad_norm": 0.01112829614430666, "learning_rate": 4.388888888888889e-06, "loss": 0.0484, "num_tokens": 8732867.0, "reward": 1.0020275115966797, "reward_std": 0.0062466030940413475, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.8360863327980042, "rewards/format_reward_step": 1.0, "step": 42 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4769623838365078, "aux_distill/mean_u": 0.2908125661926928, "aux_distill/n_active_tok": 244.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4851258581235698, "calib/avg_num_step_conf": 3.82421875, "calib/ece": 0.16768627450980395, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0005804035781152486, "calib/mean_conf": 0.014823529411764708, "calib/mu_c": 0.014347826086956523, "calib/mu_w": 0.014928229665071771, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0010588235294117648, "calib/std_conf": 0.01987875591084262, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 373.0, "completions/max_terminated_length": 373.0, "completions/mean_length": 172.5859375, "completions/mean_terminated_length": 173.26275634765625, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.04586666666666667, "grad_norm": 0.01158448588103056, "learning_rate": 4.361111111111112e-06, "loss": 0.0236, "num_tokens": 8882273.0, "reward": 0.9964126944541931, "reward_std": 0.02189866080880165, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.8170441389083862, "rewards/format_reward_step": 0.9921875, "step": 43 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.48727248795330524, "aux_distill/mean_u": 0.32562953287294033, "aux_distill/n_active_tok": 270.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5547118785105055, "calib/avg_num_step_conf": 4.23828125, "calib/ece": 0.1688235294117647, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0017994591221135846, "calib/mean_conf": 0.011568627450980393, "calib/mu_c": 0.013043478260869568, "calib/mu_w": 0.011244019138755983, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.015539014658191224, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 438.0, "completions/max_terminated_length": 438.0, "completions/mean_length": 188.04296875, "completions/mean_terminated_length": 188.78041076660156, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.046933333333333334, "grad_norm": 0.012537293136119843, "learning_rate": 4.333333333333334e-06, "loss": 0.0531, "num_tokens": 9036732.0, "reward": 0.9982506036758423, "reward_std": 0.015460444614291191, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.8207198977470398, "rewards/format_reward_step": 0.99609375, "step": 44 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4742131344974041, "aux_distill/mean_u": 0.26243291542283825, "aux_distill/n_active_tok": 262.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4895609756097561, "calib/avg_num_step_conf": 4.10546875, "calib/ece": 0.18729411764705883, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0012634146341463422, "calib/mean_conf": 0.008784313725490198, "calib/mu_c": 0.009800000000000001, "calib/mu_w": 0.00853658536585366, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.013478106758144088, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2400.0, "completions/max_terminated_length": 2400.0, "completions/mean_length": 201.03515625, "completions/mean_terminated_length": 201.03515625, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.048, "grad_norm": 0.009973783977329731, "learning_rate": 4.305555555555556e-06, "loss": 0.0858, "num_tokens": 9193245.0, "reward": 0.997878909111023, "reward_std": 0.015608172863721848, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.8043515682220459, "rewards/format_reward_step": 0.99609375, "step": 45 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4932613065466285, "aux_distill/mean_u": 0.3355272456696168, "aux_distill/n_active_tok": 263.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.43321371610845294, "calib/avg_num_step_conf": 4.11328125, "calib/ece": 0.2507421875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0007480063795853274, "calib/mean_conf": 0.0070703125, "calib/mu_c": 0.0065151515151515155, "calib/mu_w": 0.007263157894736843, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010402586512610399, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 452.0, "completions/max_terminated_length": 452.0, "completions/mean_length": 179.2421875, "completions/mean_terminated_length": 179.9451141357422, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.04906666666666667, "grad_norm": 0.010921996086835861, "learning_rate": 4.277777777777778e-06, "loss": 0.0575, "num_tokens": 9343899.0, "reward": 1.0016005039215088, "reward_std": 0.0036760293878614902, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.7453886270523071, "rewards/format_reward_step": 1.0, "step": 46 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4907516334205866, "aux_distill/mean_u": 0.321547496121402, "aux_distill/n_active_tok": 279.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.45729638009049767, "calib/avg_num_step_conf": 4.37890625, "calib/ece": 0.1972265625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.003495475113122172, "calib/mean_conf": 0.0085546875, "calib/mu_c": 0.00576923076923077, "calib/mu_w": 0.009264705882352942, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001328125, "calib/std_conf": 0.022943990428374566, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 462.0, "completions/max_terminated_length": 462.0, "completions/mean_length": 189.0625, "completions/mean_terminated_length": 189.80393981933594, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.050133333333333335, "grad_norm": 0.011003137566149235, "learning_rate": 4.25e-06, "loss": 0.0596, "num_tokens": 9498275.0, "reward": 1.000872015953064, "reward_std": 0.00331785692833364, "rewards/accuracy_reward_step": 0.203125, "rewards/final_brier_reward_step": 0.7986191511154175, "rewards/format_reward_step": 1.0, "step": 47 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4879733482375741, "aux_distill/mean_u": 0.30240783007869915, "aux_distill/n_active_tok": 255.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.45755091245702195, "calib/avg_num_step_conf": 3.98828125, "calib/ece": 0.2168359375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0013938111610685014, "calib/mean_conf": 0.0058203125, "calib/mu_c": 0.004736842105263158, "calib/mu_w": 0.00613065326633166, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008844961130629334, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 492.0, "completions/max_terminated_length": 492.0, "completions/mean_length": 181.05859375, "completions/mean_terminated_length": 181.7686309814453, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.0512, "grad_norm": 0.011607394553720951, "learning_rate": 4.222222222222223e-06, "loss": 0.0429, "num_tokens": 9648314.0, "reward": 1.0009984970092773, "reward_std": 0.002487377729266882, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.7793409824371338, "rewards/format_reward_step": 1.0, "step": 48 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.490774248726666, "aux_distill/mean_u": 0.32144850803263364, "aux_distill/n_active_tok": 261.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5138048780487804, "calib/avg_num_step_conf": 4.1015625, "calib/ece": 0.19094117647058823, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010731707317073172, "calib/mean_conf": 0.005137254901960785, "calib/mu_c": 0.006, "calib/mu_w": 0.004926829268292683, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00989557161373086, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 642.0, "completions/max_terminated_length": 642.0, "completions/mean_length": 186.28125, "completions/mean_terminated_length": 187.01177978515625, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.05226666666666667, "grad_norm": 0.011714048683643341, "learning_rate": 4.194444444444445e-06, "loss": 0.0393, "num_tokens": 9800538.0, "reward": 0.9972037076950073, "reward_std": 0.013753028586506844, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.8030011653900146, "rewards/format_reward_step": 0.99609375, "step": 49 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4833144247531891, "aux_distill/mean_u": 0.3095816373875393, "aux_distill/n_active_tok": 267.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5081073896863371, "calib/avg_num_step_conf": 4.19140625, "calib/ece": 0.21882352941176472, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0006379585326953749, "calib/mean_conf": 0.004705882352941177, "calib/mu_c": 0.004210526315789474, "calib/mu_w": 0.0048484848484848485, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007809732785584766, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 454.0, "completions/max_terminated_length": 454.0, "completions/mean_length": 190.09765625, "completions/mean_terminated_length": 190.84315490722656, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.05333333333333334, "grad_norm": 0.008671514689922333, "learning_rate": 4.166666666666667e-06, "loss": 0.0564, "num_tokens": 9954563.0, "reward": 0.9969898462295532, "reward_std": 0.012864282354712486, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.7752296924591064, "rewards/format_reward_step": 0.99609375, "step": 50 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.46149670518934727, "aux_distill/mean_u": 0.2551955135773979, "aux_distill/n_active_tok": 255.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5217850438047559, "calib/avg_num_step_conf": 4.015625, "calib/ece": 0.2618359375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008479349186483118, "calib/mean_conf": 0.0037890625000000003, "calib/mu_c": 0.004411764705882354, "calib/mu_w": 0.003563829787234042, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0064422341133409415, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 480.0, "completions/max_terminated_length": 480.0, "completions/mean_length": 180.6328125, "completions/mean_terminated_length": 181.3411865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.0544, "grad_norm": 0.009305499494075775, "learning_rate": 4.138888888888889e-06, "loss": 0.0442, "num_tokens": 10110101.0, "reward": 1.0011439323425293, "reward_std": 0.0023848102428019047, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.7366628646850586, "rewards/format_reward_step": 1.0, "step": 51 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.48208334017544985, "aux_distill/mean_u": 0.31483845365810226, "aux_distill/n_active_tok": 242.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.48001508295625944, "calib/avg_num_step_conf": 3.80078125, "calib/ece": 0.1998828125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0006900452488687778, "calib/mean_conf": 0.0032421875000000003, "calib/mu_c": 0.0026923076923076926, "calib/mu_w": 0.0033823529411764705, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005799038300860216, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 527.0, "completions/max_terminated_length": 527.0, "completions/mean_length": 179.19140625, "completions/mean_terminated_length": 179.89413452148438, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.055466666666666664, "grad_norm": 0.008505421690642834, "learning_rate": 4.111111111111111e-06, "loss": 0.045, "num_tokens": 10263926.0, "reward": 1.0005247592926025, "reward_std": 0.0010291299549862742, "rewards/accuracy_reward_step": 0.203125, "rewards/final_brier_reward_step": 0.7979245781898499, "rewards/format_reward_step": 1.0, "step": 52 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4751516552641988, "aux_distill/mean_u": 0.3170315844694307, "aux_distill/n_active_tok": 251.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5117115737905695, "calib/avg_num_step_conf": 3.94921875, "calib/ece": 0.2751764705882353, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00017375995101041007, "calib/mean_conf": 0.003254901960784314, "calib/mu_c": 0.0033802816901408453, "calib/mu_w": 0.003206521739130435, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005386414054167887, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 481.0, "completions/max_terminated_length": 481.0, "completions/mean_length": 184.03125, "completions/mean_terminated_length": 184.75294494628906, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.05653333333333333, "grad_norm": 0.008253109641373158, "learning_rate": 4.083333333333334e-06, "loss": 0.0498, "num_tokens": 10416862.0, "reward": 0.9970114827156067, "reward_std": 0.012826445512473583, "rewards/accuracy_reward_step": 0.27734375, "rewards/final_brier_reward_step": 0.7205855250358582, "rewards/format_reward_step": 0.99609375, "step": 53 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.46900024451315403, "aux_distill/mean_u": 0.26208520407903335, "aux_distill/n_active_tok": 254.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5501342233185809, "calib/avg_num_step_conf": 3.99609375, "calib/ece": 0.29734375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001398824639048103, "calib/mean_conf": 0.0034375000000000005, "calib/mu_c": 0.004415584415584416, "calib/mu_w": 0.003016759776536313, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006547602137424051, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 527.0, "completions/max_terminated_length": 527.0, "completions/mean_length": 181.4609375, "completions/mean_terminated_length": 182.1725616455078, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.0576, "grad_norm": 0.0095495181158185, "learning_rate": 4.055555555555556e-06, "loss": 0.0294, "num_tokens": 10569548.0, "reward": 0.9973945021629333, "reward_std": 0.013665013015270233, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.6979140043258667, "rewards/format_reward_step": 0.99609375, "step": 54 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4792042141780257, "aux_distill/mean_u": 0.3025881007964957, "aux_distill/n_active_tok": 244.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5178941141674062, "calib/avg_num_step_conf": 3.8203125, "calib/ece": 0.1881640625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000533372769397614, "calib/mean_conf": 0.0032421875000000003, "calib/mu_c": 0.003673469387755102, "calib/mu_w": 0.003140096618357488, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005523028627016499, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 555.0, "completions/max_terminated_length": 555.0, "completions/mean_length": 182.6015625, "completions/mean_terminated_length": 183.31765747070312, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.058666666666666666, "grad_norm": 0.008562219329178333, "learning_rate": 4.027777777777779e-06, "loss": 0.058, "num_tokens": 10724118.0, "reward": 1.0006825923919678, "reward_std": 0.001537383534014225, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.8099589943885803, "rewards/format_reward_step": 1.0, "step": 55 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4713021432980895, "aux_distill/mean_u": 0.2991845504353514, "aux_distill/n_active_tok": 249.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.518120737045709, "calib/avg_num_step_conf": 3.9140625, "calib/ece": 0.180078125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011666496996844146, "calib/mean_conf": 0.003515625, "calib/mu_c": 0.004468085106382979, "calib/mu_w": 0.0033014354066985643, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006071172115775915, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 449.0, "completions/max_terminated_length": 449.0, "completions/mean_length": 183.203125, "completions/mean_terminated_length": 183.9215850830078, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.05973333333333333, "grad_norm": 0.005701439920812845, "learning_rate": 4.000000000000001e-06, "loss": 0.0394, "num_tokens": 10877858.0, "reward": 1.000795602798462, "reward_std": 0.0013140874216333032, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.8179976344108582, "rewards/format_reward_step": 1.0, "step": 56 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.47348556853830814, "aux_distill/mean_u": 0.2610358596491855, "aux_distill/n_active_tok": 250.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5510937376609019, "calib/avg_num_step_conf": 3.921875, "calib/ece": 0.2584375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014183052989023143, "calib/mean_conf": 0.0032812500000000003, "calib/mu_c": 0.004328358208955225, "calib/mu_w": 0.0029100529100529104, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006073684914242754, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 411.0, "completions/max_terminated_length": 411.0, "completions/mean_length": 181.84375, "completions/mean_terminated_length": 182.55686950683594, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.0608, "grad_norm": 0.00831560418009758, "learning_rate": 3.972222222222223e-06, "loss": 0.0481, "num_tokens": 11031202.0, "reward": 1.001109004020691, "reward_std": 0.002224062103778124, "rewards/accuracy_reward_step": 0.26171875, "rewards/final_brier_reward_step": 0.7404991984367371, "rewards/format_reward_step": 1.0, "step": 57 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.45535885356366634, "aux_distill/mean_u": 0.25883576058026386, "aux_distill/n_active_tok": 226.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4745469190495369, "calib/avg_num_step_conf": 3.53515625, "calib/ece": 0.2484765625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0006790173177607731, "calib/mean_conf": 0.0054296875000000005, "calib/mu_c": 0.004923076923076923, "calib/mu_w": 0.005602094240837696, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007487681126513318, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 480.0, "completions/max_terminated_length": 480.0, "completions/mean_length": 162.53125, "completions/mean_terminated_length": 163.16864013671875, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.06186666666666667, "grad_norm": 0.011086608283221722, "learning_rate": 3.944444444444445e-06, "loss": 0.0392, "num_tokens": 11179130.0, "reward": 0.9973009824752808, "reward_std": 0.013514685444533825, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.7446019649505615, "rewards/format_reward_step": 0.99609375, "step": 58 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4486118769273162, "aux_distill/mean_u": 0.24863800890353696, "aux_distill/n_active_tok": 227.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5085217825074826, "calib/avg_num_step_conf": 3.5625, "calib/ece": 0.2369921875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00038077818423678123, "calib/mean_conf": 0.0051953125, "calib/mu_c": 0.005483870967741936, "calib/mu_w": 0.005103092783505155, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006956964354324646, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 512.0, "completions/max_terminated_length": 512.0, "completions/mean_length": 160.734375, "completions/mean_terminated_length": 161.36471557617188, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.06293333333333333, "grad_norm": 0.008816853165626526, "learning_rate": 3.916666666666667e-06, "loss": 0.0604, "num_tokens": 11326526.0, "reward": 1.0012903213500977, "reward_std": 0.0024084849283099174, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.7603933811187744, "rewards/format_reward_step": 1.0, "step": 59 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.47772796731442213, "aux_distill/mean_u": 0.25235217135883486, "aux_distill/n_active_tok": 194.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5245481294661622, "calib/avg_num_step_conf": 3.05078125, "calib/ece": 0.2324609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0007524169819251789, "calib/mean_conf": 0.0058203125, "calib/mu_c": 0.00639344262295082, "calib/mu_w": 0.0056410256410256415, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0068544392478410475, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 401.0, "completions/max_terminated_length": 401.0, "completions/mean_length": 143.34765625, "completions/mean_terminated_length": 143.90980529785156, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.064, "grad_norm": 0.012299815192818642, "learning_rate": 3.88888888888889e-06, "loss": 0.039, "num_tokens": 11472079.0, "reward": 0.9975767135620117, "reward_std": 0.013632569462060928, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.7607784867286682, "rewards/format_reward_step": 0.99609375, "step": 60 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.47281746193766594, "aux_distill/mean_u": 0.2551864848975026, "aux_distill/n_active_tok": 169.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.45449197120708745, "calib/avg_num_step_conf": 2.65625, "calib/ece": 0.3207421875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001065891472868217, "calib/mean_conf": 0.0073828125000000005, "calib/mu_c": 0.006666666666666667, "calib/mu_w": 0.007732558139534884, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007892461883965215, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 255.0, "completions/max_terminated_length": 255.0, "completions/mean_length": 123.0390625, "completions/mean_terminated_length": 123.52157592773438, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.06506666666666666, "grad_norm": 0.012310824356973171, "learning_rate": 3.861111111111112e-06, "loss": 0.0563, "num_tokens": 11607641.0, "reward": 1.002129077911377, "reward_std": 0.003747039008885622, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.6761331558227539, "rewards/format_reward_step": 1.0, "step": 61 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.47183617390692234, "aux_distill/mean_u": 0.25694304938121587, "aux_distill/n_active_tok": 170.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.3787030438766161, "calib/avg_num_step_conf": 2.66015625, "calib/ece": 0.17484375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.003420543622111371, "calib/mean_conf": 0.00875, "calib/mu_c": 0.005957446808510639, "calib/mu_w": 0.00937799043062201, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0082915619758885, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 294.0, "completions/max_terminated_length": 294.0, "completions/mean_length": 128.25, "completions/mean_terminated_length": 128.75294494628906, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.06613333333333334, "grad_norm": 0.010938399471342564, "learning_rate": 3.833333333333334e-06, "loss": 0.0424, "num_tokens": 11747553.0, "reward": 1.0010210275650024, "reward_std": 0.002157938200980425, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.8184484243392944, "rewards/format_reward_step": 1.0, "step": 62 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.49205196741968393, "aux_distill/mean_u": 0.30069891272887367, "aux_distill/n_active_tok": 155.78125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5284027282907462, "calib/avg_num_step_conf": 2.4375, "calib/ece": 0.174609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0007227934439580589, "calib/mean_conf": 0.008984375, "calib/mu_c": 0.009574468085106385, "calib/mu_w": 0.008851674641148326, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008181519165740246, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 274.0, "completions/max_terminated_length": 274.0, "completions/mean_length": 116.296875, "completions/mean_terminated_length": 116.75294494628906, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.0672, "grad_norm": 0.011508263647556305, "learning_rate": 3.8055555555555556e-06, "loss": 0.0446, "num_tokens": 11885965.0, "reward": 1.0016839504241943, "reward_std": 0.002663816325366497, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.8197742104530334, "rewards/format_reward_step": 1.0, "step": 63 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4663435900583863, "aux_distill/mean_u": 0.2517591965568636, "aux_distill/n_active_tok": 150.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.44442207007652035, "calib/avg_num_step_conf": 2.35546875, "calib/ece": 0.2445703125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0017905759162303667, "calib/mean_conf": 0.009335937499999999, "calib/mu_c": 0.008, "calib/mu_w": 0.009790575916230367, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00809781736001089, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 280.0, "completions/max_terminated_length": 280.0, "completions/mean_length": 115.34375, "completions/mean_terminated_length": 115.79608154296875, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.06826666666666667, "grad_norm": 0.012494472786784172, "learning_rate": 3.777777777777778e-06, "loss": 0.044, "num_tokens": 12019269.0, "reward": 1.0019547939300537, "reward_std": 0.0032990314066410065, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.7500035762786865, "rewards/format_reward_step": 1.0, "step": 64 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.48996405489742756, "aux_distill/mean_u": 0.25722923283434773, "aux_distill/n_active_tok": 134.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.49447898799313894, "calib/avg_num_step_conf": 2.1015625, "calib/ece": 0.16171875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00018867924528301674, "calib/mean_conf": 0.010156249999999999, "calib/mu_c": 0.010000000000000002, "calib/mu_w": 0.010188679245283019, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008383799015810196, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 244.0, "completions/max_terminated_length": 244.0, "completions/mean_length": 110.921875, "completions/mean_terminated_length": 111.35687255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.06933333333333333, "grad_norm": 0.01302141323685646, "learning_rate": 3.7500000000000005e-06, "loss": 0.0479, "num_tokens": 12152689.0, "reward": 1.0016319751739502, "reward_std": 0.0025428631342947483, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.8313890695571899, "rewards/format_reward_step": 1.0, "step": 65 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4909752355888486, "aux_distill/mean_u": 0.26345752851416304, "aux_distill/n_active_tok": 152.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5645089285714286, "calib/avg_num_step_conf": 2.390625, "calib/ece": 0.2095703125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002192857142857144, "calib/mean_conf": 0.009179687499999999, "calib/mu_c": 0.010892857142857143, "calib/mu_w": 0.0087, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00753319071856964, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 321.0, "completions/max_terminated_length": 321.0, "completions/mean_length": 121.94921875, "completions/mean_terminated_length": 122.42745971679688, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.0704, "grad_norm": 0.013737301342189312, "learning_rate": 3.7222222222222225e-06, "loss": 0.045, "num_tokens": 12290260.0, "reward": 1.0023123025894165, "reward_std": 0.0038235776592046022, "rewards/accuracy_reward_step": 0.21875, "rewards/final_brier_reward_step": 0.785874605178833, "rewards/format_reward_step": 1.0, "step": 66 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4867994925007224, "aux_distill/mean_u": 0.2638074353122962, "aux_distill/n_active_tok": 157.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5015463917525774, "calib/avg_num_step_conf": 2.4609375, "calib/ece": 0.22720472440944883, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0009793814432989666, "calib/mean_conf": 0.010748031496062993, "calib/mu_c": 0.010000000000000002, "calib/mu_w": 0.010979381443298969, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0008661417322834645, "calib/std_conf": 0.014974765390055263, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1753.0, "completions/max_terminated_length": 1753.0, "completions/mean_length": 136.06640625, "completions/mean_terminated_length": 136.06640625, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.07146666666666666, "grad_norm": 0.012562266550958157, "learning_rate": 3.694444444444445e-06, "loss": 0.0889, "num_tokens": 12430101.0, "reward": 0.9963157773017883, "reward_std": 0.019671740010380745, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.7621628642082214, "rewards/format_reward_step": 0.9921875, "step": 67 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.469187312759459, "aux_distill/mean_u": 0.24902920117743751, "aux_distill/n_active_tok": 162.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5078149920255183, "calib/avg_num_step_conf": 2.54296875, "calib/ece": 0.24796875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006188197767145118, "calib/mean_conf": 0.009843750000000002, "calib/mu_c": 0.010303030303030302, "calib/mu_w": 0.00968421052631579, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007179003129787589, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 411.0, "completions/max_terminated_length": 411.0, "completions/mean_length": 129.4921875, "completions/mean_terminated_length": 130.00001525878906, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.07253333333333334, "grad_norm": 0.01084089931100607, "learning_rate": 3.6666666666666666e-06, "loss": 0.0456, "num_tokens": 12567339.0, "reward": 1.00258207321167, "reward_std": 0.003189860377460718, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.7473515272140503, "rewards/format_reward_step": 1.0, "step": 68 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4741177558898926, "aux_distill/mean_u": 0.2582151062249327, "aux_distill/n_active_tok": 153.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.468495145631068, "calib/avg_num_step_conf": 2.421875, "calib/ece": 0.1847265625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0007281553398058253, "calib/mean_conf": 0.0105859375, "calib/mu_c": 0.01, "calib/mu_w": 0.010728155339805825, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007074411795060685, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 340.0, "completions/max_terminated_length": 340.0, "completions/mean_length": 129.60546875, "completions/mean_terminated_length": 130.11373901367188, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.0736, "grad_norm": 0.013099942356348038, "learning_rate": 3.638888888888889e-06, "loss": 0.0516, "num_tokens": 12705014.0, "reward": 1.0018720626831055, "reward_std": 0.002898347331210971, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.8084316253662109, "rewards/format_reward_step": 1.0, "step": 69 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.493006375618279, "aux_distill/mean_u": 0.262141115781891, "aux_distill/n_active_tok": 160.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.554221931627125, "calib/avg_num_step_conf": 2.5, "calib/ece": 0.198, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001150756585092472, "calib/mean_conf": 0.009843137254901962, "calib/mu_c": 0.010754716981132076, "calib/mu_w": 0.009603960396039604, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007354797151385198, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 372.0, "completions/max_terminated_length": 372.0, "completions/mean_length": 133.13671875, "completions/mean_terminated_length": 133.65882873535156, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.07466666666666667, "grad_norm": 0.014240157790482044, "learning_rate": 3.6111111111111115e-06, "loss": 0.0264, "num_tokens": 12846089.0, "reward": 0.9982450008392334, "reward_std": 0.013962855562567711, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.7933652400970459, "rewards/format_reward_step": 0.99609375, "step": 70 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.47487139515578747, "aux_distill/mean_u": 0.27355696705036336, "aux_distill/n_active_tok": 168.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.48920236851271337, "calib/avg_num_step_conf": 2.63671875, "calib/ece": 0.21640625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00020202020202019985, "calib/mean_conf": 0.010156249999999999, "calib/mu_c": 0.010000000000000002, "calib/mu_w": 0.010202020202020202, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.007601847534481339, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 377.0, "completions/max_terminated_length": 377.0, "completions/mean_length": 144.0546875, "completions/mean_terminated_length": 144.61961364746094, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.07573333333333333, "grad_norm": 0.010125796310603619, "learning_rate": 3.5833333333333335e-06, "loss": 0.0251, "num_tokens": 12987375.0, "reward": 0.9982790946960449, "reward_std": 0.013809540309011936, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.7739019393920898, "rewards/format_reward_step": 0.99609375, "step": 71 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.48494631331413984, "aux_distill/mean_u": 0.26469371248366147, "aux_distill/n_active_tok": 172.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5170990566037735, "calib/avg_num_step_conf": 2.71875, "calib/ece": 0.16125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006174957118353329, "calib/mean_conf": 0.010625, "calib/mu_c": 0.011136363636363637, "calib/mu_w": 0.010518867924528304, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006643841132959155, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 331.0, "completions/max_terminated_length": 331.0, "completions/mean_length": 140.33203125, "completions/mean_terminated_length": 140.88235473632812, "completions/min_length": 0.0, "completions/min_terminated_length": 55.0, "epoch": 0.0768, "grad_norm": 0.011124787852168083, "learning_rate": 3.555555555555556e-06, "loss": 0.0294, "num_tokens": 13127708.0, "reward": 1.001835584640503, "reward_std": 0.002578023122623563, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.8317961096763611, "rewards/format_reward_step": 1.0, "step": 72 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4593647988513112, "aux_distill/mean_u": 0.2135954470103809, "aux_distill/n_active_tok": 163.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.44977912113461993, "calib/avg_num_step_conf": 2.578125, "calib/ece": 0.2582421875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001169495466170659, "calib/mean_conf": 0.0112890625, "calib/mu_c": 0.010434782608695651, "calib/mu_w": 0.01160427807486631, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006019048335999117, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 422.0, "completions/max_terminated_length": 422.0, "completions/mean_length": 141.09375, "completions/mean_terminated_length": 141.64706420898438, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.07786666666666667, "grad_norm": 0.012260450050234795, "learning_rate": 3.5277777777777784e-06, "loss": 0.0498, "num_tokens": 13270860.0, "reward": 1.0027306079864502, "reward_std": 0.0034748294856399298, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.7359300851821899, "rewards/format_reward_step": 1.0, "step": 73 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.47802817448973656, "aux_distill/mean_u": 0.22195564799117634, "aux_distill/n_active_tok": 156.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5704526442977972, "calib/avg_num_step_conf": 2.4453125, "calib/ece": 0.1956640625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0018449670043684356, "calib/mean_conf": 0.0113671875, "calib/mu_c": 0.012830188679245284, "calib/mu_w": 0.010985221674876849, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005870172343282925, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 364.0, "completions/max_terminated_length": 364.0, "completions/mean_length": 131.05859375, "completions/mean_terminated_length": 131.5725555419922, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.07893333333333333, "grad_norm": 0.011557850986719131, "learning_rate": 3.5e-06, "loss": 0.0515, "num_tokens": 13408339.0, "reward": 1.0025744438171387, "reward_std": 0.0030165817588567734, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.7981175184249878, "rewards/format_reward_step": 1.0, "step": 74 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.45104537066072226, "aux_distill/mean_u": 0.23226746896270406, "aux_distill/n_active_tok": 157.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4995579133510168, "calib/avg_num_step_conf": 2.46875, "calib/ece": 0.3296875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -6.257226416377278e-05, "calib/mean_conf": 0.01015625, "calib/mu_c": 0.010114942528735633, "calib/mu_w": 0.010177514792899406, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005300998579277304, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 360.0, "completions/max_terminated_length": 360.0, "completions/mean_length": 134.49609375, "completions/mean_terminated_length": 135.02354431152344, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.08, "grad_norm": 0.012841647490859032, "learning_rate": 3.4722222222222224e-06, "loss": 0.0255, "num_tokens": 13547522.0, "reward": 1.0033719539642334, "reward_std": 0.003874801332131028, "rewards/accuracy_reward_step": 0.33984375, "rewards/final_brier_reward_step": 0.6669000387191772, "rewards/format_reward_step": 1.0, "step": 75 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.48809313867241144, "aux_distill/mean_u": 0.2720469078054451, "aux_distill/n_active_tok": 149.78125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5330114529530654, "calib/avg_num_step_conf": 2.35546875, "calib/ece": 0.2738671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006879257429448296, "calib/mean_conf": 0.011289062500000002, "calib/mu_c": 0.011780821917808219, "calib/mu_w": 0.011092896174863389, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005029059839681146, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 319.0, "completions/max_terminated_length": 319.0, "completions/mean_length": 126.2109375, "completions/mean_terminated_length": 126.70588684082031, "completions/min_length": 0.0, "completions/min_terminated_length": 55.0, "epoch": 0.08106666666666666, "grad_norm": 0.012701224535703659, "learning_rate": 3.444444444444445e-06, "loss": 0.043, "num_tokens": 13682888.0, "reward": 1.003282904624939, "reward_std": 0.003568597137928009, "rewards/accuracy_reward_step": 0.28515625, "rewards/final_brier_reward_step": 0.721409797668457, "rewards/format_reward_step": 1.0, "step": 76 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4653266780078411, "aux_distill/mean_u": 0.23650410068623975, "aux_distill/n_active_tok": 163.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.46630100556802123, "calib/avg_num_step_conf": 2.5546875, "calib/ece": 0.23669291338582676, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0007246738136790481, "calib/mean_conf": 0.011338582677165353, "calib/mu_c": 0.010793650793650796, "calib/mu_w": 0.011518324607329844, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.004837876270647444, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 966.0, "completions/max_terminated_length": 966.0, "completions/mean_length": 139.02734375, "completions/mean_terminated_length": 139.02734375, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 0.08213333333333334, "grad_norm": 0.01345721073448658, "learning_rate": 3.416666666666667e-06, "loss": 0.0603, "num_tokens": 13823143.0, "reward": 0.9967213869094849, "reward_std": 0.01972099393606186, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.7512555122375488, "rewards/format_reward_step": 0.9921875, "step": 77 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4758943486958742, "aux_distill/mean_u": 0.24784744747853024, "aux_distill/n_active_tok": 165.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5372364122364123, "calib/avg_num_step_conf": 2.59765625, "calib/ece": 0.277890625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008226908226908226, "calib/mean_conf": 0.011171875000000001, "calib/mu_c": 0.011756756756756755, "calib/mu_w": 0.010934065934065932, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0045278812908881565, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 372.0, "completions/max_terminated_length": 372.0, "completions/mean_length": 146.38671875, "completions/mean_terminated_length": 146.96080017089844, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.0832, "grad_norm": 0.013810593634843826, "learning_rate": 3.3888888888888893e-06, "loss": 0.0437, "num_tokens": 13968642.0, "reward": 1.0033257007598877, "reward_std": 0.004381269216537476, "rewards/accuracy_reward_step": 0.2890625, "rewards/final_brier_reward_step": 0.7175890207290649, "rewards/format_reward_step": 1.0, "step": 78 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.447107189334929, "aux_distill/mean_u": 0.21981514835849306, "aux_distill/n_active_tok": 172.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5005133064834557, "calib/avg_num_step_conf": 2.70703125, "calib/ece": 0.2505078125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -2.29013661849492e-05, "calib/mean_conf": 0.0112109375, "calib/mu_c": 0.011194029850746268, "calib/mu_w": 0.011216931216931217, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.00464548225387782, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 376.0, "completions/max_terminated_length": 376.0, "completions/mean_length": 149.1171875, "completions/mean_terminated_length": 149.70196533203125, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.08426666666666667, "grad_norm": 0.01121280062943697, "learning_rate": 3.3611111111111117e-06, "loss": 0.0497, "num_tokens": 14113192.0, "reward": 0.9989499449729919, "reward_std": 0.013474896550178528, "rewards/accuracy_reward_step": 0.26171875, "rewards/final_brier_reward_step": 0.7400875091552734, "rewards/format_reward_step": 0.99609375, "step": 79 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4624016094021499, "aux_distill/mean_u": 0.25210709773324724, "aux_distill/n_active_tok": 186.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4720052083333333, "calib/avg_num_step_conf": 2.91015625, "calib/ece": 0.363359375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00045833333333333316, "calib/mean_conf": 0.011640625, "calib/mu_c": 0.011354166666666667, "calib/mu_w": 0.0118125, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00512001949306592, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 331.0, "completions/max_terminated_length": 331.0, "completions/mean_length": 157.421875, "completions/mean_terminated_length": 158.0392303466797, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.08533333333333333, "grad_norm": 0.011426246725022793, "learning_rate": 3.3333333333333333e-06, "loss": 0.0547, "num_tokens": 14255652.0, "reward": 1.0041768550872803, "reward_std": 0.004204465076327324, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.6333538889884949, "rewards/format_reward_step": 1.0, "step": 80 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.47395670786499977, "aux_distill/mean_u": 0.2623399104299867, "aux_distill/n_active_tok": 191.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5220156555772995, "calib/avg_num_step_conf": 2.9921875, "calib/ece": 0.2749803921568627, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0004899894625922004, "calib/mean_conf": 0.011294117647058823, "calib/mu_c": 0.011643835616438355, "calib/mu_w": 0.011153846153846155, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004460256379015822, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 421.0, "completions/max_terminated_length": 421.0, "completions/mean_length": 170.6015625, "completions/mean_terminated_length": 171.27059936523438, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.0864, "grad_norm": 0.0148467430844903, "learning_rate": 3.3055555555555558e-06, "loss": 0.0472, "num_tokens": 14405574.0, "reward": 0.9993405938148499, "reward_std": 0.014343726448714733, "rewards/accuracy_reward_step": 0.28515625, "rewards/final_brier_reward_step": 0.7174311876296997, "rewards/format_reward_step": 0.99609375, "step": 81 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.44830169156193733, "aux_distill/mean_u": 0.20830730432778605, "aux_distill/n_active_tok": 182.84375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5069738480697386, "calib/avg_num_step_conf": 2.859375, "calib/ece": 0.418125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00012951432129514572, "calib/mean_conf": 0.011562500000000002, "calib/mu_c": 0.011636363636363639, "calib/mu_w": 0.011506849315068493, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004582149468317244, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 544.0, "completions/max_terminated_length": 544.0, "completions/mean_length": 175.63671875, "completions/mean_terminated_length": 176.32550048828125, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.08746666666666666, "grad_norm": 0.011641280725598335, "learning_rate": 3.277777777777778e-06, "loss": 0.0327, "num_tokens": 14556089.0, "reward": 1.0010164976119995, "reward_std": 0.01543247140944004, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.5762519240379333, "rewards/format_reward_step": 0.99609375, "step": 82 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4452169854193926, "aux_distill/mean_u": 0.23544867615008755, "aux_distill/n_active_tok": 194.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.547075933864054, "calib/avg_num_step_conf": 3.07421875, "calib/ece": 0.26623529411764707, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008603796693202673, "calib/mean_conf": 0.01219607843137255, "calib/mu_c": 0.012816901408450702, "calib/mu_w": 0.011956521739130435, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004756912855562644, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 804.0, "completions/max_terminated_length": 804.0, "completions/mean_length": 187.96875, "completions/mean_terminated_length": 188.7058868408203, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.08853333333333334, "grad_norm": 0.009983416646718979, "learning_rate": 3.2500000000000002e-06, "loss": 0.0472, "num_tokens": 14711473.0, "reward": 0.9995629787445068, "reward_std": 0.014329729601740837, "rewards/accuracy_reward_step": 0.27734375, "rewards/final_brier_reward_step": 0.7256886959075928, "rewards/format_reward_step": 0.99609375, "step": 83 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4570365957915783, "aux_distill/mean_u": 0.22510164656623236, "aux_distill/n_active_tok": 177.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5138473053892214, "calib/avg_num_step_conf": 2.76953125, "calib/ece": 0.3325490196078431, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0002721829069134464, "calib/mean_conf": 0.012549019607843138, "calib/mu_c": 0.012727272727272728, "calib/mu_w": 0.012455089820359281, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.004868118473312227, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2196.0, "completions/max_terminated_length": 2196.0, "completions/mean_length": 175.78125, "completions/mean_terminated_length": 175.78125, "completions/min_length": 55.0, "completions/min_terminated_length": 55.0, "epoch": 0.0896, "grad_norm": 0.0139435064047575, "learning_rate": 3.2222222222222227e-06, "loss": 0.098, "num_tokens": 14862393.0, "reward": 1.0003783702850342, "reward_std": 0.015527335926890373, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.6609132885932922, "rewards/format_reward_step": 0.99609375, "step": 84 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.42052019108086824, "aux_distill/mean_u": 0.19761002619786422, "aux_distill/n_active_tok": 186.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.512276367991616, "calib/avg_num_step_conf": 2.921875, "calib/ece": 0.2724609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 6.213039898195737e-05, "calib/mean_conf": 0.0126953125, "calib/mu_c": 0.01273972602739726, "calib/mu_w": 0.012677595628415302, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.005318097923820485, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 506.0, "completions/max_terminated_length": 506.0, "completions/mean_length": 189.62890625, "completions/mean_terminated_length": 190.37255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.09066666666666667, "grad_norm": 0.012380489148199558, "learning_rate": 3.1944444444444443e-06, "loss": 0.0634, "num_tokens": 15018762.0, "reward": 0.9996320009231567, "reward_std": 0.015054484829306602, "rewards/accuracy_reward_step": 0.28515625, "rewards/final_brier_reward_step": 0.718014121055603, "rewards/format_reward_step": 0.99609375, "step": 85 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4251967119053006, "aux_distill/mean_u": 0.19627234303921945, "aux_distill/n_active_tok": 198.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5002316602316603, "calib/avg_num_step_conf": 3.10546875, "calib/ece": 0.26207843137254905, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00020077220077219932, "calib/mean_conf": 0.012431372549019607, "calib/mu_c": 0.012285714285714287, "calib/mu_w": 0.012486486486486486, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005199000288369114, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2301.0, "completions/max_terminated_length": 2301.0, "completions/mean_length": 206.75390625, "completions/mean_terminated_length": 206.75390625, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.09173333333333333, "grad_norm": 0.010361351072788239, "learning_rate": 3.1666666666666667e-06, "loss": 0.0892, "num_tokens": 15177203.0, "reward": 0.9993627071380615, "reward_std": 0.014718771912157536, "rewards/accuracy_reward_step": 0.2734375, "rewards/final_brier_reward_step": 0.729194164276123, "rewards/format_reward_step": 0.99609375, "step": 86 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4048550510779023, "aux_distill/mean_u": 0.1747493499899473, "aux_distill/n_active_tok": 162.75, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.48597391084093206, "calib/avg_num_step_conf": 2.54296875, "calib/ece": 0.4300790513833992, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -3.482776089159048e-05, "calib/mean_conf": 0.012608695652173915, "calib/mu_c": 0.012589285714285714, "calib/mu_w": 0.012624113475177305, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.0056506534896365365, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2595.0, "completions/max_terminated_length": 2595.0, "completions/mean_length": 206.109375, "completions/mean_terminated_length": 206.109375, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 0.0928, "grad_norm": 0.01382330060005188, "learning_rate": 3.138888888888889e-06, "loss": 0.1513, "num_tokens": 15335463.0, "reward": 0.9897886514663696, "reward_std": 0.04076341167092323, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.5577023029327393, "rewards/format_reward_step": 0.984375, "step": 87 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.40625389851629734, "aux_distill/mean_u": 0.17802966193899944, "aux_distill/n_active_tok": 179.75, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5151803324880248, "calib/avg_num_step_conf": 2.80859375, "calib/ece": 0.32, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0003684136376444075, "calib/mean_conf": 0.012015810276679842, "calib/mu_c": 0.012261904761904762, "calib/mu_w": 0.011893491124260355, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.004734841324391676, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1933.0, "completions/max_terminated_length": 1933.0, "completions/mean_length": 229.66015625, "completions/mean_terminated_length": 230.560791015625, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.09386666666666667, "grad_norm": 0.01094108261168003, "learning_rate": 3.1111111111111116e-06, "loss": 0.093, "num_tokens": 15504104.0, "reward": 0.9922221899032593, "reward_std": 0.03676649183034897, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.6680383086204529, "rewards/format_reward_step": 0.98828125, "step": 88 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.42246151296421885, "aux_distill/mean_u": 0.21081985870031994, "aux_distill/n_active_tok": 167.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5588314124899492, "calib/avg_num_step_conf": 2.61328125, "calib/ece": 0.3439607843137255, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0012972393460198364, "calib/mean_conf": 0.012901960784313726, "calib/mu_c": 0.01373626373626374, "calib/mu_w": 0.012439024390243903, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.004951747174169285, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1833.0, "completions/max_terminated_length": 1833.0, "completions/mean_length": 217.08984375, "completions/mean_terminated_length": 217.08984375, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 0.09493333333333333, "grad_norm": 0.012723850086331367, "learning_rate": 3.0833333333333336e-06, "loss": 0.0679, "num_tokens": 15668567.0, "reward": 0.9969753623008728, "reward_std": 0.02681366354227066, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.6462945342063904, "rewards/format_reward_step": 0.9921875, "step": 89 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.41852833703160286, "aux_distill/mean_u": 0.2219748970620147, "aux_distill/n_active_tok": 173.5625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4908326967150497, "calib/avg_num_step_conf": 2.73046875, "calib/ece": 0.385390625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00017570664629488163, "calib/mean_conf": 0.013046875, "calib/mu_c": 0.012941176470588235, "calib/mu_w": 0.013116883116883117, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.004850675492586058, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 546.0, "completions/max_terminated_length": 546.0, "completions/mean_length": 208.47265625, "completions/mean_terminated_length": 209.2902069091797, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.096, "grad_norm": 0.011926371604204178, "learning_rate": 3.055555555555556e-06, "loss": 0.0544, "num_tokens": 15825256.0, "reward": 0.9991612434387207, "reward_std": 0.021257756277918816, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.607697606086731, "rewards/format_reward_step": 0.9921875, "step": 90 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38081121258437634, "aux_distill/mean_u": 0.160952301445108, "aux_distill/n_active_tok": 175.0625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5157251019219569, "calib/avg_num_step_conf": 2.734375, "calib/ece": 0.3838582677165355, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0001358959425354321, "calib/mean_conf": 0.013779527559055118, "calib/mu_c": 0.013861386138613863, "calib/mu_w": 0.013725490196078431, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.005531461036120781, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2249.0, "completions/max_terminated_length": 2249.0, "completions/mean_length": 240.15625, "completions/mean_terminated_length": 240.15625, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.09706666666666666, "grad_norm": 0.012279514223337173, "learning_rate": 3.0277777777777776e-06, "loss": 0.1304, "num_tokens": 15994448.0, "reward": 0.9975468516349792, "reward_std": 0.02685142122209072, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.6083749532699585, "rewards/format_reward_step": 0.9921875, "step": 91 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4037033971399069, "aux_distill/mean_u": 0.150256089948064, "aux_distill/n_active_tok": 164.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5810709604942895, "calib/avg_num_step_conf": 2.57421875, "calib/ece": 0.4109765625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0016981838607002427, "calib/mean_conf": 0.0148046875, "calib/mu_c": 0.015779816513761466, "calib/mu_w": 0.014081632653061223, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005586756932903359, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 849.0, "completions/max_terminated_length": 849.0, "completions/mean_length": 203.796875, "completions/mean_terminated_length": 204.59608459472656, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.09813333333333334, "grad_norm": 0.014511768706142902, "learning_rate": 3e-06, "loss": 0.0525, "num_tokens": 16153340.0, "reward": 1.0065934658050537, "reward_std": 0.006135149858891964, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.5874058604240417, "rewards/format_reward_step": 1.0, "step": 92 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3932137689553201, "aux_distill/mean_u": 0.17165799268354254, "aux_distill/n_active_tok": 188.8125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.49097179955366205, "calib/avg_num_step_conf": 2.953125, "calib/ece": 0.3535714285714286, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00015824710894704955, "calib/mean_conf": 0.015476190476190473, "calib/mu_c": 0.015376344086021504, "calib/mu_w": 0.015534591194968554, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.005788212276734865, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2072.0, "completions/max_terminated_length": 2072.0, "completions/mean_length": 254.62109375, "completions/mean_terminated_length": 255.61962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.0992, "grad_norm": 0.010879608802497387, "learning_rate": 2.9722222222222225e-06, "loss": 0.0918, "num_tokens": 16324299.0, "reward": 0.989826500415802, "reward_std": 0.048621803522109985, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.6319968700408936, "rewards/format_reward_step": 0.984375, "step": 93 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.33277420699596405, "aux_distill/mean_u": 0.15313231940754068, "aux_distill/n_active_tok": 200.0, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6165608965703484, "calib/avg_num_step_conf": 3.125, "calib/ece": 0.34707509881422927, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0024999999999999988, "calib/mean_conf": 0.016561264822134384, "calib/mu_c": 0.018152173913043478, "calib/mu_w": 0.01565217391304348, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.005933858230446628, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2009.0, "completions/max_terminated_length": 2009.0, "completions/mean_length": 247.44140625, "completions/mean_terminated_length": 247.44140625, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.10026666666666667, "grad_norm": 0.011224609799683094, "learning_rate": 2.944444444444445e-06, "loss": 0.1157, "num_tokens": 16496324.0, "reward": 0.994651734828949, "reward_std": 0.03712940961122513, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.641647219657898, "rewards/format_reward_step": 0.98828125, "step": 94 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3890888011083007, "aux_distill/mean_u": 0.201429254871052, "aux_distill/n_active_tok": 186.4375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5229846153846154, "calib/avg_num_step_conf": 2.89453125, "calib/ece": 0.4926274509803921, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005815384615384701, "calib/mean_conf": 0.017176470588235297, "calib/mu_c": 0.017461538461538466, "calib/mu_w": 0.016879999999999996, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005728175572238498, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2106.0, "completions/max_terminated_length": 2106.0, "completions/mean_length": 236.3125, "completions/mean_terminated_length": 236.3125, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.10133333333333333, "grad_norm": 0.011299879290163517, "learning_rate": 2.916666666666667e-06, "loss": 0.1178, "num_tokens": 16662948.0, "reward": 1.0047976970672607, "reward_std": 0.016820628196001053, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5056890249252319, "rewards/format_reward_step": 0.99609375, "step": 95 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.37238421756774187, "aux_distill/mean_u": 0.1852877839684512, "aux_distill/n_active_tok": 175.1875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5490486391129032, "calib/avg_num_step_conf": 2.734375, "calib/ece": 0.4747222222222222, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014238911290322571, "calib/mean_conf": 0.01734126984126984, "calib/mu_c": 0.018064516129032256, "calib/mu_w": 0.016640625, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.00601540706827285, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2949.0, "completions/max_terminated_length": 2949.0, "completions/mean_length": 260.6484375, "completions/mean_terminated_length": 260.6484375, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 0.1024, "grad_norm": 0.012914830818772316, "learning_rate": 2.888888888888889e-06, "loss": 0.0519, "num_tokens": 16835490.0, "reward": 0.9929591417312622, "reward_std": 0.03523903712630272, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5171683430671692, "rewards/format_reward_step": 0.984375, "step": 96 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38620558474212885, "aux_distill/mean_u": 0.1596223525620612, "aux_distill/n_active_tok": 178.3125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5661207622254535, "calib/avg_num_step_conf": 2.79296875, "calib/ece": 0.2707539682539683, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0015083798882681576, "calib/mean_conf": 0.018928571428571427, "calib/mu_c": 0.02, "calib/mu_w": 0.018491620111731843, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.005708081949874692, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2262.0, "completions/max_terminated_length": 2262.0, "completions/mean_length": 256.55078125, "completions/mean_terminated_length": 257.556884765625, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.10346666666666667, "grad_norm": 0.010874838568270206, "learning_rate": 2.861111111111111e-06, "loss": 0.159, "num_tokens": 17006239.0, "reward": 0.985979437828064, "reward_std": 0.060727715492248535, "rewards/accuracy_reward_step": 0.28515625, "rewards/final_brier_reward_step": 0.7063339948654175, "rewards/format_reward_step": 0.98046875, "step": 97 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3821967989206314, "aux_distill/mean_u": 0.23914681162299736, "aux_distill/n_active_tok": 170.96875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6189378630967416, "calib/avg_num_step_conf": 2.66015625, "calib/ece": 0.399921568627451, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0031169487244253544, "calib/mean_conf": 0.019686274509803925, "calib/mu_c": 0.021495327102803732, "calib/mu_w": 0.018378378378378378, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.006316769452560481, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2458.0, "completions/max_terminated_length": 2458.0, "completions/mean_length": 251.21875, "completions/mean_terminated_length": 251.21875, "completions/min_length": 67.0, "completions/min_terminated_length": 67.0, "epoch": 0.10453333333333334, "grad_norm": 0.014738868921995163, "learning_rate": 2.8333333333333335e-06, "loss": 0.0794, "num_tokens": 17176735.0, "reward": 0.9970546960830688, "reward_std": 0.040115561336278915, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.5878593921661377, "rewards/format_reward_step": 0.98828125, "step": 98 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38096280861645937, "aux_distill/mean_u": 0.17522439725132843, "aux_distill/n_active_tok": 177.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5390576242978989, "calib/avg_num_step_conf": 2.76953125, "calib/ece": 0.1591372549019608, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0013865196588308745, "calib/mean_conf": 0.021254901960784313, "calib/mu_c": 0.022391304347826088, "calib/mu_w": 0.021004784688995214, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.00713962468738118, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1888.0, "completions/max_terminated_length": 1888.0, "completions/mean_length": 266.7265625, "completions/mean_terminated_length": 266.7265625, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 0.1056, "grad_norm": 0.010448206216096878, "learning_rate": 2.805555555555556e-06, "loss": 0.0491, "num_tokens": 17350817.0, "reward": 0.999866783618927, "reward_std": 0.015927515923976898, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.823952317237854, "rewards/format_reward_step": 0.99609375, "step": 99 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3705893671140075, "aux_distill/mean_u": 0.18667120212281332, "aux_distill/n_active_tok": 173.0, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5634003708458137, "calib/avg_num_step_conf": 2.6796875, "calib/ece": 0.3024505928853755, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0018741976893453145, "calib/mean_conf": 0.0216600790513834, "calib/mu_c": 0.022926829268292682, "calib/mu_w": 0.021052631578947368, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.006683933541162994, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2583.0, "completions/max_terminated_length": 2583.0, "completions/mean_length": 260.68359375, "completions/mean_terminated_length": 261.7059020996094, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.10666666666666667, "grad_norm": 0.012456218712031841, "learning_rate": 2.7777777777777783e-06, "loss": 0.0995, "num_tokens": 17524960.0, "reward": 0.9914649724960327, "reward_std": 0.0515936017036438, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.678242564201355, "rewards/format_reward_step": 0.984375, "step": 100 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3266990319825709, "aux_distill/mean_u": 0.12328381091635676, "aux_distill/n_active_tok": 181.78125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5713571428571429, "calib/avg_num_step_conf": 2.8359375, "calib/ece": 0.2909411764705882, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002317857142857146, "calib/mean_conf": 0.0227843137254902, "calib/mu_c": 0.024375, "calib/mu_w": 0.022057142857142855, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008194483379833082, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2173.0, "completions/max_terminated_length": 2173.0, "completions/mean_length": 299.1015625, "completions/mean_terminated_length": 299.1015625, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.10773333333333333, "grad_norm": 0.012489385902881622, "learning_rate": 2.7500000000000004e-06, "loss": 0.0222, "num_tokens": 17708522.0, "reward": 1.0034189224243164, "reward_std": 0.018446702510118484, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.6982441544532776, "rewards/format_reward_step": 0.99609375, "step": 101 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.35586046427488327, "aux_distill/mean_u": 0.1803295721689139, "aux_distill/n_active_tok": 199.0, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5229989932041279, "calib/avg_num_step_conf": 3.109375, "calib/ece": 0.4353754940711463, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0004423609363201561, "calib/mean_conf": 0.023122529644268777, "calib/mu_c": 0.02336206896551724, "calib/mu_w": 0.022919708029197083, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.008108259971591179, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1974.0, "completions/max_terminated_length": 1974.0, "completions/mean_length": 249.71484375, "completions/mean_terminated_length": 250.6941375732422, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.1088, "grad_norm": 0.01360093243420124, "learning_rate": 2.7222222222222224e-06, "loss": 0.0679, "num_tokens": 17879145.0, "reward": 0.9985705018043518, "reward_std": 0.03953090310096741, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.5557347536087036, "rewards/format_reward_step": 0.98828125, "step": 102 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3942983839660883, "aux_distill/mean_u": 0.19361853840302062, "aux_distill/n_active_tok": 183.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5702050433010698, "calib/avg_num_step_conf": 2.86328125, "calib/ece": 0.3844705882352941, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002423586347427408, "calib/mean_conf": 0.023372549019607843, "calib/mu_c": 0.02480769230769231, "calib/mu_w": 0.0223841059602649, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.007695072995395158, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2199.0, "completions/max_terminated_length": 2199.0, "completions/mean_length": 286.59375, "completions/mean_terminated_length": 286.59375, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.10986666666666667, "grad_norm": 0.012631945312023163, "learning_rate": 2.6944444444444444e-06, "loss": 0.0182, "num_tokens": 18057065.0, "reward": 1.0038397312164307, "reward_std": 0.023325443267822266, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.6092422008514404, "rewards/format_reward_step": 0.9921875, "step": 103 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3478834149427712, "aux_distill/mean_u": 0.15305093505171327, "aux_distill/n_active_tok": 188.34375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5450406276267863, "calib/avg_num_step_conf": 2.9453125, "calib/ece": 0.29964705882352943, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0016972541328103066, "calib/mean_conf": 0.02584313725490196, "calib/mu_c": 0.02698795180722891, "calib/mu_w": 0.0252906976744186, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009371154215682412, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 691.0, "completions/max_terminated_length": 691.0, "completions/mean_length": 256.73046875, "completions/mean_terminated_length": 257.7372741699219, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.11093333333333333, "grad_norm": 0.012816665694117546, "learning_rate": 2.666666666666667e-06, "loss": 0.0176, "num_tokens": 18229468.0, "reward": 1.0044673681259155, "reward_std": 0.019612101837992668, "rewards/accuracy_reward_step": 0.32421875, "rewards/final_brier_reward_step": 0.688622236251831, "rewards/format_reward_step": 0.99609375, "step": 104 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.35361317032948136, "aux_distill/mean_u": 0.14714659951818576, "aux_distill/n_active_tok": 180.75, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5229885057471264, "calib/avg_num_step_conf": 2.82421875, "calib/ece": 0.2880314960629921, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011982758620689739, "calib/mean_conf": 0.026929133858267715, "calib/mu_c": 0.027750000000000004, "calib/mu_w": 0.02655172413793103, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.010234403230575732, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1993.0, "completions/max_terminated_length": 1993.0, "completions/mean_length": 273.28125, "completions/mean_terminated_length": 274.35296630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.112, "grad_norm": 0.012089966796338558, "learning_rate": 2.6388888888888893e-06, "loss": 0.0422, "num_tokens": 18405188.0, "reward": 1.0004476308822632, "reward_std": 0.03112502582371235, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.6962078213691711, "rewards/format_reward_step": 0.9921875, "step": 105 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3795621353201568, "aux_distill/mean_u": 0.18529847398515106, "aux_distill/n_active_tok": 176.75, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6127355875831486, "calib/avg_num_step_conf": 2.76171875, "calib/ece": 0.32047619047619047, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004046563192904654, "calib/mean_conf": 0.02873015873015873, "calib/mu_c": 0.031363636363636364, "calib/mu_w": 0.02731707317073171, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.010156249697172312, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1976.0, "completions/max_terminated_length": 1976.0, "completions/mean_length": 276.05859375, "completions/mean_terminated_length": 276.05859375, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.11306666666666666, "grad_norm": 0.010598819702863693, "learning_rate": 2.6111111111111113e-06, "loss": 0.1359, "num_tokens": 18580443.0, "reward": 0.9904502630233765, "reward_std": 0.06372330337762833, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.6605879068374634, "rewards/format_reward_step": 0.9765625, "step": 106 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3647639914415777, "aux_distill/mean_u": 0.1525406433967182, "aux_distill/n_active_tok": 194.6875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5104798263343064, "calib/avg_num_step_conf": 3.0234375, "calib/ece": 0.25667968750000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005980986600793471, "calib/mean_conf": 0.028476562499999997, "calib/mu_c": 0.028904109589041094, "calib/mu_w": 0.028306010928961747, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010987538995771243, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 838.0, "completions/max_terminated_length": 838.0, "completions/mean_length": 268.375, "completions/mean_terminated_length": 269.4274597167969, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.11413333333333334, "grad_norm": 0.012022835202515125, "learning_rate": 2.5833333333333337e-06, "loss": 0.0163, "num_tokens": 18753763.0, "reward": 1.0077763795852661, "reward_std": 0.008537918329238892, "rewards/accuracy_reward_step": 0.28515625, "rewards/final_brier_reward_step": 0.7303965091705322, "rewards/format_reward_step": 1.0, "step": 107 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.33226052671670914, "aux_distill/mean_u": 0.19509699005745737, "aux_distill/n_active_tok": 203.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5082465277777778, "calib/avg_num_step_conf": 3.171875, "calib/ece": 0.4675196850393701, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0014880952380952397, "calib/mean_conf": 0.03051181102362205, "calib/mu_c": 0.02976190476190476, "calib/mu_w": 0.03125, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.000984251968503937, "calib/std_conf": 0.017460130575465912, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1999.0, "completions/max_terminated_length": 1999.0, "completions/mean_length": 288.1484375, "completions/mean_terminated_length": 288.1484375, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.1152, "grad_norm": 0.010769481770694256, "learning_rate": 2.5555555555555557e-06, "loss": 0.0623, "num_tokens": 18930761.0, "reward": 1.0023183822631836, "reward_std": 0.02828032523393631, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5241679549217224, "rewards/format_reward_step": 0.98828125, "step": 108 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3249229663051665, "aux_distill/mean_u": 0.1310028577043218, "aux_distill/n_active_tok": 197.28125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5770042194092827, "calib/avg_num_step_conf": 3.08203125, "calib/ece": 0.34219123505976096, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0035123179529059495, "calib/mean_conf": 0.028326693227091634, "calib/mu_c": 0.03053763440860215, "calib/mu_w": 0.0270253164556962, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.010766853953004685, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1805.0, "completions/max_terminated_length": 1805.0, "completions/mean_length": 320.44921875, "completions/mean_terminated_length": 321.7059020996094, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.11626666666666667, "grad_norm": 0.009666023775935173, "learning_rate": 2.5277777777777778e-06, "loss": 0.0355, "num_tokens": 19117396.0, "reward": 0.9911122918128967, "reward_std": 0.045420363545417786, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.6384745836257935, "rewards/format_reward_step": 0.98046875, "step": 109 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3342225840315223, "aux_distill/mean_u": 0.13468734558034612, "aux_distill/n_active_tok": 203.1875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5692927170868347, "calib/avg_num_step_conf": 3.12109375, "calib/ece": 0.2990551181102362, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0030434173669467712, "calib/mean_conf": 0.03165354330708662, "calib/mu_c": 0.03369047619047619, "calib/mu_w": 0.030647058823529416, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.010850725112934664, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1881.0, "completions/max_terminated_length": 1881.0, "completions/mean_length": 290.29296875, "completions/mean_terminated_length": 291.431396484375, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.11733333333333333, "grad_norm": 0.010616586543619633, "learning_rate": 2.5e-06, "loss": 0.0857, "num_tokens": 19296631.0, "reward": 1.000656247138977, "reward_std": 0.035624802112579346, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.6849062442779541, "rewards/format_reward_step": 0.98828125, "step": 110 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36175938649103045, "aux_distill/mean_u": 0.17420325036256049, "aux_distill/n_active_tok": 215.34375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5661924900679226, "calib/avg_num_step_conf": 3.36328125, "calib/ece": 0.36768627450980396, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0026797385620914924, "calib/mean_conf": 0.03231372549019607, "calib/mu_c": 0.03392156862745097, "calib/mu_w": 0.03124183006535948, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.011467755641539287, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1971.0, "completions/max_terminated_length": 1971.0, "completions/mean_length": 309.6328125, "completions/mean_terminated_length": 309.6328125, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.1184, "grad_norm": 0.010419880039989948, "learning_rate": 2.4722222222222226e-06, "loss": 0.074, "num_tokens": 19483305.0, "reward": 1.0051193237304688, "reward_std": 0.03306365758180618, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.6196136474609375, "rewards/format_reward_step": 0.9921875, "step": 111 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3407304754946381, "aux_distill/mean_u": 0.20758217514789062, "aux_distill/n_active_tok": 210.5, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5607392090979582, "calib/avg_num_step_conf": 3.29296875, "calib/ece": 0.38861111111111113, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0025316619281468035, "calib/mean_conf": 0.032023809523809524, "calib/mu_c": 0.03349056603773585, "calib/mu_w": 0.030958904109589045, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0, "calib/std_conf": 0.010440279359604128, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2222.0, "completions/max_terminated_length": 2222.0, "completions/mean_length": 341.10546875, "completions/mean_terminated_length": 342.4431457519531, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.11946666666666667, "grad_norm": 0.01065851654857397, "learning_rate": 2.4444444444444447e-06, "loss": 0.0805, "num_tokens": 19678548.0, "reward": 0.9859668016433716, "reward_std": 0.05308392643928528, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.5852148532867432, "rewards/format_reward_step": 0.97265625, "step": 112 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.31915219803340733, "aux_distill/mean_u": 0.13680159858362684, "aux_distill/n_active_tok": 241.75, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5020853698273052, "calib/avg_num_step_conf": 3.77734375, "calib/ece": 0.3543307086614173, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 3.5190615835761774e-05, "calib/mean_conf": 0.03543307086614173, "calib/mu_c": 0.03545454545454544, "calib/mu_w": 0.03541935483870968, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.011722238473165, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2200.0, "completions/max_terminated_length": 2200.0, "completions/mean_length": 309.89453125, "completions/mean_terminated_length": 309.89453125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.12053333333333334, "grad_norm": 0.012033089064061642, "learning_rate": 2.4166666666666667e-06, "loss": 0.0769, "num_tokens": 19863081.0, "reward": 1.0052074193954468, "reward_std": 0.03318488597869873, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.6315085887908936, "rewards/format_reward_step": 0.9921875, "step": 113 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.31962078250944614, "aux_distill/mean_u": 0.14795968757927555, "aux_distill/n_active_tok": 236.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.568682344158268, "calib/avg_num_step_conf": 3.69140625, "calib/ece": 0.4123137254901961, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0026819708846584564, "calib/mean_conf": 0.03474509803921568, "calib/mu_c": 0.036228070175438595, "calib/mu_w": 0.03354609929078014, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.012296728621642014, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1992.0, "completions/max_terminated_length": 1992.0, "completions/mean_length": 299.78125, "completions/mean_terminated_length": 299.78125, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.1216, "grad_norm": 0.012581607326865196, "learning_rate": 2.388888888888889e-06, "loss": 0.0884, "num_tokens": 20044849.0, "reward": 1.011549949645996, "reward_std": 0.023771952837705612, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5816937685012817, "rewards/format_reward_step": 0.99609375, "step": 114 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.31680090888403356, "aux_distill/mean_u": 0.13020203300884048, "aux_distill/n_active_tok": 258.5, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5182643427741467, "calib/avg_num_step_conf": 4.0390625, "calib/ece": 0.28820717131474105, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 9.731299927378395e-05, "calib/mean_conf": 0.034501992031872514, "calib/mu_c": 0.0345679012345679, "calib/mu_w": 0.034470588235294114, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.013090665906683839, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2391.0, "completions/max_terminated_length": 2391.0, "completions/mean_length": 349.6015625, "completions/mean_terminated_length": 349.6015625, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.12266666666666666, "grad_norm": 0.011446863412857056, "learning_rate": 2.361111111111111e-06, "loss": 0.1378, "num_tokens": 20239611.0, "reward": 0.9907386302947998, "reward_std": 0.06587958335876465, "rewards/accuracy_reward_step": 0.31640625, "rewards/final_brier_reward_step": 0.6846023201942444, "rewards/format_reward_step": 0.98046875, "step": 115 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3188343502115458, "aux_distill/mean_u": 0.125653797523076, "aux_distill/n_active_tok": 264.75, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4851329131268841, "calib/avg_num_step_conf": 4.13671875, "calib/ece": 0.3164426877470356, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.000258289942449981, "calib/mean_conf": 0.035335968379446636, "calib/mu_c": 0.0351685393258427, "calib/mu_w": 0.03542682926829268, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.012620399241968066, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2015.0, "completions/max_terminated_length": 2015.0, "completions/mean_length": 370.484375, "completions/mean_terminated_length": 370.484375, "completions/min_length": 73.0, "completions/min_terminated_length": 73.0, "epoch": 0.12373333333333333, "grad_norm": 0.01038755476474762, "learning_rate": 2.3333333333333336e-06, "loss": 0.0656, "num_tokens": 20438975.0, "reward": 0.999812126159668, "reward_std": 0.04449833184480667, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6636867523193359, "rewards/format_reward_step": 0.98828125, "step": 116 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2782537774182856, "aux_distill/mean_u": 0.11942123560487757, "aux_distill/n_active_tok": 270.0, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5806763658876334, "calib/avg_num_step_conf": 4.21875, "calib/ece": 0.245098814229249, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0030761492029097656, "calib/mean_conf": 0.035533596837944666, "calib/mu_c": 0.03774647887323943, "calib/mu_w": 0.034670329670329667, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01239229717829777, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2360.0, "completions/max_terminated_length": 2360.0, "completions/mean_length": 394.31640625, "completions/mean_terminated_length": 394.31640625, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.1248, "grad_norm": 0.008689876645803452, "learning_rate": 2.305555555555556e-06, "loss": 0.0694, "num_tokens": 20646520.0, "reward": 0.9980502128601074, "reward_std": 0.04275971278548241, "rewards/accuracy_reward_step": 0.27734375, "rewards/final_brier_reward_step": 0.7304753661155701, "rewards/format_reward_step": 0.98828125, "step": 117 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2848955246154219, "aux_distill/mean_u": 0.13537754193122786, "aux_distill/n_active_tok": 264.875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4693205529473136, "calib/avg_num_step_conf": 4.140625, "calib/ece": 0.39548, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001208920187793422, "calib/mean_conf": 0.03652, "calib/mu_c": 0.03583333333333333, "calib/mu_w": 0.03704225352112675, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.012340567247902343, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2594.0, "completions/max_terminated_length": 2594.0, "completions/mean_length": 373.71484375, "completions/mean_terminated_length": 375.180419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.12586666666666665, "grad_norm": 0.010260322131216526, "learning_rate": 2.277777777777778e-06, "loss": 0.1309, "num_tokens": 20846199.0, "reward": 0.9909540414810181, "reward_std": 0.052009761333465576, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.5834707021713257, "rewards/format_reward_step": 0.9765625, "step": 118 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.32525242306292057, "aux_distill/mean_u": 0.17335328483206366, "aux_distill/n_active_tok": 251.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5693690320164763, "calib/avg_num_step_conf": 3.94140625, "calib/ece": 0.389609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002033327092304804, "calib/mean_conf": 0.036171875000000006, "calib/mu_c": 0.037339449541284396, "calib/mu_w": 0.03530612244897959, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.011734104524179721, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1056.0, "completions/max_terminated_length": 1056.0, "completions/mean_length": 348.234375, "completions/mean_terminated_length": 349.6000061035156, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.12693333333333334, "grad_norm": 0.009621622040867805, "learning_rate": 2.25e-06, "loss": 0.0131, "num_tokens": 21040411.0, "reward": 1.0151753425598145, "reward_std": 0.012086945585906506, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.6045695543289185, "rewards/format_reward_step": 1.0, "step": 119 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.32881034817546606, "aux_distill/mean_u": 0.1642785781487071, "aux_distill/n_active_tok": 276.15625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5925612298071912, "calib/avg_num_step_conf": 4.30859375, "calib/ece": 0.3640711462450593, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004301719645648773, "calib/mean_conf": 0.03513833992094862, "calib/mu_c": 0.03772277227722772, "calib/mu_w": 0.03342105263157895, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.011948532266813949, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2931.0, "completions/max_terminated_length": 2931.0, "completions/mean_length": 360.81640625, "completions/mean_terminated_length": 360.81640625, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 0.128, "grad_norm": 0.009301810525357723, "learning_rate": 2.222222222222222e-06, "loss": 0.1065, "num_tokens": 21239468.0, "reward": 1.0024833679199219, "reward_std": 0.044663794338703156, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.6221542954444885, "rewards/format_reward_step": 0.98828125, "step": 120 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.32076170202344656, "aux_distill/mean_u": 0.1362645584935083, "aux_distill/n_active_tok": 259.75, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.532148524278007, "calib/avg_num_step_conf": 4.05859375, "calib/ece": 0.42178571428571426, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010428435417327853, "calib/mean_conf": 0.03456349206349207, "calib/mu_c": 0.035130434782608695, "calib/mu_w": 0.03408759124087591, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.012416704079039853, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2775.0, "completions/max_terminated_length": 2775.0, "completions/mean_length": 385.90625, "completions/mean_terminated_length": 387.41961669921875, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.12906666666666666, "grad_norm": 0.009882324375212193, "learning_rate": 2.1944444444444445e-06, "loss": 0.1093, "num_tokens": 21443316.0, "reward": 0.9994924068450928, "reward_std": 0.05566861853003502, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.5653910040855408, "rewards/format_reward_step": 0.984375, "step": 121 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36098944093100727, "aux_distill/mean_u": 0.16837954315821466, "aux_distill/n_active_tok": 266.3125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6004809567138958, "calib/avg_num_step_conf": 4.08984375, "calib/ece": 0.34929411764705887, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0037800597946184844, "calib/mean_conf": 0.035019607843137256, "calib/mu_c": 0.0373469387755102, "calib/mu_w": 0.033566878980891716, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.011371602394679636, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2043.0, "completions/max_terminated_length": 2043.0, "completions/mean_length": 338.0546875, "completions/mean_terminated_length": 338.0546875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.13013333333333332, "grad_norm": 0.010515622794628143, "learning_rate": 2.166666666666667e-06, "loss": 0.0752, "num_tokens": 21637202.0, "reward": 1.0097154378890991, "reward_std": 0.022465920075774193, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.6405245661735535, "rewards/format_reward_step": 0.99609375, "step": 122 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3363469988107681, "aux_distill/mean_u": 0.1908947539477813, "aux_distill/n_active_tok": 285.21875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6363573311138854, "calib/avg_num_step_conf": 4.421875, "calib/ece": 0.32095617529880477, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005334304341794979, "calib/mean_conf": 0.034422310756972115, "calib/mu_c": 0.037865168539325846, "calib/mu_w": 0.032530864197530866, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00039840637450199205, "calib/std_conf": 0.012810484230277445, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2173.0, "completions/max_terminated_length": 2173.0, "completions/mean_length": 426.55859375, "completions/mean_terminated_length": 426.55859375, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.1312, "grad_norm": 0.009776564314961433, "learning_rate": 2.138888888888889e-06, "loss": 0.0799, "num_tokens": 21851689.0, "reward": 0.9929714798927307, "reward_std": 0.049630723893642426, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6578179597854614, "rewards/format_reward_step": 0.98046875, "step": 123 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3255585916340351, "aux_distill/mean_u": 0.13354474605010633, "aux_distill/n_active_tok": 275.28125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5778886587585514, "calib/avg_num_step_conf": 4.30078125, "calib/ece": 0.4089763779527559, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003709282620975335, "calib/mean_conf": 0.03590551181102362, "calib/mu_c": 0.0379646017699115, "calib/mu_w": 0.03425531914893617, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.011525687210875166, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2292.0, "completions/max_terminated_length": 2292.0, "completions/mean_length": 345.0234375, "completions/mean_terminated_length": 345.0234375, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.13226666666666667, "grad_norm": 0.010006045922636986, "learning_rate": 2.1111111111111114e-06, "loss": 0.0613, "num_tokens": 22046831.0, "reward": 1.0082398653030396, "reward_std": 0.03543369472026825, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5828859210014343, "rewards/format_reward_step": 0.9921875, "step": 124 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.29937303997576237, "aux_distill/mean_u": 0.15761940309284003, "aux_distill/n_active_tok": 282.5, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4592781750085121, "calib/avg_num_step_conf": 4.41796875, "calib/ece": 0.3146062992125984, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0025093632958801404, "calib/mean_conf": 0.035787401574803146, "calib/mu_c": 0.03415730337078652, "calib/mu_w": 0.03666666666666666, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.013070451089131675, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2252.0, "completions/max_terminated_length": 2252.0, "completions/mean_length": 364.56640625, "completions/mean_terminated_length": 365.99609375, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.13333333333333333, "grad_norm": 0.01030960027128458, "learning_rate": 2.0833333333333334e-06, "loss": 0.0573, "num_tokens": 22244968.0, "reward": 1.0033423900604248, "reward_std": 0.03312192112207413, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6668410301208496, "rewards/format_reward_step": 0.9921875, "step": 125 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3546295613050461, "aux_distill/mean_u": 0.19248182451684565, "aux_distill/n_active_tok": 274.875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5799744897959184, "calib/avg_num_step_conf": 4.29296875, "calib/ece": 0.4076587301587301, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002410714285714294, "calib/mean_conf": 0.03678571428571428, "calib/mu_c": 0.038125000000000006, "calib/mu_w": 0.03571428571428571, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.01292475608001525, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2221.0, "completions/max_terminated_length": 2221.0, "completions/mean_length": 373.27734375, "completions/mean_terminated_length": 373.27734375, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.1344, "grad_norm": 0.009459101594984531, "learning_rate": 2.0555555555555555e-06, "loss": 0.1564, "num_tokens": 22445991.0, "reward": 1.0003063678741455, "reward_std": 0.054473064839839935, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.5787379145622253, "rewards/format_reward_step": 0.984375, "step": 126 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3222575797699392, "aux_distill/mean_u": 0.15707868353003399, "aux_distill/n_active_tok": 285.65625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.7024915824915825, "calib/avg_num_step_conf": 4.453125, "calib/ece": 0.36180722891566264, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009347474747474757, "calib/mean_conf": 0.035783132530120484, "calib/mu_c": 0.04141414141414142, "calib/mu_w": 0.03206666666666666, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.013243516130715595, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2538.0, "completions/max_terminated_length": 2538.0, "completions/mean_length": 399.76171875, "completions/mean_terminated_length": 399.76171875, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.13546666666666668, "grad_norm": 0.009776722639799118, "learning_rate": 2.027777777777778e-06, "loss": 0.1557, "num_tokens": 22652002.0, "reward": 0.987963855266571, "reward_std": 0.07154573500156403, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.6165527105331421, "rewards/format_reward_step": 0.97265625, "step": 127 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.35741139482706785, "aux_distill/mean_u": 0.1896862522924126, "aux_distill/n_active_tok": 248.8125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.549607843137255, "calib/avg_num_step_conf": 3.86328125, "calib/ece": 0.36051383399209486, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0022437908496731954, "calib/mean_conf": 0.034743083003952575, "calib/mu_c": 0.03609999999999999, "calib/mu_w": 0.0338562091503268, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.012527960884909475, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2875.0, "completions/max_terminated_length": 2875.0, "completions/mean_length": 395.7734375, "completions/mean_terminated_length": 395.7734375, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.13653333333333334, "grad_norm": 0.0100556043908, "learning_rate": 2.0000000000000003e-06, "loss": 0.1223, "num_tokens": 22859984.0, "reward": 0.9996402263641357, "reward_std": 0.05114775896072388, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.6242804527282715, "rewards/format_reward_step": 0.984375, "step": 128 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3307932415045798, "aux_distill/mean_u": 0.16508309298781187, "aux_distill/n_active_tok": 257.5, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5442307692307692, "calib/avg_num_step_conf": 4.02734375, "calib/ece": 0.3728346456692913, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0018256410256410338, "calib/mean_conf": 0.03661417322834646, "calib/mu_c": 0.03769230769230769, "calib/mu_w": 0.03586666666666666, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.013673381948982037, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2891.0, "completions/max_terminated_length": 2891.0, "completions/mean_length": 368.37109375, "completions/mean_terminated_length": 368.37109375, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.1376, "grad_norm": 0.009598263539373875, "learning_rate": 1.9722222222222224e-06, "loss": 0.0814, "num_tokens": 23056671.0, "reward": 1.006742238998413, "reward_std": 0.03526865690946579, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.6150468587875366, "rewards/format_reward_step": 0.9921875, "step": 129 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.33436241559684277, "aux_distill/mean_u": 0.16774004768920198, "aux_distill/n_active_tok": 255.0, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6012166060454033, "calib/avg_num_step_conf": 3.99609375, "calib/ece": 0.4934387351778657, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004576696350181854, "calib/mean_conf": 0.036205533596837945, "calib/mu_c": 0.03835820895522387, "calib/mu_w": 0.033781512605042016, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.013211163122124604, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1853.0, "completions/max_terminated_length": 1853.0, "completions/mean_length": 350.28125, "completions/mean_terminated_length": 354.4347839355469, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.13866666666666666, "grad_norm": 0.010324436239898205, "learning_rate": 1.944444444444445e-06, "loss": 0.0118, "num_tokens": 23251631.0, "reward": 1.0076253414154053, "reward_std": 0.04466870054602623, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5035320520401001, "rewards/format_reward_step": 0.98828125, "step": 130 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.32768950890749693, "aux_distill/mean_u": 0.1545797506967805, "aux_distill/n_active_tok": 249.875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.526496010638298, "calib/avg_num_step_conf": 3.90234375, "calib/ece": 0.3321259842519685, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0012234042553191452, "calib/mean_conf": 0.037952755905511816, "calib/mu_c": 0.038723404255319144, "calib/mu_w": 0.0375, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.012971111613869993, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2391.0, "completions/max_terminated_length": 2391.0, "completions/mean_length": 368.4921875, "completions/mean_terminated_length": 368.4921875, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.13973333333333332, "grad_norm": 0.0099790683016181, "learning_rate": 1.916666666666667e-06, "loss": 0.0965, "num_tokens": 23452173.0, "reward": 1.0035020112991333, "reward_std": 0.03792167454957962, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.651535153388977, "rewards/format_reward_step": 0.98828125, "step": 131 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.32731077587231994, "aux_distill/mean_u": 0.22404103030817973, "aux_distill/n_active_tok": 251.3125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6355338834708676, "calib/avg_num_step_conf": 3.875, "calib/ece": 0.4702371541501977, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007210552638159548, "calib/mean_conf": 0.03964426877470356, "calib/mu_c": 0.043178294573643416, "calib/mu_w": 0.03596774193548387, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.014645748913053333, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1385.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 348.46875, "completions/mean_terminated_length": 349.8353271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.1408, "grad_norm": 0.009376777336001396, "learning_rate": 1.888888888888889e-06, "loss": 0.0607, "num_tokens": 23646973.0, "reward": 1.0091564655303955, "reward_std": 0.04505951702594757, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.526125431060791, "rewards/format_reward_step": 0.98828125, "step": 132 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38718144688755274, "aux_distill/mean_u": 0.1867236683741768, "aux_distill/n_active_tok": 259.59375, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6159521590556073, "calib/avg_num_step_conf": 4.0546875, "calib/ece": 0.2611290322580645, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004100652376514448, "calib/mean_conf": 0.03725806451612904, "calib/mu_c": 0.040135135135135135, "calib/mu_w": 0.03603448275862069, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.013550307083618207, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2897.0, "completions/max_terminated_length": 2897.0, "completions/mean_length": 419.609375, "completions/mean_terminated_length": 421.2549133300781, "completions/min_length": 0.0, "completions/min_terminated_length": 120.0, "epoch": 0.14186666666666667, "grad_norm": 0.008629664778709412, "learning_rate": 1.8611111111111113e-06, "loss": 0.1469, "num_tokens": 23860737.0, "reward": 0.9756871461868286, "reward_std": 0.09578809142112732, "rewards/accuracy_reward_step": 0.2890625, "rewards/final_brier_reward_step": 0.6974679231643677, "rewards/format_reward_step": 0.96484375, "step": 133 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3483828450553119, "aux_distill/mean_u": 0.17435870078236385, "aux_distill/n_active_tok": 245.0625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5869265389082462, "calib/avg_num_step_conf": 3.82421875, "calib/ece": 0.30072580645161284, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004849012775842043, "calib/mean_conf": 0.037983870967741945, "calib/mu_c": 0.04119047619047619, "calib/mu_w": 0.036341463414634144, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.014890993094424206, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1958.0, "completions/max_terminated_length": 1958.0, "completions/mean_length": 400.36328125, "completions/mean_terminated_length": 403.5157470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.14293333333333333, "grad_norm": 0.007807637099176645, "learning_rate": 1.8333333333333333e-06, "loss": 0.0699, "num_tokens": 24072182.0, "reward": 0.9793531894683838, "reward_std": 0.0878257304430008, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.665737509727478, "rewards/format_reward_step": 0.96484375, "step": 134 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3719960623420775, "aux_distill/mean_u": 0.14194528905229298, "aux_distill/n_active_tok": 271.46875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5835403726708074, "calib/avg_num_step_conf": 4.234375, "calib/ece": 0.31928286852589643, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0038895790200137934, "calib/mean_conf": 0.039282868525896415, "calib/mu_c": 0.04177777777777777, "calib/mu_w": 0.037888198757763975, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.014866057536278997, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2971.0, "completions/max_terminated_length": 2971.0, "completions/mean_length": 390.16015625, "completions/mean_terminated_length": 390.16015625, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.144, "grad_norm": 0.008397375233471394, "learning_rate": 1.8055555555555557e-06, "loss": 0.1209, "num_tokens": 24277943.0, "reward": 0.994291365146637, "reward_std": 0.0661432147026062, "rewards/accuracy_reward_step": 0.3515625, "rewards/final_brier_reward_step": 0.6565515398979187, "rewards/format_reward_step": 0.98046875, "step": 135 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.34026037296280265, "aux_distill/mean_u": 0.16830172599808957, "aux_distill/n_active_tok": 248.5625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6767696267696268, "calib/avg_num_step_conf": 3.88671875, "calib/ece": 0.3774308300395257, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008354568854568853, "calib/mean_conf": 0.03758893280632412, "calib/mu_c": 0.04247619047619047, "calib/mu_w": 0.03412162162162162, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.013131931485992352, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1928.0, "completions/max_terminated_length": 1928.0, "completions/mean_length": 377.73828125, "completions/mean_terminated_length": 379.2196350097656, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.14506666666666668, "grad_norm": 0.009265841916203499, "learning_rate": 1.777777777777778e-06, "loss": 0.0783, "num_tokens": 24483132.0, "reward": 1.0049197673797607, "reward_std": 0.044206224381923676, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6114019155502319, "rewards/format_reward_step": 0.98828125, "step": 136 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3602973301894963, "aux_distill/mean_u": 0.1981645887058841, "aux_distill/n_active_tok": 265.6875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5256769374416433, "calib/avg_num_step_conf": 4.1484375, "calib/ece": 0.35139442231075696, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00011938108576765166, "calib/mean_conf": 0.041195219123505974, "calib/mu_c": 0.04112244897959183, "calib/mu_w": 0.04124183006535948, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0010756972111553786, "calib/std_conf": 0.01908686729245096, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2672.0, "completions/max_terminated_length": 2672.0, "completions/mean_length": 383.1171875, "completions/mean_terminated_length": 383.1171875, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.14613333333333334, "grad_norm": 0.00766771798953414, "learning_rate": 1.75e-06, "loss": 0.1229, "num_tokens": 24688194.0, "reward": 0.9952003955841064, "reward_std": 0.06639545410871506, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.6271195411682129, "rewards/format_reward_step": 0.98046875, "step": 137 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.32277982542291284, "aux_distill/mean_u": 0.1438092188570127, "aux_distill/n_active_tok": 256.09375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5729961832061068, "calib/avg_num_step_conf": 4.0, "calib/ece": 0.4808764940239044, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003192111959287529, "calib/mean_conf": 0.04183266932270916, "calib/mu_c": 0.0433587786259542, "calib/mu_w": 0.04016666666666667, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.00039840637450199205, "calib/std_conf": 0.013822576963452685, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2305.0, "completions/max_terminated_length": 2305.0, "completions/mean_length": 361.82421875, "completions/mean_terminated_length": 361.82421875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.1472, "grad_norm": 0.01024717278778553, "learning_rate": 1.7222222222222224e-06, "loss": 0.1336, "num_tokens": 24885157.0, "reward": 1.0017046928405762, "reward_std": 0.07045163214206696, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5112218856811523, "rewards/format_reward_step": 0.98046875, "step": 138 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3387621305882931, "aux_distill/mean_u": 0.16340000411789907, "aux_distill/n_active_tok": 224.0625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5776785714285715, "calib/avg_num_step_conf": 3.50390625, "calib/ece": 0.39956349206349207, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00439285714285715, "calib/mean_conf": 0.044880952380952376, "calib/mu_c": 0.04732142857142858, "calib/mu_w": 0.04292857142857143, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.013077368760425208, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1891.0, "completions/max_terminated_length": 1891.0, "completions/mean_length": 332.26171875, "completions/mean_terminated_length": 332.26171875, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.14826666666666666, "grad_norm": 0.009077508933842182, "learning_rate": 1.6944444444444446e-06, "loss": 0.0794, "num_tokens": 25073312.0, "reward": 1.0000994205474854, "reward_std": 0.061380185186862946, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.5822300910949707, "rewards/format_reward_step": 0.98046875, "step": 139 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3609836152754724, "aux_distill/mean_u": 0.14727386832180747, "aux_distill/n_active_tok": 233.96875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6066162570888468, "calib/avg_num_step_conf": 3.6328125, "calib/ece": 0.5015019762845849, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00501449275362318, "calib/mean_conf": 0.04395256916996048, "calib/mu_c": 0.04623188405797101, "calib/mu_w": 0.04121739130434783, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.013520213359584744, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1732.0, "completions/max_terminated_length": 1732.0, "completions/mean_length": 334.64453125, "completions/mean_terminated_length": 335.9568786621094, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.14933333333333335, "grad_norm": 0.009565300308167934, "learning_rate": 1.6666666666666667e-06, "loss": 0.0898, "num_tokens": 25263997.0, "reward": 1.0043518543243408, "reward_std": 0.06932803243398666, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.48917266726493835, "rewards/format_reward_step": 0.98046875, "step": 140 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.34659315133467317, "aux_distill/mean_u": 0.1644295336444299, "aux_distill/n_active_tok": 262.09375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6452615384615384, "calib/avg_num_step_conf": 4.05859375, "calib/ece": 0.44654901960784316, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007910769230769235, "calib/mean_conf": 0.043647058823529414, "calib/mu_c": 0.04768000000000001, "calib/mu_w": 0.03976923076923077, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.01504349710256798, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2254.0, "completions/max_terminated_length": 2254.0, "completions/mean_length": 357.1796875, "completions/mean_terminated_length": 357.1796875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.1504, "grad_norm": 0.008549941703677177, "learning_rate": 1.638888888888889e-06, "loss": 0.0807, "num_tokens": 25462531.0, "reward": 1.014409065246582, "reward_std": 0.036273662000894547, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5483492016792297, "rewards/format_reward_step": 0.9921875, "step": 141 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3710846067406237, "aux_distill/mean_u": 0.1729861332487001, "aux_distill/n_active_tok": 231.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5972666830829846, "calib/avg_num_step_conf": 3.63671875, "calib/ece": 0.4383529411764706, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0051446688007879965, "calib/mean_conf": 0.04792156862745098, "calib/mu_c": 0.05056451612903227, "calib/mu_w": 0.04541984732824427, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.014224865190794278, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 805.0, "completions/max_terminated_length": 805.0, "completions/mean_length": 313.69140625, "completions/mean_terminated_length": 314.9216003417969, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.15146666666666667, "grad_norm": 0.009747452102601528, "learning_rate": 1.6111111111111113e-06, "loss": 0.0442, "num_tokens": 25647996.0, "reward": 1.0172728300094604, "reward_std": 0.03187453746795654, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5579832196235657, "rewards/format_reward_step": 0.9921875, "step": 142 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3411660713609308, "aux_distill/mean_u": 0.1608952434332203, "aux_distill/n_active_tok": 262.4375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5976733977172958, "calib/avg_num_step_conf": 4.09765625, "calib/ece": 0.4187747035573122, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00510660980810234, "calib/mean_conf": 0.05158102766798419, "calib/mu_c": 0.05428571428571428, "calib/mu_w": 0.04917910447761194, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.017999515686590115, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2703.0, "completions/max_terminated_length": 2703.0, "completions/mean_length": 365.1171875, "completions/mean_terminated_length": 365.1171875, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.15253333333333333, "grad_norm": 0.008262704126536846, "learning_rate": 1.5833333333333333e-06, "loss": 0.0777, "num_tokens": 25848802.0, "reward": 1.0081377029418945, "reward_std": 0.05720390006899834, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5670566558837891, "rewards/format_reward_step": 0.984375, "step": 143 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3373702000826597, "aux_distill/mean_u": 0.13485213419125378, "aux_distill/n_active_tok": 263.46875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5864911417322834, "calib/avg_num_step_conf": 4.109375, "calib/ece": 0.44854901960784316, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007110605314960626, "calib/mean_conf": 0.05341176470588236, "calib/mu_c": 0.05695312500000001, "calib/mu_w": 0.04984251968503938, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.021358909121174646, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2474.0, "completions/max_terminated_length": 2474.0, "completions/mean_length": 334.23046875, "completions/mean_terminated_length": 334.23046875, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.1536, "grad_norm": 0.00993384700268507, "learning_rate": 1.5555555555555558e-06, "loss": 0.0846, "num_tokens": 26038493.0, "reward": 1.0229222774505615, "reward_std": 0.03093772381544113, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5497507452964783, "rewards/format_reward_step": 0.99609375, "step": 144 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.34878645557910204, "aux_distill/mean_u": 0.18183138162863954, "aux_distill/n_active_tok": 289.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5883374689826303, "calib/avg_num_step_conf": 4.5078125, "calib/ece": 0.45669291338582674, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006533498759305208, "calib/mean_conf": 0.05511811023622047, "calib/mu_c": 0.05830769230769231, "calib/mu_w": 0.0517741935483871, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01774119019399936, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2132.0, "completions/max_terminated_length": 2132.0, "completions/mean_length": 311.1015625, "completions/mean_terminated_length": 312.32159423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.15466666666666667, "grad_norm": 0.009975967928767204, "learning_rate": 1.527777777777778e-06, "loss": 0.0247, "num_tokens": 26220839.0, "reward": 1.0201336145401, "reward_std": 0.036704327911138535, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5402672290802002, "rewards/format_reward_step": 0.9921875, "step": 145 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.398836228530854, "aux_distill/mean_u": 0.17221634645466294, "aux_distill/n_active_tok": 287.5625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5221437740693197, "calib/avg_num_step_conf": 4.4921875, "calib/ece": 0.27276679841897233, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0004671230922835487, "calib/mean_conf": 0.05395256916996048, "calib/mu_c": 0.05426829268292682, "calib/mu_w": 0.05380116959064327, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0013043478260869562, "calib/std_conf": 0.017563822730073224, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2142.0, "completions/max_terminated_length": 2142.0, "completions/mean_length": 347.65625, "completions/mean_terminated_length": 349.0196228027344, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.15573333333333333, "grad_norm": 0.009122155606746674, "learning_rate": 1.5e-06, "loss": 0.0681, "num_tokens": 26417055.0, "reward": 1.0001678466796875, "reward_std": 0.05830451473593712, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.6956480741500854, "rewards/format_reward_step": 0.984375, "step": 146 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.33496221201494336, "aux_distill/mean_u": 0.13949762871699134, "aux_distill/n_active_tok": 265.6875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5589768654284784, "calib/avg_num_step_conf": 4.20703125, "calib/ece": 0.33366141732283466, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0030765721733463716, "calib/mean_conf": 0.05610236220472441, "calib/mu_c": 0.05797979797979799, "calib/mu_w": 0.05490322580645162, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.016629513013958232, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2389.0, "completions/max_terminated_length": 2389.0, "completions/mean_length": 319.578125, "completions/mean_terminated_length": 320.8313903808594, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.1568, "grad_norm": 0.010258171707391739, "learning_rate": 1.4722222222222225e-06, "loss": 0.0565, "num_tokens": 26602547.0, "reward": 1.0108044147491455, "reward_std": 0.04217313602566719, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.6466090083122253, "rewards/format_reward_step": 0.98828125, "step": 147 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.34962216787971556, "aux_distill/mean_u": 0.18060808775131193, "aux_distill/n_active_tok": 267.46875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6791163382072471, "calib/avg_num_step_conf": 4.16796875, "calib/ece": 0.5073517786561265, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011825174825174818, "calib/mean_conf": 0.05786561264822135, "calib/mu_c": 0.063006993006993, "calib/mu_w": 0.05118181818181818, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.020874879548542648, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2560.0, "completions/max_terminated_length": 2560.0, "completions/mean_length": 335.19140625, "completions/mean_terminated_length": 335.19140625, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.15786666666666666, "grad_norm": 0.01002125721424818, "learning_rate": 1.4444444444444445e-06, "loss": 0.119, "num_tokens": 26793468.0, "reward": 1.0118077993392944, "reward_std": 0.07325895875692368, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.48845937848091125, "rewards/format_reward_step": 0.9765625, "step": 148 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3421163931488991, "aux_distill/mean_u": 0.15572480646023862, "aux_distill/n_active_tok": 286.1875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6150439737396259, "calib/avg_num_step_conf": 4.48828125, "calib/ece": 0.40062745098039215, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010598290598290608, "calib/mean_conf": 0.05819607843137255, "calib/mu_c": 0.06393162393162394, "calib/mu_w": 0.05333333333333333, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.022610963511000795, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 932.0, "completions/max_terminated_length": 932.0, "completions/mean_length": 352.83203125, "completions/mean_terminated_length": 354.2156982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.15893333333333334, "grad_norm": 0.00943607185035944, "learning_rate": 1.4166666666666667e-06, "loss": 0.059, "num_tokens": 26988249.0, "reward": 1.015564203262329, "reward_std": 0.048596229404211044, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5858160257339478, "rewards/format_reward_step": 0.98828125, "step": 149 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36728407302871346, "aux_distill/mean_u": 0.17260016591145952, "aux_distill/n_active_tok": 240.28125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6337993319312137, "calib/avg_num_step_conf": 3.73828125, "calib/ece": 0.39545098039215687, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012134727205245568, "calib/mean_conf": 0.06729411764705882, "calib/mu_c": 0.0738135593220339, "calib/mu_w": 0.06167883211678833, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.027427833378327503, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1790.0, "completions/max_terminated_length": 1790.0, "completions/mean_length": 281.06640625, "completions/mean_terminated_length": 281.06640625, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.16, "grad_norm": 0.011365544982254505, "learning_rate": 1.3888888888888892e-06, "loss": 0.0586, "num_tokens": 27165162.0, "reward": 1.02748703956604, "reward_std": 0.0358702652156353, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5979429483413696, "rewards/format_reward_step": 0.99609375, "step": 150 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.37775949435308576, "aux_distill/mean_u": 0.19691793188973097, "aux_distill/n_active_tok": 308.34375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5913930789707187, "calib/avg_num_step_conf": 4.8125, "calib/ece": 0.2962301587301587, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007668418537983772, "calib/mean_conf": 0.06488095238095239, "calib/mu_c": 0.0697802197802198, "calib/mu_w": 0.062111801242236024, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.025811928810808394, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2308.0, "completions/max_terminated_length": 2308.0, "completions/mean_length": 372.53515625, "completions/mean_terminated_length": 372.53515625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.16106666666666666, "grad_norm": 0.008531233295798302, "learning_rate": 1.3611111111111112e-06, "loss": 0.0956, "num_tokens": 27367555.0, "reward": 1.0047494173049927, "reward_std": 0.07198721170425415, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.6735613346099854, "rewards/format_reward_step": 0.98046875, "step": 151 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3672543163411319, "aux_distill/mean_u": 0.13673712651834333, "aux_distill/n_active_tok": 297.3125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5581105870020965, "calib/avg_num_step_conf": 4.64453125, "calib/ece": 0.3008235294117647, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.01533411949685537, "calib/mean_conf": 0.07564705882352941, "calib/mu_c": 0.08520833333333334, "calib/mu_w": 0.06987421383647797, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.06668827792736973, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1254.0, "completions/max_terminated_length": 1254.0, "completions/mean_length": 312.6640625, "completions/mean_terminated_length": 313.8902282714844, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.16213333333333332, "grad_norm": 0.009770411998033524, "learning_rate": 1.3333333333333334e-06, "loss": -0.0123, "num_tokens": 27552989.0, "reward": 1.019080400466919, "reward_std": 0.049445126205682755, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.6709734201431274, "rewards/format_reward_step": 0.9921875, "step": 152 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38715670723468065, "aux_distill/mean_u": 0.2227721838341249, "aux_distill/n_active_tok": 298.53125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6068790819752183, "calib/avg_num_step_conf": 4.6015625, "calib/ece": 0.4351953125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010723921137764755, "calib/mean_conf": 0.06871093750000001, "calib/mu_c": 0.074031007751938, "calib/mu_w": 0.06330708661417324, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.029795829286514138, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1053.0, "completions/max_terminated_length": 1053.0, "completions/mean_length": 323.4921875, "completions/mean_terminated_length": 324.76080322265625, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.1632, "grad_norm": 0.010421986691653728, "learning_rate": 1.3055555555555556e-06, "loss": 0.0204, "num_tokens": 27743123.0, "reward": 1.034500241279602, "reward_std": 0.027013670653104782, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5650941133499146, "rewards/format_reward_step": 1.0, "step": 153 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.35084733460098505, "aux_distill/mean_u": 0.16063960511918457, "aux_distill/n_active_tok": 298.09375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5794099378881987, "calib/avg_num_step_conf": 4.65234375, "calib/ece": 0.3744705882352941, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010627329192546595, "calib/mean_conf": 0.07807843137254902, "calib/mu_c": 0.08391304347826087, "calib/mu_w": 0.07328571428571427, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0007843137254901962, "calib/std_conf": 0.040494900737928746, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 766.0, "completions/max_terminated_length": 766.0, "completions/mean_length": 303.015625, "completions/mean_terminated_length": 304.2039489746094, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.16426666666666667, "grad_norm": 0.010418664664030075, "learning_rate": 1.2777777777777779e-06, "loss": 0.0423, "num_tokens": 27925135.0, "reward": 1.0232136249542236, "reward_std": 0.051645420491695404, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6128336191177368, "rewards/format_reward_step": 0.984375, "step": 154 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.33021269901655614, "aux_distill/mean_u": 0.14969116715629144, "aux_distill/n_active_tok": 292.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5542440660474716, "calib/avg_num_step_conf": 4.54296875, "calib/ece": 0.32307086614173225, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005285087719298254, "calib/mean_conf": 0.07850393700787402, "calib/mu_c": 0.08166666666666667, "calib/mu_w": 0.07638157894736841, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03686704650983364, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2633.0, "completions/max_terminated_length": 2633.0, "completions/mean_length": 304.2734375, "completions/mean_terminated_length": 304.2734375, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.16533333333333333, "grad_norm": 0.011450732126832008, "learning_rate": 1.25e-06, "loss": 0.0748, "num_tokens": 28110245.0, "reward": 1.0229480266571045, "reward_std": 0.04218357801437378, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.6513648629188538, "rewards/format_reward_step": 0.9921875, "step": 155 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3649008311331272, "aux_distill/mean_u": 0.16765184443474762, "aux_distill/n_active_tok": 335.375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5861605428242223, "calib/avg_num_step_conf": 5.22265625, "calib/ece": 0.34644268774703557, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003197413903469476, "calib/mean_conf": 0.08320158102766798, "calib/mu_c": 0.08504672897196262, "calib/mu_w": 0.08184931506849315, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0033596837944664033, "calib/std_conf": 0.04752809526372359, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2478.0, "completions/max_terminated_length": 2478.0, "completions/mean_length": 340.5703125, "completions/mean_terminated_length": 340.5703125, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.1664, "grad_norm": 0.008358205668628216, "learning_rate": 1.2222222222222223e-06, "loss": 0.067, "num_tokens": 28302191.0, "reward": 1.0192911624908447, "reward_std": 0.05631423741579056, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.632332444190979, "rewards/format_reward_step": 0.98828125, "step": 156 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38443474285304546, "aux_distill/mean_u": 0.19694286921091003, "aux_distill/n_active_tok": 322.59375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6189921259842519, "calib/avg_num_step_conf": 5.01953125, "calib/ece": 0.4149603174603174, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01835023622047245, "calib/mean_conf": 0.08900793650793651, "calib/mu_c": 0.09811023622047248, "calib/mu_w": 0.07976000000000003, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.04890272106425196, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3031.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 335.7890625, "completions/mean_terminated_length": 337.10589599609375, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.16746666666666668, "grad_norm": 0.008726774714887142, "learning_rate": 1.1944444444444446e-06, "loss": 0.1026, "num_tokens": 28491881.0, "reward": 1.027970552444458, "reward_std": 0.0656554326415062, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5754722356796265, "rewards/format_reward_step": 0.984375, "step": 157 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3361390228383243, "aux_distill/mean_u": 0.1681130252148755, "aux_distill/n_active_tok": 342.53125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6021364317841078, "calib/avg_num_step_conf": 5.33203125, "calib/ece": 0.4535826771653544, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012662418790604688, "calib/mean_conf": 0.0897244094488189, "calib/mu_c": 0.09550724637681159, "calib/mu_w": 0.0828448275862069, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03995473367151239, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1953.0, "completions/max_terminated_length": 1953.0, "completions/mean_length": 334.80078125, "completions/mean_terminated_length": 334.80078125, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.16853333333333334, "grad_norm": 0.009793547913432121, "learning_rate": 1.1666666666666668e-06, "loss": 0.0737, "num_tokens": 28682830.0, "reward": 1.0388861894607544, "reward_std": 0.05535713583230972, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5465222597122192, "rewards/format_reward_step": 0.9921875, "step": 158 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.34734752285294235, "aux_distill/mean_u": 0.14985655571886566, "aux_distill/n_active_tok": 300.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5640445314411549, "calib/avg_num_step_conf": 4.73828125, "calib/ece": 0.383515625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005874724736970871, "calib/mean_conf": 0.09585937500000002, "calib/mu_c": 0.0989344262295082, "calib/mu_w": 0.09305970149253733, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00140625, "calib/std_conf": 0.042912107552640376, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1069.0, "completions/max_terminated_length": 1069.0, "completions/mean_length": 288.9375, "completions/mean_terminated_length": 290.07061767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.1696, "grad_norm": 0.009990203194320202, "learning_rate": 1.138888888888889e-06, "loss": 0.0227, "num_tokens": 28861582.0, "reward": 1.0416332483291626, "reward_std": 0.03353843465447426, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6067039370536804, "rewards/format_reward_step": 1.0, "step": 159 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36702860286459327, "aux_distill/mean_u": 0.16000241486172087, "aux_distill/n_active_tok": 336.375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5723726627981947, "calib/avg_num_step_conf": 5.1875, "calib/ece": 0.3390836653386454, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009315925209542247, "calib/mean_conf": 0.10649402390438246, "calib/mu_c": 0.11172727272727276, "calib/mu_w": 0.10241134751773051, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003665338645418327, "calib/std_conf": 0.0613433528297715, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2485.0, "completions/max_terminated_length": 2485.0, "completions/mean_length": 345.75, "completions/mean_terminated_length": 345.75, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.17066666666666666, "grad_norm": 0.00989892240613699, "learning_rate": 1.111111111111111e-06, "loss": 0.0859, "num_tokens": 29054934.0, "reward": 1.0210720300674438, "reward_std": 0.0724157989025116, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.6319879293441772, "rewards/format_reward_step": 0.98046875, "step": 160 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.33740366343408823, "aux_distill/mean_u": 0.17007575680141462, "aux_distill/n_active_tok": 324.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5586751626178148, "calib/avg_num_step_conf": 5.078125, "calib/ece": 0.5285490196078432, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009093986459577874, "calib/mean_conf": 0.10674509803921568, "calib/mu_c": 0.11006172839506174, "calib/mu_w": 0.10096774193548387, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0516776749071968, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1226.0, "completions/max_terminated_length": 1226.0, "completions/mean_length": 315.41015625, "completions/mean_terminated_length": 316.6470642089844, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.17173333333333332, "grad_norm": 0.009658781811594963, "learning_rate": 1.0833333333333335e-06, "loss": 0.0206, "num_tokens": 29239599.0, "reward": 1.05873703956604, "reward_std": 0.05385105684399605, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.48856794834136963, "rewards/format_reward_step": 0.99609375, "step": 161 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.351145098451525, "aux_distill/mean_u": 0.16597964091285583, "aux_distill/n_active_tok": 344.96875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6000405844155845, "calib/avg_num_step_conf": 5.375, "calib/ece": 0.5466015625000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.016260822510822517, "calib/mean_conf": 0.10964843750000003, "calib/mu_c": 0.11523809523809525, "calib/mu_w": 0.09897727272727273, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.054988221728008206, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1110.0, "completions/max_terminated_length": 1110.0, "completions/mean_length": 315.31640625, "completions/mean_terminated_length": 316.5529479980469, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.1728, "grad_norm": 0.009903507307171822, "learning_rate": 1.0555555555555557e-06, "loss": 0.0578, "num_tokens": 29424464.0, "reward": 1.0642025470733643, "reward_std": 0.05435194447636604, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.47606131434440613, "rewards/format_reward_step": 0.99609375, "step": 162 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3831592188216746, "aux_distill/mean_u": 0.19678231264481222, "aux_distill/n_active_tok": 386.3125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6116102499209112, "calib/avg_num_step_conf": 6.0078125, "calib/ece": 0.31413385826771656, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.021293894337235053, "calib/mean_conf": 0.115, "calib/mu_c": 0.1271559633027523, "calib/mu_w": 0.10586206896551725, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0638511433252139, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2184.0, "completions/max_terminated_length": 2184.0, "completions/mean_length": 355.8203125, "completions/mean_terminated_length": 355.8203125, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.17386666666666667, "grad_norm": 0.009598040953278542, "learning_rate": 1.0277777777777777e-06, "loss": 0.0468, "num_tokens": 29620386.0, "reward": 1.0377447605133057, "reward_std": 0.06843307614326477, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.6575207114219666, "rewards/format_reward_step": 0.9921875, "step": 163 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.361350919585675, "aux_distill/mean_u": 0.1579362222322398, "aux_distill/n_active_tok": 359.28125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6311188811188811, "calib/avg_num_step_conf": 5.59765625, "calib/ece": 0.2724313725490196, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.026507381507381494, "calib/mean_conf": 0.11580392156862745, "calib/mu_c": 0.132020202020202, "calib/mu_w": 0.10551282051282052, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.05963319894147763, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1842.0, "completions/max_terminated_length": 1842.0, "completions/mean_length": 346.69921875, "completions/mean_terminated_length": 346.69921875, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.17493333333333333, "grad_norm": 0.010199583135545254, "learning_rate": 1.0000000000000002e-06, "loss": 0.0409, "num_tokens": 29815277.0, "reward": 1.0386981964111328, "reward_std": 0.059402212500572205, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.6945840120315552, "rewards/format_reward_step": 0.99609375, "step": 164 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3957953127101064, "aux_distill/mean_u": 0.17184055225291792, "aux_distill/n_active_tok": 346.4375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5459833614974652, "calib/avg_num_step_conf": 5.37890625, "calib/ece": 0.2625490196078431, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007409333160015602, "calib/mean_conf": 0.12176470588235294, "calib/mu_c": 0.1263265306122449, "calib/mu_w": 0.11891719745222931, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.06361359708991009, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3020.0, "completions/max_terminated_length": 3020.0, "completions/mean_length": 347.6171875, "completions/mean_terminated_length": 347.6171875, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.176, "grad_norm": 0.010400495491921902, "learning_rate": 9.722222222222224e-07, "loss": 0.0696, "num_tokens": 30009843.0, "reward": 1.0350532531738281, "reward_std": 0.05347467213869095, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.6912003755569458, "rewards/format_reward_step": 0.99609375, "step": 165 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.37643085792660713, "aux_distill/mean_u": 0.2064087180197083, "aux_distill/n_active_tok": 397.71875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6964863184079603, "calib/avg_num_step_conf": 6.15625, "calib/ece": 0.40870078740157484, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03852238805970147, "calib/mean_conf": 0.1223228346456693, "calib/mu_c": 0.1405223880597015, "calib/mu_w": 0.10200000000000002, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001732283464566929, "calib/std_conf": 0.06698012184725738, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2423.0, "completions/max_terminated_length": 2423.0, "completions/mean_length": 390.49609375, "completions/mean_terminated_length": 390.49609375, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.17706666666666668, "grad_norm": 0.00810649711638689, "learning_rate": 9.444444444444445e-07, "loss": 0.0979, "num_tokens": 30215994.0, "reward": 1.0560935735702515, "reward_std": 0.07129926979541779, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5965621471405029, "rewards/format_reward_step": 0.9921875, "step": 166 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38002316700294614, "aux_distill/mean_u": 0.19313883956834368, "aux_distill/n_active_tok": 386.53125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6171767214348426, "calib/avg_num_step_conf": 6.01953125, "calib/ece": 0.330197628458498, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.033337514110121605, "calib/mean_conf": 0.1401581027667984, "calib/mu_c": 0.1578151260504201, "calib/mu_w": 0.12447761194029851, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.08695993525459589, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2277.0, "completions/max_terminated_length": 2277.0, "completions/mean_length": 365.75390625, "completions/mean_terminated_length": 365.75390625, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.17813333333333334, "grad_norm": 0.008568638935685158, "learning_rate": 9.166666666666666e-07, "loss": 0.0978, "num_tokens": 30415235.0, "reward": 1.0457689762115479, "reward_std": 0.09472964704036713, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6423191428184509, "rewards/format_reward_step": 0.984375, "step": 167 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3776658959686756, "aux_distill/mean_u": 0.21580393844564622, "aux_distill/n_active_tok": 337.90625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6339692307692307, "calib/avg_num_step_conf": 5.27734375, "calib/ece": 0.3627843137254902, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03651076923076921, "calib/mean_conf": 0.14733333333333334, "calib/mu_c": 0.16523076923076924, "calib/mu_w": 0.12872000000000003, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00015686274509803936, "calib/std_conf": 0.0849910414348654, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 932.0, "completions/max_terminated_length": 932.0, "completions/mean_length": 335.875, "completions/mean_terminated_length": 337.1921691894531, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.1792, "grad_norm": 0.009574908763170242, "learning_rate": 8.88888888888889e-07, "loss": 0.0351, "num_tokens": 30605891.0, "reward": 1.0578217506408691, "reward_std": 0.08997109532356262, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.6195499897003174, "rewards/format_reward_step": 0.98828125, "step": 168 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3557533877901733, "aux_distill/mean_u": 0.15889651282363162, "aux_distill/n_active_tok": 359.59375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6078159645232816, "calib/avg_num_step_conf": 5.59765625, "calib/ece": 0.3290980392156862, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.025280857354028102, "calib/mean_conf": 0.1532549019607843, "calib/mu_c": 0.16634146341463416, "calib/mu_w": 0.14106060606060605, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.08227163119508565, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2253.0, "completions/max_terminated_length": 2253.0, "completions/mean_length": 355.71875, "completions/mean_terminated_length": 355.71875, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 0.18026666666666666, "grad_norm": 0.009443276561796665, "learning_rate": 8.611111111111112e-07, "loss": 0.04, "num_tokens": 30801139.0, "reward": 1.051137089729309, "reward_std": 0.0901157557964325, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.6374304294586182, "rewards/format_reward_step": 0.984375, "step": 169 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.351165306288749, "aux_distill/mean_u": 0.1616900184663727, "aux_distill/n_active_tok": 420.21875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5186444948349711, "calib/avg_num_step_conf": 6.58203125, "calib/ece": 0.33551587301587305, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005952380952380959, "calib/mean_conf": 0.16829365079365083, "calib/mu_c": 0.17126984126984127, "calib/mu_w": 0.1653174603174603, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001904761904761905, "calib/std_conf": 0.08327536721674905, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2416.0, "completions/max_terminated_length": 2416.0, "completions/mean_length": 377.94921875, "completions/mean_terminated_length": 379.431396484375, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.18133333333333335, "grad_norm": 0.019583020359277725, "learning_rate": 8.333333333333333e-07, "loss": 0.067, "num_tokens": 31002046.0, "reward": 1.051318645477295, "reward_std": 0.09664470702409744, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.6260746121406555, "rewards/format_reward_step": 0.984375, "step": 170 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3422384618315846, "aux_distill/mean_u": 0.1775502851850448, "aux_distill/n_active_tok": 346.15625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5632081280788178, "calib/avg_num_step_conf": 5.40234375, "calib/ece": 0.28039062499999995, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.026197044334975356, "calib/mean_conf": 0.17765625000000002, "calib/mu_c": 0.19198275862068964, "calib/mu_w": 0.1657857142857143, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0024609375, "calib/std_conf": 0.09883686349706522, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 928.0, "completions/max_terminated_length": 928.0, "completions/mean_length": 334.4609375, "completions/mean_terminated_length": 335.7725830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.1824, "grad_norm": 0.009695254266262054, "learning_rate": 8.055555555555557e-07, "loss": 0.0486, "num_tokens": 31194564.0, "reward": 1.0663268566131592, "reward_std": 0.08017165958881378, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.6795288920402527, "rewards/format_reward_step": 1.0, "step": 171 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.34708446078002453, "aux_distill/mean_u": 0.12701413012860135, "aux_distill/n_active_tok": 359.28125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5189120139581256, "calib/avg_num_step_conf": 5.57421875, "calib/ece": 0.34846456692913386, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007395314057826563, "calib/mean_conf": 0.19641732283464566, "calib/mu_c": 0.1998529411764706, "calib/mu_w": 0.19245762711864403, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004724409448818897, "calib/std_conf": 0.09339273951655766, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1859.0, "completions/max_terminated_length": 1859.0, "completions/mean_length": 331.703125, "completions/mean_terminated_length": 331.703125, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.18346666666666667, "grad_norm": 0.009064041078090668, "learning_rate": 7.777777777777779e-07, "loss": 0.0595, "num_tokens": 31382832.0, "reward": 1.0748931169509888, "reward_std": 0.09136275202035904, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.6263488531112671, "rewards/format_reward_step": 0.9921875, "step": 172 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.31844893423840404, "aux_distill/mean_u": 0.1626621419976694, "aux_distill/n_active_tok": 407.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4622554448135844, "calib/avg_num_step_conf": 6.32421875, "calib/ece": 0.33415686274509815, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.019946474713916584, "calib/mean_conf": 0.19094117647058823, "calib/mu_c": 0.18108527131782945, "calib/mu_w": 0.20103174603174603, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.009607843137254903, "calib/std_conf": 0.10646870326573207, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2345.0, "completions/max_terminated_length": 2345.0, "completions/mean_length": 368.19140625, "completions/mean_terminated_length": 368.19140625, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 0.18453333333333333, "grad_norm": 0.008606944233179092, "learning_rate": 7.5e-07, "loss": 0.0435, "num_tokens": 31580249.0, "reward": 1.0635401010513306, "reward_std": 0.08568964153528214, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6270800828933716, "rewards/format_reward_step": 0.99609375, "step": 173 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.379526324570179, "aux_distill/mean_u": 0.19726045253259963, "aux_distill/n_active_tok": 413.875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.47033685268979386, "calib/avg_num_step_conf": 6.45703125, "calib/ece": 0.2892094861660079, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": -0.015221216691804956, "calib/mean_conf": 0.1850197628458498, "calib/mu_c": 0.17683760683760683, "calib/mu_w": 0.19205882352941178, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005889328063241106, "calib/std_conf": 0.10748885227953005, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2196.0, "completions/max_terminated_length": 2196.0, "completions/mean_length": 396.53515625, "completions/mean_terminated_length": 396.53515625, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.1856, "grad_norm": 0.008462514728307724, "learning_rate": 7.222222222222222e-07, "loss": 0.087, "num_tokens": 31785994.0, "reward": 1.0464767217636108, "reward_std": 0.10494433343410492, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.6476410627365112, "rewards/format_reward_step": 0.98828125, "step": 174 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3483807328157127, "aux_distill/mean_u": 0.16240182766074354, "aux_distill/n_active_tok": 386.625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5680039138943248, "calib/avg_num_step_conf": 6.01953125, "calib/ece": 0.2311553784860558, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.024359425962165643, "calib/mean_conf": 0.2013545816733068, "calib/mu_c": 0.21552380952380948, "calib/mu_w": 0.19116438356164384, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.007091633466135458, "calib/std_conf": 0.11309023020690735, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2076.0, "completions/max_terminated_length": 2076.0, "completions/mean_length": 348.3125, "completions/mean_terminated_length": 348.3125, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.18666666666666668, "grad_norm": 0.009568187408149242, "learning_rate": 6.944444444444446e-07, "loss": 0.02, "num_tokens": 31980986.0, "reward": 1.0292150974273682, "reward_std": 0.14621660113334656, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6834300756454468, "rewards/format_reward_step": 0.96484375, "step": 175 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.390253531280905, "aux_distill/mean_u": 0.23588810433493573, "aux_distill/n_active_tok": 391.28125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.609801031687546, "calib/avg_num_step_conf": 6.1484375, "calib/ece": 0.24800781249999998, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04109923851633504, "calib/mean_conf": 0.21292968750000002, "calib/mu_c": 0.23508474576271188, "calib/mu_w": 0.19398550724637684, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.10612313629059567, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1037.0, "completions/max_terminated_length": 1037.0, "completions/mean_length": 349.96875, "completions/mean_terminated_length": 351.3411865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.18773333333333334, "grad_norm": 0.010521200485527515, "learning_rate": 6.666666666666667e-07, "loss": 0.0337, "num_tokens": 32174642.0, "reward": 1.0800588130950928, "reward_std": 0.07746738940477371, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6991801261901855, "rewards/format_reward_step": 1.0, "step": 176 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.34870817279443145, "aux_distill/mean_u": 0.16614834834143513, "aux_distill/n_active_tok": 381.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.629968944099379, "calib/avg_num_step_conf": 5.89453125, "calib/ece": 0.2436470588235294, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04457453416149071, "calib/mean_conf": 0.2167450980392157, "calib/mu_c": 0.24121739130434786, "calib/mu_w": 0.19664285714285715, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004705882352941175, "calib/std_conf": 0.11159799055349796, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2249.0, "completions/max_terminated_length": 2249.0, "completions/mean_length": 360.1015625, "completions/mean_terminated_length": 360.1015625, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.1888, "grad_norm": 0.009426409378647804, "learning_rate": 6.388888888888889e-07, "loss": 0.0435, "num_tokens": 32370660.0, "reward": 1.0748529434204102, "reward_std": 0.08647443354129791, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.7043933868408203, "rewards/format_reward_step": 0.99609375, "step": 177 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.383257410954684, "aux_distill/mean_u": 0.20492723478918864, "aux_distill/n_active_tok": 369.875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.657276119402985, "calib/avg_num_step_conf": 5.75390625, "calib/ece": 0.2581102362204724, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.06492661691542292, "calib/mean_conf": 0.21433070866141732, "calib/mu_c": 0.24858333333333338, "calib/mu_w": 0.18365671641791045, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.11762137675429811, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2004.0, "completions/max_terminated_length": 2004.0, "completions/mean_length": 360.80859375, "completions/mean_terminated_length": 360.80859375, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.18986666666666666, "grad_norm": 0.008812584914267063, "learning_rate": 6.111111111111112e-07, "loss": 0.0495, "num_tokens": 32569099.0, "reward": 1.0790581703186035, "reward_std": 0.10145758092403412, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6971789002418518, "rewards/format_reward_step": 0.9921875, "step": 178 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4004754419438541, "aux_distill/mean_u": 0.20431848266051975, "aux_distill/n_active_tok": 404.59375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6015818858560795, "calib/avg_num_step_conf": 6.296875, "calib/ece": 0.28051181102362205, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.043978908188585575, "calib/mean_conf": 0.2312992125984252, "calib/mu_c": 0.25276923076923075, "calib/mu_w": 0.20879032258064517, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.11336081444374199, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1880.0, "completions/max_terminated_length": 1880.0, "completions/mean_length": 385.54296875, "completions/mean_terminated_length": 385.54296875, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.19093333333333334, "grad_norm": 0.008360245265066624, "learning_rate": 5.833333333333334e-07, "loss": 0.0576, "num_tokens": 32774062.0, "reward": 1.087631106376648, "reward_std": 0.09885761886835098, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.6752620935440063, "rewards/format_reward_step": 0.9921875, "step": 179 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.33522659935988486, "aux_distill/mean_u": 0.20487803825146703, "aux_distill/n_active_tok": 378.28125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5125222476481058, "calib/avg_num_step_conf": 5.94140625, "calib/ece": 0.22277777777777777, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012578184591914593, "calib/mean_conf": 0.22960317460317461, "calib/mu_c": 0.23649122807017545, "calib/mu_w": 0.22391304347826085, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.10751818446161512, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1961.0, "completions/max_terminated_length": 1961.0, "completions/mean_length": 408.234375, "completions/mean_terminated_length": 409.8353271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.192, "grad_norm": 0.008559898473322392, "learning_rate": 5.555555555555555e-07, "loss": 0.0568, "num_tokens": 32982426.0, "reward": 1.0580508708953857, "reward_std": 0.1284056156873703, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6864140629768372, "rewards/format_reward_step": 0.984375, "step": 180 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.35764050111174583, "aux_distill/mean_u": 0.15735053100971474, "aux_distill/n_active_tok": 374.96875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5122976338729763, "calib/avg_num_step_conf": 5.84765625, "calib/ece": 0.2119140625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00678829389788288, "calib/mean_conf": 0.23449218750000003, "calib/mu_c": 0.23836363636363628, "calib/mu_w": 0.2315753424657534, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.008359375, "calib/std_conf": 0.11839311477642965, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1127.0, "completions/max_terminated_length": 1127.0, "completions/mean_length": 341.95703125, "completions/mean_terminated_length": 343.2980651855469, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.19306666666666666, "grad_norm": 0.010379571467638016, "learning_rate": 5.277777777777779e-07, "loss": 0.0517, "num_tokens": 33176231.0, "reward": 1.067920207977295, "reward_std": 0.08823342621326447, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.7061527967453003, "rewards/format_reward_step": 1.0, "step": 181 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3427185930777341, "aux_distill/mean_u": 0.1633866363280467, "aux_distill/n_active_tok": 423.53125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5718860984271944, "calib/avg_num_step_conf": 6.56640625, "calib/ece": 0.348543307086614, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.02038305428716386, "calib/mean_conf": 0.24208661417322838, "calib/mu_c": 0.25075342465753425, "calib/mu_w": 0.2303703703703704, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.007913385826771657, "calib/std_conf": 0.11763405980218507, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2886.0, "completions/max_terminated_length": 2886.0, "completions/mean_length": 394.640625, "completions/mean_terminated_length": 394.640625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.19413333333333332, "grad_norm": 0.008300860412418842, "learning_rate": 5.000000000000001e-07, "loss": 0.0785, "num_tokens": 33383419.0, "reward": 1.0954002141952515, "reward_std": 0.12059742212295532, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.6322066187858582, "rewards/format_reward_step": 0.98828125, "step": 182 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.34864079393446445, "aux_distill/mean_u": 0.17197240593332896, "aux_distill/n_active_tok": 403.40625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.527304737516005, "calib/avg_num_step_conf": 6.28515625, "calib/ece": 0.2254761904761905, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0016542893725992325, "calib/mean_conf": 0.23015873015873015, "calib/mu_c": 0.23109090909090907, "calib/mu_w": 0.22943661971830984, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.009563492063492062, "calib/std_conf": 0.10816642179871122, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1803.0, "completions/max_terminated_length": 1803.0, "completions/mean_length": 406.1484375, "completions/mean_terminated_length": 406.1484375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.1952, "grad_norm": 0.008422174490988255, "learning_rate": 4.7222222222222226e-07, "loss": 0.0849, "num_tokens": 33594073.0, "reward": 1.0518405437469482, "reward_std": 0.1299438327550888, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.689618706703186, "rewards/format_reward_step": 0.984375, "step": 183 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3643836765550077, "aux_distill/mean_u": 0.1838418861646983, "aux_distill/n_active_tok": 372.09375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5901516569930725, "calib/avg_num_step_conf": 5.80078125, "calib/ece": 0.32921875000000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": 0.02247768832303565, "calib/mean_conf": 0.264375, "calib/mu_c": 0.27394557823129256, "calib/mu_w": 0.2514678899082569, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.009687500000000003, "calib/std_conf": 0.12339754100062124, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 937.0, "completions/max_terminated_length": 937.0, "completions/mean_length": 345.78515625, "completions/mean_terminated_length": 347.1412048339844, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.19626666666666667, "grad_norm": 0.009029755368828773, "learning_rate": 4.444444444444445e-07, "loss": 0.0563, "num_tokens": 33787874.0, "reward": 1.1147441864013672, "reward_std": 0.10845861583948135, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6552695035934448, "rewards/format_reward_step": 1.0, "step": 184 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.33357510063797235, "aux_distill/mean_u": 0.1604589205149373, "aux_distill/n_active_tok": 384.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5682057965617825, "calib/avg_num_step_conf": 5.96875, "calib/ece": 0.2504330708661417, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.025701607397753345, "calib/mean_conf": 0.2588582677165354, "calib/mu_c": 0.27211382113821136, "calib/mu_w": 0.24641221374045802, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.012519685039370083, "calib/std_conf": 0.13382573133987627, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 363.3515625, "completions/mean_terminated_length": 364.7764892578125, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.19733333333333333, "grad_norm": 0.008039743639528751, "learning_rate": 4.1666666666666667e-07, "loss": 0.026, "num_tokens": 33987812.0, "reward": 1.0771498680114746, "reward_std": 0.12123231589794159, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.6855496168136597, "rewards/format_reward_step": 0.98828125, "step": 185 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38432842725887895, "aux_distill/mean_u": 0.21509393378718752, "aux_distill/n_active_tok": 407.28125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6162308673469387, "calib/avg_num_step_conf": 6.30859375, "calib/ece": 0.2046031746031746, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.04655357142857139, "calib/mean_conf": 0.24976190476190477, "calib/mu_c": 0.27562499999999995, "calib/mu_w": 0.22907142857142856, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004960317460317459, "calib/std_conf": 0.12407732027206138, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2598.0, "completions/max_terminated_length": 2598.0, "completions/mean_length": 380.109375, "completions/mean_terminated_length": 381.60003662109375, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.1984, "grad_norm": 0.008767403662204742, "learning_rate": 3.8888888888888895e-07, "loss": 0.0441, "num_tokens": 34190160.0, "reward": 1.0666804313659668, "reward_std": 0.12350873649120331, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.7114859819412231, "rewards/format_reward_step": 0.984375, "step": 186 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3616175437346101, "aux_distill/mean_u": 0.187725407741795, "aux_distill/n_active_tok": 461.90625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5431467181467183, "calib/avg_num_step_conf": 7.140625, "calib/ece": 0.18505928853754938, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.018873230373230382, "calib/mean_conf": 0.250197628458498, "calib/mu_c": 0.26123809523809527, "calib/mu_w": 0.2423648648648649, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.010118577075098814, "calib/std_conf": 0.1355692159982922, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2371.0, "completions/max_terminated_length": 2371.0, "completions/mean_length": 424.375, "completions/mean_terminated_length": 424.375, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.19946666666666665, "grad_norm": 0.007748098578304052, "learning_rate": 3.611111111111111e-07, "loss": 0.058, "num_tokens": 34400344.0, "reward": 1.0515248775482178, "reward_std": 0.1344430148601532, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.708518385887146, "rewards/format_reward_step": 0.984375, "step": 187 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3558740112930536, "aux_distill/mean_u": 0.1870574824077909, "aux_distill/n_active_tok": 410.65625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6338037121789982, "calib/avg_num_step_conf": 6.35546875, "calib/ece": 0.3025, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.0531464530892449, "calib/mean_conf": 0.2505952380952381, "calib/mu_c": 0.27463768115942033, "calib/mu_w": 0.22149122807017543, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.002738095238095238, "calib/std_conf": 0.13092786567064438, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2239.0, "completions/max_terminated_length": 2239.0, "completions/mean_length": 403.42578125, "completions/mean_terminated_length": 403.42578125, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.20053333333333334, "grad_norm": 0.008264458738267422, "learning_rate": 3.3333333333333335e-07, "loss": 0.0726, "num_tokens": 34607693.0, "reward": 1.0906145572662354, "reward_std": 0.14208179712295532, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.661698043346405, "rewards/format_reward_step": 0.98046875, "step": 188 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3595278114080429, "aux_distill/mean_u": 0.18541961370967283, "aux_distill/n_active_tok": 444.1875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5591603053435115, "calib/avg_num_step_conf": 6.9140625, "calib/ece": 0.2752156862745097, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.01724082738241814, "calib/mean_conf": 0.251921568627451, "calib/mu_c": 0.2603053435114504, "calib/mu_w": 0.24306451612903227, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.006705882352941177, "calib/std_conf": 0.13009134696205646, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1585.0, "completions/max_terminated_length": 1585.0, "completions/mean_length": 411.5078125, "completions/mean_terminated_length": 413.12158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.2016, "grad_norm": 0.008612432517111301, "learning_rate": 3.055555555555556e-07, "loss": 0.0732, "num_tokens": 34820807.0, "reward": 1.085369348526001, "reward_std": 0.10980735719203949, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.666832447052002, "rewards/format_reward_step": 0.9921875, "step": 189 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3589458088390529, "aux_distill/mean_u": 0.1985515963944311, "aux_distill/n_active_tok": 439.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6022411616161616, "calib/avg_num_step_conf": 6.82421875, "calib/ece": 0.20594488188976376, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03891792929292931, "calib/mean_conf": 0.2401181102362205, "calib/mu_c": 0.26218181818181824, "calib/mu_w": 0.22326388888888893, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.006496062992125984, "calib/std_conf": 0.11377378537672232, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1861.0, "completions/max_terminated_length": 1861.0, "completions/mean_length": 416.515625, "completions/mean_terminated_length": 418.1490478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.20266666666666666, "grad_norm": 0.00790832657366991, "learning_rate": 2.7777777777777776e-07, "loss": 0.0408, "num_tokens": 35033043.0, "reward": 1.0671627521514893, "reward_std": 0.1164865642786026, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.716356635093689, "rewards/format_reward_step": 0.98828125, "step": 190 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3259891658090055, "aux_distill/mean_u": 0.1363302629548188, "aux_distill/n_active_tok": 425.53125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5315154922538731, "calib/avg_num_step_conf": 6.62890625, "calib/ece": 0.202244094488189, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013333333333333364, "calib/mean_conf": 0.277755905511811, "calib/mu_c": 0.285, "calib/mu_w": 0.2716666666666666, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011653543307086612, "calib/std_conf": 0.1221980589646629, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2261.0, "completions/max_terminated_length": 2261.0, "completions/mean_length": 373.3203125, "completions/mean_terminated_length": 373.3203125, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.20373333333333332, "grad_norm": 0.008117207325994968, "learning_rate": 2.5000000000000004e-07, "loss": 0.0879, "num_tokens": 35232781.0, "reward": 1.0756475925445557, "reward_std": 0.10618412494659424, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.7059824466705322, "rewards/format_reward_step": 0.9921875, "step": 191 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.35070336470380425, "aux_distill/mean_u": 0.18195896053579147, "aux_distill/n_active_tok": 388.84375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.645326504481434, "calib/avg_num_step_conf": 6.05078125, "calib/ece": 0.3147619047619048, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.05792317541613318, "calib/mean_conf": 0.2487301587301587, "calib/mu_c": 0.27401408450704223, "calib/mu_w": 0.21609090909090906, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.1204487912462106, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 373.5, "completions/mean_terminated_length": 373.5, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.2048, "grad_norm": 0.009182264097034931, "learning_rate": 2.2222222222222224e-07, "loss": 0.0331, "num_tokens": 35433373.0, "reward": 1.0928065776824951, "reward_std": 0.10991697758436203, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.6543632745742798, "rewards/format_reward_step": 0.9765625, "step": 192 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.39118539495393634, "aux_distill/mean_u": 0.18369763625575194, "aux_distill/n_active_tok": 401.9375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6368797168236158, "calib/avg_num_step_conf": 6.25, "calib/ece": 0.12, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.06101115340947039, "calib/mean_conf": 0.2540157480314961, "calib/mu_c": 0.29268817204301073, "calib/mu_w": 0.23167701863354034, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.003937007874015748, "calib/std_conf": 0.12262714656565303, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2157.0, "completions/max_terminated_length": 2157.0, "completions/mean_length": 391.76953125, "completions/mean_terminated_length": 391.76953125, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.20586666666666667, "grad_norm": 0.008967787027359009, "learning_rate": 1.9444444444444447e-07, "loss": 0.05, "num_tokens": 35639378.0, "reward": 1.043562412261963, "reward_std": 0.1039130762219429, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.7472812533378601, "rewards/format_reward_step": 0.9765625, "step": 193 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3637796104885638, "aux_distill/mean_u": 0.18909235012238806, "aux_distill/n_active_tok": 324.5, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5746501749125437, "calib/avg_num_step_conf": 5.05859375, "calib/ece": 0.2801181102362204, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.030187406296851516, "calib/mean_conf": 0.2729527559055119, "calib/mu_c": 0.28673913043478255, "calib/mu_w": 0.25655172413793104, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.004881889763779529, "calib/std_conf": 0.12136178478979952, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2007.0, "completions/max_terminated_length": 2007.0, "completions/mean_length": 339.18359375, "completions/mean_terminated_length": 339.18359375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.20693333333333333, "grad_norm": 0.009117648005485535, "learning_rate": 1.6666666666666668e-07, "loss": 0.066, "num_tokens": 35832153.0, "reward": 1.1024904251098633, "reward_std": 0.11599205434322357, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.6737308502197266, "rewards/format_reward_step": 0.9921875, "step": 194 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3681975821964443, "aux_distill/mean_u": 0.21568309308917685, "aux_distill/n_active_tok": 384.53125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5918079096045198, "calib/avg_num_step_conf": 6.0546875, "calib/ece": 0.2940316205533597, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03751851851851845, "calib/mean_conf": 0.24501976284584978, "calib/mu_c": 0.2625185185185185, "calib/mu_w": 0.22500000000000003, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.002727272727272727, "calib/std_conf": 0.11820001974923042, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2956.0, "completions/max_terminated_length": 2956.0, "completions/mean_length": 386.69921875, "completions/mean_terminated_length": 388.2156982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.208, "grad_norm": 0.009348858147859573, "learning_rate": 1.3888888888888888e-07, "loss": 0.0937, "num_tokens": 36037132.0, "reward": 1.0756480693817139, "reward_std": 0.1506083607673645, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.651296079158783, "rewards/format_reward_step": 0.97265625, "step": 195 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3619554964825511, "aux_distill/mean_u": 0.16458977785492765, "aux_distill/n_active_tok": 369.5625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6130852713178295, "calib/avg_num_step_conf": 5.73046875, "calib/ece": 0.20559055118110234, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.050297054263565955, "calib/mean_conf": 0.28653543307086615, "calib/mu_c": 0.31208, "calib/mu_w": 0.26178294573643407, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.11713023878992854, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2781.0, "completions/max_terminated_length": 2781.0, "completions/mean_length": 337.21484375, "completions/mean_terminated_length": 337.21484375, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.20906666666666668, "grad_norm": 0.008571833372116089, "learning_rate": 1.1111111111111112e-07, "loss": 0.0933, "num_tokens": 36226003.0, "reward": 1.0970335006713867, "reward_std": 0.11255770921707153, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.7135984897613525, "rewards/format_reward_step": 0.9921875, "step": 196 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38431750517338514, "aux_distill/mean_u": 0.23451896721609689, "aux_distill/n_active_tok": 441.875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5431704260651631, "calib/avg_num_step_conf": 6.859375, "calib/ece": 0.19200787401574806, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.022473684210526312, "calib/mean_conf": 0.26208661417322837, "calib/mu_c": 0.2744736842105263, "calib/mu_w": 0.252, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0026377952755905513, "calib/std_conf": 0.11638576391162316, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2664.0, "completions/max_terminated_length": 2664.0, "completions/mean_length": 381.55859375, "completions/mean_terminated_length": 384.56298828125, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.21013333333333334, "grad_norm": 0.008849774487316608, "learning_rate": 8.333333333333334e-08, "loss": -0.0048, "num_tokens": 36428738.0, "reward": 1.0736178159713745, "reward_std": 0.1103426069021225, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.7097355127334595, "rewards/format_reward_step": 0.9921875, "step": 197 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38216920103877783, "aux_distill/mean_u": 0.17609322742804628, "aux_distill/n_active_tok": 426.65625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5772788947822869, "calib/avg_num_step_conf": 6.61328125, "calib/ece": 0.2735686274509804, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.027984457875909724, "calib/mean_conf": 0.2570196078431372, "calib/mu_c": 0.2702985074626866, "calib/mu_w": 0.24231404958677685, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0025490196078431387, "calib/std_conf": 0.12130555977043575, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1590.0, "completions/max_terminated_length": 1590.0, "completions/mean_length": 379.24609375, "completions/mean_terminated_length": 380.7333679199219, "completions/min_length": 0.0, "completions/min_terminated_length": 92.0, "epoch": 0.2112, "grad_norm": 0.008252741768956184, "learning_rate": 5.555555555555556e-08, "loss": 0.0495, "num_tokens": 36631209.0, "reward": 1.097348928451538, "reward_std": 0.10788056254386902, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.6751663684844971, "rewards/format_reward_step": 0.99609375, "step": 198 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.35966091603040695, "aux_distill/mean_u": 0.20198334396455808, "aux_distill/n_active_tok": 390.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5756899224806202, "calib/avg_num_step_conf": 6.02734375, "calib/ece": 0.2374803149606299, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.034306976744186146, "calib/mean_conf": 0.2689763779527559, "calib/mu_c": 0.28640000000000004, "calib/mu_w": 0.2520930232558139, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007165354330708661, "calib/std_conf": 0.12804917021667883, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2242.0, "completions/max_terminated_length": 2242.0, "completions/mean_length": 392.3828125, "completions/mean_terminated_length": 393.9216003417969, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.21226666666666666, "grad_norm": 0.008318433538079262, "learning_rate": 2.777777777777778e-08, "loss": 0.0254, "num_tokens": 36835859.0, "reward": 1.0880054235458374, "reward_std": 0.11319223791360855, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.6955421566963196, "rewards/format_reward_step": 0.9921875, "step": 199 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36204342264682055, "aux_distill/mean_u": 0.191237035896235, "aux_distill/n_active_tok": 377.15625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6301767676767677, "calib/avg_num_step_conf": 5.80859375, "calib/ece": 0.22722222222222221, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.05615909090909091, "calib/mean_conf": 0.25166666666666665, "calib/mu_c": 0.28108333333333335, "calib/mu_w": 0.22492424242424244, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0013492063492063487, "calib/std_conf": 0.1274521385424647, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2290.0, "completions/max_terminated_length": 2290.0, "completions/mean_length": 396.66015625, "completions/mean_terminated_length": 398.2156982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.21333333333333335, "grad_norm": 0.009059817530214787, "learning_rate": 0.0, "loss": 0.0259, "num_tokens": 37045452.0, "reward": 1.0666258335113525, "reward_std": 0.12554016709327698, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6918452978134155, "rewards/format_reward_step": 0.97265625, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.06970496780704707, "train_runtime": 12590.6678, "train_samples_per_second": 4.067, "train_steps_per_second": 0.016 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 37045452, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }