{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.32, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.41015625, "calib/auroc": 0.5004346566212692, "calib/avg_num_step_conf": 2.54296875, "calib/ece": 0.6533783783783784, "calib/final_conf_rate": 0.578125, "calib/format_rate": 0.328125, "calib/frac_conf_gt_0.9": 0.49324324324324326, "calib/gap": 0.03429150970733108, "calib/mean_conf": 0.8493243243243244, "calib/mu_c": 0.876896551724138, "calib/mu_w": 0.842605042016807, "calib/nonempty_final_conf_rate": 0.578125, "calib/nonempty_reasoning_rate": 0.578125, "calib/nonempty_step_conf_rate": 0.5078125, "calib/pce": 0.6533783783783784, "calib/std_conf": 0.20379395450997492, "calib/step_conf_rate": 0.5078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 1348.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 229.34765625, "completions/mean_terminated_length": 239.64488220214844, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0016, "grad_norm": 1.1471130847930908, "learning_rate": 2.5000000000000004e-07, "loss": -0.0995, "num_tokens": 384153.0, "reward": 0.43480801582336426, "reward_std": 0.6504053473472595, "rewards/accuracy_reward_step": 0.1171875, "rewards/final_brier_reward_step": 0.14922383427619934, "rewards/format_reward_step": 0.328125, "rewards/stepwise_brier_reward": 0.23063313961029053, "step": 1 }, { "calib/answer_extract_rate": 0.4765625, "calib/auroc": 0.5296425457715781, "calib/avg_num_step_conf": 2.4921875, "calib/ece": 0.638447204968944, "calib/final_conf_rate": 0.62890625, "calib/format_rate": 0.37890625, "calib/frac_conf_gt_0.9": 0.5217391304347826, "calib/gap": 0.03945510026155208, "calib/mean_conf": 0.8682608695652173, "calib/mu_c": 0.8986486486486487, "calib/mu_w": 0.8591935483870966, "calib/nonempty_final_conf_rate": 0.62890625, "calib/nonempty_reasoning_rate": 0.58984375, "calib/nonempty_step_conf_rate": 0.5, "calib/pce": 0.638447204968944, "calib/std_conf": 0.1677382063219749, "calib/step_conf_rate": 0.5, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 1437.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 229.40625, "completions/mean_terminated_length": 239.70611572265625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 2.901979684829712, "learning_rate": 5.000000000000001e-07, "loss": -0.1244, "num_tokens": 790441.0, "reward": 0.511868417263031, "reward_std": 0.6400600075721741, "rewards/accuracy_reward_step": 0.14453125, "rewards/final_brier_reward_step": 0.1580679714679718, "rewards/format_reward_step": 0.37890625, "rewards/stepwise_brier_reward": 0.26440560817718506, "step": 2 }, { "calib/answer_extract_rate": 0.41796875, "calib/auroc": 0.5543981481481481, "calib/avg_num_step_conf": 2.10546875, "calib/ece": 0.6660000000000001, "calib/final_conf_rate": 0.546875, "calib/format_rate": 0.3359375, "calib/frac_conf_gt_0.9": 0.6214285714285714, "calib/gap": 0.04876157407407422, "calib/mean_conf": 0.8945714285714286, "calib/mu_c": 0.9321875, "calib/mu_w": 0.8834259259259257, "calib/nonempty_final_conf_rate": 0.546875, "calib/nonempty_reasoning_rate": 0.53515625, "calib/nonempty_step_conf_rate": 0.46875, "calib/pce": 0.6660000000000001, "calib/std_conf": 0.13547994806070235, "calib/step_conf_rate": 0.46875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 1534.0, "completions/max_terminated_length": 1534.0, "completions/mean_length": 266.99609375, "completions/mean_terminated_length": 275.6088562011719, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0048, "grad_norm": 5.7775468826293945, "learning_rate": 7.5e-07, "loss": -0.0927, "num_tokens": 1199768.0, "reward": 0.46704915165901184, "reward_std": 0.6950008869171143, "rewards/accuracy_reward_step": 0.12890625, "rewards/final_brier_reward_step": 0.15416249632835388, "rewards/format_reward_step": 0.3359375, "rewards/stepwise_brier_reward": 0.26872166991233826, "step": 3 }, { "calib/answer_extract_rate": 0.421875, "calib/auroc": 0.5758771929824561, "calib/avg_num_step_conf": 1.71484375, "calib/ece": 0.716492537313433, "calib/final_conf_rate": 0.5234375, "calib/format_rate": 0.30078125, "calib/frac_conf_gt_0.9": 0.48507462686567165, "calib/gap": 0.05084210526315769, "calib/mean_conf": 0.8657462686567163, "calib/mu_c": 0.9089999999999998, "calib/mu_w": 0.8581578947368421, "calib/nonempty_final_conf_rate": 0.5234375, "calib/nonempty_reasoning_rate": 0.51953125, "calib/nonempty_step_conf_rate": 0.4140625, "calib/pce": 0.716492537313433, "calib/std_conf": 0.17957818263950365, "calib/step_conf_rate": 0.4140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 232.78515625, "completions/mean_terminated_length": 243.23672485351562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 1.5228453874588013, "learning_rate": 1.0000000000000002e-06, "loss": -0.11, "num_tokens": 1612633.0, "reward": 0.3533709645271301, "reward_std": 0.52557772397995, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.10705117136240005, "rewards/format_reward_step": 0.30078125, "rewards/stepwise_brier_reward": 0.23612016439437866, "step": 4 }, { "calib/answer_extract_rate": 0.6171875, "calib/auroc": 0.5364382239382239, "calib/avg_num_step_conf": 2.953125, "calib/ece": 0.7019886363636365, "calib/final_conf_rate": 0.6875, "calib/format_rate": 0.49609375, "calib/frac_conf_gt_0.9": 0.5227272727272727, "calib/gap": 0.045009652509652276, "calib/mean_conf": 0.8610795454545456, "calib/mu_c": 0.8989285714285712, "calib/mu_w": 0.8539189189189189, "calib/nonempty_final_conf_rate": 0.6875, "calib/nonempty_reasoning_rate": 0.71875, "calib/nonempty_step_conf_rate": 0.609375, "calib/pce": 0.7019886363636365, "calib/std_conf": 0.18420429133233548, "calib/step_conf_rate": 0.609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 1381.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 282.92578125, "completions/mean_terminated_length": 293.2348327636719, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008, "grad_norm": 0.5310338735580444, "learning_rate": 1.25e-06, "loss": -0.0677, "num_tokens": 2046894.0, "reward": 0.543309211730957, "reward_std": 0.632076621055603, "rewards/accuracy_reward_step": 0.109375, "rewards/final_brier_reward_step": 0.17852148413658142, "rewards/format_reward_step": 0.49609375, "rewards/stepwise_brier_reward": 0.3462778329849243, "step": 5 }, { "calib/answer_extract_rate": 0.6484375, "calib/auroc": 0.5823987333421295, "calib/avg_num_step_conf": 3.1484375, "calib/ece": 0.5968367346938777, "calib/final_conf_rate": 0.765625, "calib/format_rate": 0.55078125, "calib/frac_conf_gt_0.9": 0.5408163265306123, "calib/gap": 0.06417205436073359, "calib/mean_conf": 0.8571428571428571, "calib/mu_c": 0.9039622641509434, "calib/mu_w": 0.8397902097902098, "calib/nonempty_final_conf_rate": 0.765625, "calib/nonempty_reasoning_rate": 0.7890625, "calib/nonempty_step_conf_rate": 0.70703125, "calib/pce": 0.5917857142857144, "calib/std_conf": 0.19894620337351177, "calib/step_conf_rate": 0.70703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 1501.0, "completions/max_terminated_length": 1501.0, "completions/mean_length": 250.95703125, "completions/mean_terminated_length": 259.0523986816406, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 3.514328718185425, "learning_rate": 1.5e-06, "loss": -0.0934, "num_tokens": 2465075.0, "reward": 0.7629420161247253, "reward_std": 0.801480770111084, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.25608164072036743, "rewards/format_reward_step": 0.55078125, "rewards/stepwise_brier_reward": 0.42849892377853394, "step": 6 }, { "calib/answer_extract_rate": 0.6015625, "calib/auroc": 0.6462039297756075, "calib/avg_num_step_conf": 3.23046875, "calib/ece": 0.5529207920792079, "calib/final_conf_rate": 0.7890625, "calib/format_rate": 0.5, "calib/frac_conf_gt_0.9": 0.5247524752475248, "calib/gap": 0.10030810370887144, "calib/mean_conf": 0.8549009900990099, "calib/mu_c": 0.9249180327868857, "calib/mu_w": 0.8246099290780142, "calib/nonempty_final_conf_rate": 0.7890625, "calib/nonempty_reasoning_rate": 0.75, "calib/nonempty_step_conf_rate": 0.6640625, "calib/pce": 0.5529207920792079, "calib/std_conf": 0.18184216880431048, "calib/step_conf_rate": 0.6640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1453.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 247.078125, "completions/mean_terminated_length": 252.0, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0112, "grad_norm": 0.46240970492362976, "learning_rate": 1.75e-06, "loss": -0.0902, "num_tokens": 2874079.0, "reward": 0.775140106678009, "reward_std": 0.7889308929443359, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.2891632914543152, "rewards/format_reward_step": 0.5, "rewards/stepwise_brier_reward": 0.38170963525772095, "step": 7 }, { "calib/answer_extract_rate": 0.73828125, "calib/auroc": 0.5490196078431373, "calib/avg_num_step_conf": 4.1328125, "calib/ece": 0.5892924528301888, "calib/final_conf_rate": 0.828125, "calib/format_rate": 0.66796875, "calib/frac_conf_gt_0.9": 0.5094339622641509, "calib/gap": 0.05312174587349039, "calib/mean_conf": 0.8675943396226415, "calib/mu_c": 0.9059322033898304, "calib/mu_w": 0.85281045751634, "calib/nonempty_final_conf_rate": 0.828125, "calib/nonempty_reasoning_rate": 0.85546875, "calib/nonempty_step_conf_rate": 0.7890625, "calib/pce": 0.5892924528301888, "calib/std_conf": 0.15787912012591696, "calib/step_conf_rate": 0.7890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1425.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 293.1953125, "completions/mean_terminated_length": 296.67193603515625, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0128, "grad_norm": 2.790337324142456, "learning_rate": 2.0000000000000003e-06, "loss": -0.039, "num_tokens": 3280009.0, "reward": 0.8816836476325989, "reward_std": 0.7367245554924011, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.329680860042572, "rewards/format_reward_step": 0.66796875, "rewards/stepwise_brier_reward": 0.4783037602901459, "step": 8 }, { "calib/answer_extract_rate": 0.77734375, "calib/auroc": 0.575652841781874, "calib/avg_num_step_conf": 4.96875, "calib/ece": 0.658868778280543, "calib/final_conf_rate": 0.86328125, "calib/format_rate": 0.75, "calib/frac_conf_gt_0.9": 0.39819004524886875, "calib/gap": 0.07864362519201207, "calib/mean_conf": 0.8172398190045249, "calib/mu_c": 0.8834285714285712, "calib/mu_w": 0.8047849462365592, "calib/nonempty_final_conf_rate": 0.86328125, "calib/nonempty_reasoning_rate": 0.8984375, "calib/nonempty_step_conf_rate": 0.875, "calib/pce": 0.658868778280543, "calib/std_conf": 0.21989730521265205, "calib/step_conf_rate": 0.875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 1453.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 288.578125, "completions/mean_terminated_length": 296.6907653808594, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0144, "grad_norm": 0.49306055903434753, "learning_rate": 2.25e-06, "loss": -0.0522, "num_tokens": 3713349.0, "reward": 0.7794851660728455, "reward_std": 0.5652980804443359, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.31141015887260437, "rewards/format_reward_step": 0.75, "rewards/stepwise_brier_reward": 0.4862178862094879, "step": 9 }, { "calib/answer_extract_rate": 0.92578125, "calib/auroc": 0.6339285714285713, "calib/avg_num_step_conf": 5.59765625, "calib/ece": 0.6440000000000003, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.89453125, "calib/frac_conf_gt_0.9": 0.4875, "calib/gap": 0.05798136645962737, "calib/mean_conf": 0.8773333333333333, "calib/mu_c": 0.9217857142857143, "calib/mu_w": 0.863804347826087, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.6440000000000003, "calib/std_conf": 0.13314611856486425, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 276.734375, "completions/mean_terminated_length": 280.01580810546875, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.016, "grad_norm": 0.18479806184768677, "learning_rate": 2.5e-06, "loss": -0.0062, "num_tokens": 4130337.0, "reward": 1.016362190246582, "reward_std": 0.5757254362106323, "rewards/accuracy_reward_step": 0.21875, "rewards/final_brier_reward_step": 0.36681604385375977, "rewards/format_reward_step": 0.89453125, "rewards/stepwise_brier_reward": 0.5970702767372131, "step": 10 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5444670050761422, "calib/avg_num_step_conf": 5.171875, "calib/ece": 0.6605263157894737, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.46153846153846156, "calib/gap": 0.021962436548223274, "calib/mean_conf": 0.8602834008097167, "calib/mu_c": 0.8778, "calib/mu_w": 0.8558375634517768, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.6591902834008098, "calib/std_conf": 0.15018046043386205, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 946.0, "completions/max_terminated_length": 946.0, "completions/mean_length": 253.4609375, "completions/mean_terminated_length": 256.4664001464844, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.0176, "grad_norm": 0.3017382323741913, "learning_rate": 2.7500000000000004e-06, "loss": -0.0158, "num_tokens": 4553111.0, "reward": 1.0321794748306274, "reward_std": 0.512408435344696, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.3711339831352234, "rewards/format_reward_step": 0.9453125, "rewards/stepwise_brier_reward": 0.6716465950012207, "step": 11 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6409646739130436, "calib/avg_num_step_conf": 5.16015625, "calib/ece": 0.5009126984126984, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.42063492063492064, "calib/gap": 0.06845923913043461, "calib/mean_conf": 0.8580555555555555, "calib/mu_c": 0.9015217391304348, "calib/mu_w": 0.8330625000000002, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.49694444444444436, "calib/std_conf": 0.15787964099453852, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1502.0, "completions/max_terminated_length": 1502.0, "completions/mean_length": 262.90234375, "completions/mean_terminated_length": 266.019775390625, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.0192, "grad_norm": 0.09093901515007019, "learning_rate": 3e-06, "loss": 0.0092, "num_tokens": 4969230.0, "reward": 1.3576459884643555, "reward_std": 0.6042752265930176, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.5232961177825928, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7901004552841187, "step": 12 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6115916955017302, "calib/avg_num_step_conf": 5.234375, "calib/ece": 0.5064313725490198, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.3607843137254902, "calib/gap": 0.06776470588235295, "calib/mean_conf": 0.8397647058823529, "calib/mu_c": 0.8849411764705883, "calib/mu_w": 0.8171764705882354, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.5064313725490198, "calib/std_conf": 0.16581097059607222, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1437.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 254.0703125, "completions/mean_terminated_length": 258.1031799316406, "completions/min_length": 0.0, "completions/min_terminated_length": 44.0, "epoch": 0.0208, "grad_norm": 0.2129509001970291, "learning_rate": 3.2500000000000002e-06, "loss": -0.0004, "num_tokens": 5393472.0, "reward": 1.2935614585876465, "reward_std": 0.4629979729652405, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.514092206954956, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.707028865814209, "step": 13 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.631770498822522, "calib/avg_num_step_conf": 5.16796875, "calib/ece": 0.500472440944882, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.3031496062992126, "calib/gap": 0.08883608078213079, "calib/mean_conf": 0.8193700787401575, "calib/mu_c": 0.8798765432098766, "calib/mu_w": 0.7910404624277458, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.500472440944882, "calib/std_conf": 0.17302427134595005, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 661.0, "completions/max_terminated_length": 661.0, "completions/mean_length": 243.65234375, "completions/mean_terminated_length": 247.5198516845703, "completions/min_length": 0.0, "completions/min_terminated_length": 55.0, "epoch": 0.0224, "grad_norm": 0.07910128682851791, "learning_rate": 3.5e-06, "loss": 0.0066, "num_tokens": 5813583.0, "reward": 1.2911372184753418, "reward_std": 0.4125267565250397, "rewards/accuracy_reward_step": 0.31640625, "rewards/final_brier_reward_step": 0.5366746187210083, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.752874493598938, "step": 14 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5837162350320245, "calib/avg_num_step_conf": 5.390625, "calib/ece": 0.44505882352941173, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.19607843137254902, "calib/gap": 0.06510233918128638, "calib/mean_conf": 0.7738431372549021, "calib/mu_c": 0.8175, "calib/mu_w": 0.7523976608187136, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4447450980392157, "calib/std_conf": 0.18299505752985162, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 731.0, "completions/max_terminated_length": 731.0, "completions/mean_length": 264.93359375, "completions/mean_terminated_length": 269.138916015625, "completions/min_length": 0.0, "completions/min_terminated_length": 55.0, "epoch": 0.024, "grad_norm": 0.09459760040044785, "learning_rate": 3.7500000000000005e-06, "loss": -0.0053, "num_tokens": 6235486.0, "reward": 1.3260624408721924, "reward_std": 0.4246816039085388, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.5746027231216431, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7687090635299683, "step": 15 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.64665304036791, "calib/avg_num_step_conf": 5.72265625, "calib/ece": 0.2550980392156863, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.08627450980392157, "calib/gap": 0.1255825242718448, "calib/mean_conf": 0.6557254901960784, "calib/mu_c": 0.7305825242718448, "calib/mu_w": 0.605, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.25345098039215685, "calib/std_conf": 0.23075188937100627, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 755.0, "completions/max_terminated_length": 755.0, "completions/mean_length": 277.08984375, "completions/mean_terminated_length": 281.48809814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.0256, "grad_norm": 0.09741365909576416, "learning_rate": 4.000000000000001e-06, "loss": -0.0131, "num_tokens": 6675669.0, "reward": 1.4592740535736084, "reward_std": 0.45420658588409424, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.6949933767318726, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7514780759811401, "step": 16 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6593394886363637, "calib/avg_num_step_conf": 5.6796875, "calib/ece": 0.3116796875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.05078125, "calib/gap": 0.13501136363636368, "calib/mean_conf": 0.6241796875000001, "calib/mu_c": 0.717, "calib/mu_w": 0.5819886363636363, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3116796875, "calib/std_conf": 0.2192763999782064, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 695.0, "completions/max_terminated_length": 695.0, "completions/mean_length": 264.9296875, "completions/mean_terminated_length": 269.13494873046875, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.0272, "grad_norm": 0.26401224732398987, "learning_rate": 4.25e-06, "loss": 0.0128, "num_tokens": 7067083.0, "reward": 1.3285796642303467, "reward_std": 0.42579442262649536, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.6931965351104736, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7617468237876892, "step": 17 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6271983225108225, "calib/avg_num_step_conf": 5.765625, "calib/ece": 0.21310546874999994, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0923782467532468, "calib/mean_conf": 0.54943359375, "calib/mu_c": 0.6100568181818182, "calib/mu_w": 0.5176785714285714, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.20939453124999993, "calib/std_conf": 0.19860984514925226, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 787.0, "completions/max_terminated_length": 787.0, "completions/mean_length": 280.92578125, "completions/mean_terminated_length": 285.38494873046875, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.0288, "grad_norm": 0.07939291000366211, "learning_rate": 4.5e-06, "loss": -0.0106, "num_tokens": 7489616.0, "reward": 1.3866751194000244, "reward_std": 0.384634792804718, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.731840968132019, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7601720690727234, "step": 18 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6573951434878588, "calib/avg_num_step_conf": 5.40625, "calib/ece": 0.1323828125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.01171875, "calib/gap": 0.136558183538316, "calib/mean_conf": 0.5040234375, "calib/mu_c": 0.5845714285714286, "calib/mu_w": 0.44801324503311263, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.113125, "calib/std_conf": 0.21852091450404373, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 599.0, "completions/max_terminated_length": 599.0, "completions/mean_length": 261.3671875, "completions/mean_terminated_length": 265.5158996582031, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.0304, "grad_norm": 0.0737985372543335, "learning_rate": 4.75e-06, "loss": 0.0071, "num_tokens": 7910702.0, "reward": 1.4832749366760254, "reward_std": 0.37495696544647217, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.7626621127128601, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7251256108283997, "step": 19 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6827485380116959, "calib/avg_num_step_conf": 5.48828125, "calib/ece": 0.142578125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": 0.12841520467836265, "calib/mean_conf": 0.40523437500000004, "calib/mu_c": 0.49552631578947376, "calib/mu_w": 0.3671111111111111, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.12546875, "calib/std_conf": 0.21182416367912177, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 721.0, "completions/max_terminated_length": 721.0, "completions/mean_length": 260.56640625, "completions/mean_terminated_length": 264.702392578125, "completions/min_length": 0.0, "completions/min_terminated_length": 56.0, "epoch": 0.032, "grad_norm": 0.12154613435268402, "learning_rate": 5e-06, "loss": -0.0238, "num_tokens": 8331383.0, "reward": 1.3289387226104736, "reward_std": 0.37291592359542847, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.7844409942626953, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7578766345977783, "step": 20 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7274920772111784, "calib/avg_num_step_conf": 5.640625, "calib/ece": 0.05320312500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.14939786804955343, "calib/mean_conf": 0.28625, "calib/mu_c": 0.3901282051282051, "calib/mu_w": 0.24073033707865168, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0173828125, "calib/std_conf": 0.19753362055609675, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 670.0, "completions/max_terminated_length": 670.0, "completions/mean_length": 282.7109375, "completions/mean_terminated_length": 287.19842529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.0336, "grad_norm": 1.1783859729766846, "learning_rate": 4.9722222222222224e-06, "loss": 0.0039, "num_tokens": 8757205.0, "reward": 1.3317503929138184, "reward_std": 0.22663286328315735, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.8120882511138916, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.6867885589599609, "step": 21 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.92578125, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 37.64453125, "completions/mean_terminated_length": 507.2105407714844, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0352, "grad_norm": 0.0, "learning_rate": 4.944444444444445e-06, "loss": 0.0, "num_tokens": 9116962.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 22 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7493589743589744, "calib/avg_num_step_conf": 5.2265625, "calib/ece": 0.24708661417322836, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.10894487179487176, "calib/mean_conf": 0.16787401574803149, "calib/mu_c": 0.23221153846153844, "calib/mu_w": 0.12326666666666668, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0027559055118110236, "calib/std_conf": 0.1342205842273474, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 504.0, "completions/max_terminated_length": 504.0, "completions/mean_length": 252.14453125, "completions/mean_terminated_length": 256.1468505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 23.0, "epoch": 0.0368, "grad_norm": 3.252777576446533, "learning_rate": 4.9166666666666665e-06, "loss": -0.0153, "num_tokens": 9537703.0, "reward": 1.454376220703125, "reward_std": 0.3514174520969391, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.7256972789764404, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.6855578422546387, "step": 23 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6726219217769194, "calib/avg_num_step_conf": 5.2890625, "calib/ece": 0.03889687500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0620073394495413, "calib/mean_conf": 0.12219687500000002, "calib/mu_c": 0.17500000000000002, "calib/mu_w": 0.11299266055045872, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.006328125, "calib/std_conf": 0.11195482901703872, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 657.0, "completions/max_terminated_length": 657.0, "completions/mean_length": 266.84765625, "completions/mean_terminated_length": 271.0833435058594, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.0384, "grad_norm": 7.944537162780762, "learning_rate": 4.888888888888889e-06, "loss": -0.0158, "num_tokens": 9951088.0, "reward": 1.089248776435852, "reward_std": 0.29503536224365234, "rewards/accuracy_reward_step": 0.1484375, "rewards/final_brier_reward_step": 0.8644500970840454, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.625357449054718, "step": 24 }, { "calib/answer_extract_rate": 0.92578125, "calib/auroc": 0.6397745571658615, "calib/avg_num_step_conf": 5.22265625, "calib/ece": 0.05970464135021098, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.890625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04932850241545894, "calib/mean_conf": 0.09658227848101265, "calib/mu_c": 0.1396666666666667, "calib/mu_w": 0.09033816425120775, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 0.9453125, "calib/nonempty_step_conf_rate": 0.91796875, "calib/pce": 0.01485232067510549, "calib/std_conf": 0.11345981361786617, "calib/step_conf_rate": 0.91796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 1428.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 320.12890625, "completions/mean_terminated_length": 337.255126953125, "completions/min_length": 0.0, "completions/min_terminated_length": 15.0, "epoch": 0.04, "grad_norm": 20.122507095336914, "learning_rate": 4.861111111111111e-06, "loss": -0.0936, "num_tokens": 10391969.0, "reward": 0.9744135141372681, "reward_std": 0.32630473375320435, "rewards/accuracy_reward_step": 0.1171875, "rewards/final_brier_reward_step": 0.7882003784179688, "rewards/format_reward_step": 0.890625, "rewards/stepwise_brier_reward": 0.6250784397125244, "step": 25 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9375, "completions/max_length": 1196.0, "completions/max_terminated_length": 1196.0, "completions/mean_length": 32.34375, "completions/mean_terminated_length": 517.5, "completions/min_length": 0.0, "completions/min_terminated_length": 37.0, "epoch": 0.0416, "grad_norm": 0.0, "learning_rate": 4.833333333333333e-06, "loss": 0.0, "num_tokens": 10764321.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 26 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.91796875, "completions/max_length": 1371.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 34.5546875, "completions/mean_terminated_length": 421.23809814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0432, "grad_norm": 0.0, "learning_rate": 4.805555555555556e-06, "loss": 0.0, "num_tokens": 11116359.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 27 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.90625, "completions/max_length": 1331.0, "completions/max_terminated_length": 1331.0, "completions/mean_length": 33.015625, "completions/mean_terminated_length": 352.16668701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0448, "grad_norm": 0.0, "learning_rate": 4.777777777777778e-06, "loss": 0.0, "num_tokens": 11469995.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 28 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.89453125, "completions/max_length": 1224.0, "completions/max_terminated_length": 1224.0, "completions/mean_length": 35.203125, "completions/mean_terminated_length": 333.77777099609375, "completions/min_length": 0.0, "completions/min_terminated_length": 11.0, "epoch": 0.0464, "grad_norm": 0.0, "learning_rate": 4.75e-06, "loss": 0.0, "num_tokens": 11838655.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 29 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.640625, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 141.12109375, "completions/mean_terminated_length": 392.6847839355469, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.048, "grad_norm": 0.0, "learning_rate": 4.722222222222222e-06, "loss": 0.0, "num_tokens": 12215350.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 30 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.5625, "completions/max_length": 1526.0, "completions/max_terminated_length": 1526.0, "completions/mean_length": 210.26953125, "completions/mean_terminated_length": 480.6160888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0496, "grad_norm": 0.0, "learning_rate": 4.694444444444445e-06, "loss": 0.0, "num_tokens": 12614987.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 31 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.640625, "completions/max_length": 1417.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 178.796875, "completions/mean_terminated_length": 497.5217590332031, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0512, "grad_norm": 0.0, "learning_rate": 4.666666666666667e-06, "loss": 0.0, "num_tokens": 12997791.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 32 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.95703125, "completions/max_length": 1308.0, "completions/max_terminated_length": 1308.0, "completions/mean_length": 22.359375, "completions/mean_terminated_length": 520.3636474609375, "completions/min_length": 0.0, "completions/min_terminated_length": 12.0, "epoch": 0.0528, "grad_norm": 0.0, "learning_rate": 4.638888888888889e-06, "loss": 0.0, "num_tokens": 13355163.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 33 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9765625, "completions/max_length": 579.0, "completions/max_terminated_length": 579.0, "completions/mean_length": 4.125, "completions/mean_terminated_length": 176.0, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0544, "grad_norm": 0.0, "learning_rate": 4.611111111111112e-06, "loss": 0.0, "num_tokens": 13708307.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 34 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.98828125, "completions/max_length": 1375.0, "completions/max_terminated_length": 1375.0, "completions/mean_length": 6.578125, "completions/mean_terminated_length": 561.3333740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.056, "grad_norm": 0.0, "learning_rate": 4.583333333333333e-06, "loss": 0.0, "num_tokens": 14065343.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 35 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9921875, "completions/max_length": 899.0, "completions/max_terminated_length": 899.0, "completions/mean_length": 3.6640625, "completions/mean_terminated_length": 469.0, "completions/min_length": 0.0, "completions/min_terminated_length": 39.0, "epoch": 0.0576, "grad_norm": 0.0, "learning_rate": 4.555555555555556e-06, "loss": 0.0, "num_tokens": 14410681.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 36 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9765625, "completions/max_length": 978.0, "completions/max_terminated_length": 978.0, "completions/mean_length": 6.95703125, "completions/mean_terminated_length": 296.8333435058594, "completions/min_length": 0.0, "completions/min_terminated_length": 17.0, "epoch": 0.0592, "grad_norm": 0.0, "learning_rate": 4.527777777777778e-06, "loss": 0.0, "num_tokens": 14765214.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 37 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 277.0, "completions/max_terminated_length": 277.0, "completions/mean_length": 1.08203125, "completions/mean_terminated_length": 277.0, "completions/min_length": 0.0, "completions/min_terminated_length": 277.0, "epoch": 0.0608, "grad_norm": 0.0, "learning_rate": 4.5e-06, "loss": 0.0, "num_tokens": 15121371.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 38 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.98046875, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 8.4140625, "completions/mean_terminated_length": 430.8000183105469, "completions/min_length": 0.0, "completions/min_terminated_length": 39.0, "epoch": 0.0624, "grad_norm": 0.0, "learning_rate": 4.472222222222223e-06, "loss": 0.0, "num_tokens": 15470269.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 39 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.98828125, "completions/max_length": 1535.0, "completions/max_terminated_length": 1535.0, "completions/mean_length": 7.1484375, "completions/mean_terminated_length": 610.0, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.064, "grad_norm": 0.0, "learning_rate": 4.444444444444444e-06, "loss": 0.0, "num_tokens": 15814235.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 40 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.984375, "completions/max_length": 1366.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 14.02734375, "completions/mean_terminated_length": 897.75, "completions/min_length": 0.0, "completions/min_terminated_length": 16.0, "epoch": 0.0656, "grad_norm": 0.0, "learning_rate": 4.416666666666667e-06, "loss": 0.0, "num_tokens": 16177746.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 41 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.97265625, "completions/max_length": 1460.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 18.02734375, "completions/mean_terminated_length": 659.2857666015625, "completions/min_length": 0.0, "completions/min_terminated_length": 31.0, "epoch": 0.0672, "grad_norm": 0.0, "learning_rate": 4.388888888888889e-06, "loss": 0.0, "num_tokens": 16525649.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 42 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 32.0, "completions/max_terminated_length": 32.0, "completions/mean_length": 0.125, "completions/mean_terminated_length": 32.0, "completions/min_length": 0.0, "completions/min_terminated_length": 32.0, "epoch": 0.0688, "grad_norm": 0.0, "learning_rate": 4.361111111111112e-06, "loss": 0.0, "num_tokens": 16885265.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 43 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.98046875, "completions/max_length": 1427.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 11.9296875, "completions/mean_terminated_length": 610.7999877929688, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.0704, "grad_norm": 0.0, "learning_rate": 4.333333333333334e-06, "loss": 0.0, "num_tokens": 17250447.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 44 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.96875, "completions/max_length": 1028.0, "completions/max_terminated_length": 1028.0, "completions/mean_length": 12.37109375, "completions/mean_terminated_length": 395.875, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.072, "grad_norm": 0.0, "learning_rate": 4.305555555555556e-06, "loss": 0.0, "num_tokens": 17601958.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 45 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9609375, "completions/max_length": 961.0, "completions/max_terminated_length": 961.0, "completions/mean_length": 10.73828125, "completions/mean_terminated_length": 274.8999938964844, "completions/min_length": 0.0, "completions/min_terminated_length": 30.0, "epoch": 0.0736, "grad_norm": 0.0, "learning_rate": 4.277777777777778e-06, "loss": 0.0, "num_tokens": 17933307.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 46 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.96484375, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 22.984375, "completions/mean_terminated_length": 653.7777709960938, "completions/min_length": 0.0, "completions/min_terminated_length": 14.0, "epoch": 0.0752, "grad_norm": 0.0, "learning_rate": 4.25e-06, "loss": 0.0, "num_tokens": 18275847.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 47 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.94140625, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 19.94140625, "completions/mean_terminated_length": 340.3333435058594, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.0768, "grad_norm": 0.0, "learning_rate": 4.222222222222223e-06, "loss": 0.0, "num_tokens": 18617752.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 48 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.91796875, "completions/max_length": 1288.0, "completions/max_terminated_length": 1288.0, "completions/mean_length": 29.03515625, "completions/mean_terminated_length": 353.952392578125, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.0784, "grad_norm": 0.0, "learning_rate": 4.194444444444445e-06, "loss": 0.0, "num_tokens": 18986769.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 49 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.921875, "completions/max_length": 1294.0, "completions/max_terminated_length": 1294.0, "completions/mean_length": 27.54296875, "completions/mean_terminated_length": 352.5500183105469, "completions/min_length": 0.0, "completions/min_terminated_length": 20.0, "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 4.166666666666667e-06, "loss": 0.0, "num_tokens": 19344444.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 50 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9453125, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 20.5, "completions/mean_terminated_length": 374.8571472167969, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.0816, "grad_norm": 0.0, "learning_rate": 4.138888888888889e-06, "loss": 0.0, "num_tokens": 19700036.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 51 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.94140625, "completions/max_length": 1450.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 43.10546875, "completions/mean_terminated_length": 735.6666870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.0832, "grad_norm": 0.0, "learning_rate": 4.111111111111111e-06, "loss": 0.0, "num_tokens": 20068951.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 52 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.94921875, "completions/max_length": 1377.0, "completions/max_terminated_length": 1377.0, "completions/mean_length": 25.51171875, "completions/mean_terminated_length": 502.3846435546875, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.0848, "grad_norm": 0.0, "learning_rate": 4.083333333333334e-06, "loss": 0.0, "num_tokens": 20424602.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 53 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9453125, "completions/max_length": 1281.0, "completions/max_terminated_length": 1281.0, "completions/mean_length": 19.23828125, "completions/mean_terminated_length": 351.7857360839844, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0864, "grad_norm": 0.0, "learning_rate": 4.055555555555556e-06, "loss": 0.0, "num_tokens": 20775135.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 54 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9375, "completions/max_length": 1294.0, "completions/max_terminated_length": 1294.0, "completions/mean_length": 33.828125, "completions/mean_terminated_length": 541.25, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.088, "grad_norm": 0.0, "learning_rate": 4.027777777777779e-06, "loss": 0.0, "num_tokens": 21146899.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 55 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9453125, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 35.44140625, "completions/mean_terminated_length": 648.0714721679688, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.0896, "grad_norm": 0.0, "learning_rate": 4.000000000000001e-06, "loss": 0.0, "num_tokens": 21513652.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 56 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.92578125, "completions/max_length": 1511.0, "completions/max_terminated_length": 1511.0, "completions/mean_length": 42.671875, "completions/mean_terminated_length": 574.9473876953125, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.0912, "grad_norm": 0.0, "learning_rate": 3.972222222222223e-06, "loss": 0.0, "num_tokens": 21864400.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 57 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.91015625, "completions/max_length": 1168.0, "completions/max_terminated_length": 1168.0, "completions/mean_length": 34.66015625, "completions/mean_terminated_length": 385.7826232910156, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.0928, "grad_norm": 0.0, "learning_rate": 3.944444444444445e-06, "loss": 0.0, "num_tokens": 22225137.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 58 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.92578125, "completions/max_length": 1229.0, "completions/max_terminated_length": 1229.0, "completions/mean_length": 42.15625, "completions/mean_terminated_length": 568.0, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.0944, "grad_norm": 0.0, "learning_rate": 3.916666666666667e-06, "loss": 0.0, "num_tokens": 22591353.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 59 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.88671875, "completions/max_length": 1398.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 59.44921875, "completions/mean_terminated_length": 524.7930908203125, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.096, "grad_norm": 0.0, "learning_rate": 3.88888888888889e-06, "loss": 0.0, "num_tokens": 22951116.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 60 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.95703125, "completions/max_length": 1140.0, "completions/max_terminated_length": 1140.0, "completions/mean_length": 18.49609375, "completions/mean_terminated_length": 430.4545593261719, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.0976, "grad_norm": 0.0, "learning_rate": 3.861111111111112e-06, "loss": 0.0, "num_tokens": 23313251.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 61 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.90625, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 60.0625, "completions/mean_terminated_length": 640.6666870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.0992, "grad_norm": 0.0, "learning_rate": 3.833333333333334e-06, "loss": 0.0, "num_tokens": 23674035.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 62 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.93359375, "completions/max_length": 1509.0, "completions/max_terminated_length": 1509.0, "completions/mean_length": 33.50390625, "completions/mean_terminated_length": 504.5294189453125, "completions/min_length": 0.0, "completions/min_terminated_length": 13.0, "epoch": 0.1008, "grad_norm": 0.0, "learning_rate": 3.8055555555555556e-06, "loss": 0.0, "num_tokens": 24035628.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 63 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.91796875, "completions/max_length": 1513.0, "completions/max_terminated_length": 1513.0, "completions/mean_length": 53.875, "completions/mean_terminated_length": 656.7619018554688, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.1024, "grad_norm": 0.0, "learning_rate": 3.777777777777778e-06, "loss": 0.0, "num_tokens": 24413276.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 64 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.87109375, "completions/max_length": 1504.0, "completions/max_terminated_length": 1504.0, "completions/mean_length": 71.078125, "completions/mean_terminated_length": 551.3939819335938, "completions/min_length": 0.0, "completions/min_terminated_length": 14.0, "epoch": 0.104, "grad_norm": 0.0, "learning_rate": 3.7500000000000005e-06, "loss": 0.0, "num_tokens": 24786392.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 65 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.90234375, "completions/max_length": 1521.0, "completions/max_terminated_length": 1521.0, "completions/mean_length": 60.75, "completions/mean_terminated_length": 622.0799560546875, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.1056, "grad_norm": 0.0, "learning_rate": 3.7222222222222225e-06, "loss": 0.0, "num_tokens": 25162168.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 66 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.88671875, "completions/max_length": 1494.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 64.5546875, "completions/mean_terminated_length": 569.862060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.1072, "grad_norm": 0.0, "learning_rate": 3.694444444444445e-06, "loss": 0.0, "num_tokens": 25540302.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 67 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.87109375, "completions/max_length": 1508.0, "completions/max_terminated_length": 1508.0, "completions/mean_length": 72.8046875, "completions/mean_terminated_length": 564.7879028320312, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.1088, "grad_norm": 0.0, "learning_rate": 3.6666666666666666e-06, "loss": 0.0, "num_tokens": 25910252.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 68 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8828125, "completions/max_length": 1461.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 63.8671875, "completions/mean_terminated_length": 545.0, "completions/min_length": 0.0, "completions/min_terminated_length": 27.0, "epoch": 0.1104, "grad_norm": 0.0, "learning_rate": 3.638888888888889e-06, "loss": 0.0, "num_tokens": 26256754.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 69 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.875, "completions/max_length": 1516.0, "completions/max_terminated_length": 1516.0, "completions/mean_length": 77.11328125, "completions/mean_terminated_length": 616.90625, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.112, "grad_norm": 0.0, "learning_rate": 3.6111111111111115e-06, "loss": 0.0, "num_tokens": 26630207.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 70 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.90625, "completions/max_length": 1298.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 57.27734375, "completions/mean_terminated_length": 610.9583740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.1136, "grad_norm": 0.0, "learning_rate": 3.5833333333333335e-06, "loss": 0.0, "num_tokens": 26989702.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 71 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9140625, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 44.58984375, "completions/mean_terminated_length": 518.8636474609375, "completions/min_length": 0.0, "completions/min_terminated_length": 17.0, "epoch": 0.1152, "grad_norm": 0.0, "learning_rate": 3.555555555555556e-06, "loss": 0.0, "num_tokens": 27341005.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 72 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8671875, "completions/max_length": 1266.0, "completions/max_terminated_length": 1266.0, "completions/mean_length": 63.0859375, "completions/mean_terminated_length": 475.0, "completions/min_length": 0.0, "completions/min_terminated_length": 26.0, "epoch": 0.1168, "grad_norm": 0.0, "learning_rate": 3.5277777777777784e-06, "loss": 0.0, "num_tokens": 27711395.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 73 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8984375, "completions/max_length": 1445.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 62.2265625, "completions/mean_terminated_length": 612.6923217773438, "completions/min_length": 0.0, "completions/min_terminated_length": 20.0, "epoch": 0.1184, "grad_norm": 0.0, "learning_rate": 3.5e-06, "loss": 0.0, "num_tokens": 28063365.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 74 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.890625, "completions/max_length": 1501.0, "completions/max_terminated_length": 1501.0, "completions/mean_length": 68.1171875, "completions/mean_terminated_length": 622.7857666015625, "completions/min_length": 0.0, "completions/min_terminated_length": 14.0, "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 3.4722222222222224e-06, "loss": 0.0, "num_tokens": 28441107.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 75 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.890625, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 54.921875, "completions/mean_terminated_length": 502.14288330078125, "completions/min_length": 0.0, "completions/min_terminated_length": 11.0, "epoch": 0.1216, "grad_norm": 0.0, "learning_rate": 3.444444444444445e-06, "loss": 0.0, "num_tokens": 28801687.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 76 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.90234375, "completions/max_length": 1505.0, "completions/max_terminated_length": 1505.0, "completions/mean_length": 47.8203125, "completions/mean_terminated_length": 489.67999267578125, "completions/min_length": 0.0, "completions/min_terminated_length": 18.0, "epoch": 0.1232, "grad_norm": 0.0, "learning_rate": 3.416666666666667e-06, "loss": 0.0, "num_tokens": 29165233.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 77 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8984375, "completions/max_length": 1442.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 61.1796875, "completions/mean_terminated_length": 602.3846435546875, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.1248, "grad_norm": 0.0, "learning_rate": 3.3888888888888893e-06, "loss": 0.0, "num_tokens": 29528071.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 78 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.85546875, "completions/max_length": 1477.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 97.87109375, "completions/mean_terminated_length": 677.1621704101562, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.1264, "grad_norm": 0.0, "learning_rate": 3.3611111111111117e-06, "loss": 0.0, "num_tokens": 29903614.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 79 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.86328125, "completions/max_length": 1469.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 75.36328125, "completions/mean_terminated_length": 551.2285766601562, "completions/min_length": 0.0, "completions/min_terminated_length": 34.0, "epoch": 0.128, "grad_norm": 0.0, "learning_rate": 3.3333333333333333e-06, "loss": 0.0, "num_tokens": 30275523.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 80 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.87109375, "completions/max_length": 1515.0, "completions/max_terminated_length": 1515.0, "completions/mean_length": 64.75390625, "completions/mean_terminated_length": 502.3333435058594, "completions/min_length": 0.0, "completions/min_terminated_length": 41.0, "epoch": 0.1296, "grad_norm": 0.0, "learning_rate": 3.3055555555555558e-06, "loss": 0.0, "num_tokens": 30642444.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 81 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.83984375, "completions/max_length": 1463.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 98.23828125, "completions/mean_terminated_length": 613.3901977539062, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.1312, "grad_norm": 0.0, "learning_rate": 3.277777777777778e-06, "loss": 0.0, "num_tokens": 31021409.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 82 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8125, "completions/max_length": 1511.0, "completions/max_terminated_length": 1511.0, "completions/mean_length": 94.79296875, "completions/mean_terminated_length": 505.5625, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.1328, "grad_norm": 0.0, "learning_rate": 3.2500000000000002e-06, "loss": 0.0, "num_tokens": 31394028.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 83 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.87109375, "completions/max_length": 1408.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 64.3984375, "completions/mean_terminated_length": 499.5757751464844, "completions/min_length": 0.0, "completions/min_terminated_length": 43.0, "epoch": 0.1344, "grad_norm": 0.0, "learning_rate": 3.2222222222222227e-06, "loss": 0.0, "num_tokens": 31771066.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 84 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8671875, "completions/max_length": 1412.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 75.44921875, "completions/mean_terminated_length": 568.0882568359375, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.136, "grad_norm": 0.0, "learning_rate": 3.1944444444444443e-06, "loss": 0.0, "num_tokens": 32135573.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 85 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.83984375, "completions/max_length": 1529.0, "completions/max_terminated_length": 1529.0, "completions/mean_length": 73.3359375, "completions/mean_terminated_length": 457.9024353027344, "completions/min_length": 0.0, "completions/min_terminated_length": 22.0, "epoch": 0.1376, "grad_norm": 0.0, "learning_rate": 3.1666666666666667e-06, "loss": 0.0, "num_tokens": 32495427.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 86 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8515625, "completions/max_length": 1470.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 86.609375, "completions/mean_terminated_length": 583.4736938476562, "completions/min_length": 0.0, "completions/min_terminated_length": 27.0, "epoch": 0.1392, "grad_norm": 0.0, "learning_rate": 3.138888888888889e-06, "loss": 0.0, "num_tokens": 32866751.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 87 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.85546875, "completions/max_length": 1512.0, "completions/max_terminated_length": 1512.0, "completions/mean_length": 74.3125, "completions/mean_terminated_length": 514.1621704101562, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.1408, "grad_norm": 0.0, "learning_rate": 3.1111111111111116e-06, "loss": 0.0, "num_tokens": 33228319.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 88 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.89453125, "completions/max_length": 1529.0, "completions/max_terminated_length": 1529.0, "completions/mean_length": 57.05859375, "completions/mean_terminated_length": 541.0, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.1424, "grad_norm": 0.0, "learning_rate": 3.0833333333333336e-06, "loss": 0.0, "num_tokens": 33598326.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 89 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.84375, "completions/max_length": 1471.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 94.0078125, "completions/mean_terminated_length": 601.6500244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 23.0, "epoch": 0.144, "grad_norm": 0.0, "learning_rate": 3.055555555555556e-06, "loss": 0.0, "num_tokens": 33967808.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 90 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.84375, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 99.3203125, "completions/mean_terminated_length": 635.6500244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.1456, "grad_norm": 0.0, "learning_rate": 3.0277777777777776e-06, "loss": 0.0, "num_tokens": 34329962.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 91 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8359375, "completions/max_length": 1473.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 92.9375, "completions/mean_terminated_length": 566.4761962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 36.0, "epoch": 0.1472, "grad_norm": 0.0, "learning_rate": 3e-06, "loss": 0.0, "num_tokens": 34702698.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 92 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.859375, "completions/max_length": 1491.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 90.41015625, "completions/mean_terminated_length": 642.9166870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.1488, "grad_norm": 0.0, "learning_rate": 2.9722222222222225e-06, "loss": 0.0, "num_tokens": 35075211.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 93 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.859375, "completions/max_length": 1228.0, "completions/max_terminated_length": 1228.0, "completions/mean_length": 66.79296875, "completions/mean_terminated_length": 474.97222900390625, "completions/min_length": 0.0, "completions/min_terminated_length": 19.0, "epoch": 0.1504, "grad_norm": 0.0, "learning_rate": 2.944444444444445e-06, "loss": 0.0, "num_tokens": 35436910.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 94 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8359375, "completions/max_length": 1478.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 97.078125, "completions/mean_terminated_length": 591.7142944335938, "completions/min_length": 0.0, "completions/min_terminated_length": 20.0, "epoch": 0.152, "grad_norm": 0.0, "learning_rate": 2.916666666666667e-06, "loss": 0.0, "num_tokens": 35819866.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 95 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8828125, "completions/max_length": 1486.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 59.984375, "completions/mean_terminated_length": 511.86669921875, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.1536, "grad_norm": 0.0, "learning_rate": 2.888888888888889e-06, "loss": 0.0, "num_tokens": 36199110.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 96 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.88671875, "completions/max_length": 1359.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 63.32421875, "completions/mean_terminated_length": 559.0, "completions/min_length": 0.0, "completions/min_terminated_length": 26.0, "epoch": 0.1552, "grad_norm": 0.0, "learning_rate": 2.861111111111111e-06, "loss": 0.0, "num_tokens": 36558633.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 97 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8515625, "completions/max_length": 1497.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 102.02734375, "completions/mean_terminated_length": 687.3421020507812, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.1568, "grad_norm": 0.0, "learning_rate": 2.8333333333333335e-06, "loss": 0.0, "num_tokens": 36922352.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 98 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8359375, "completions/max_length": 1522.0, "completions/max_terminated_length": 1522.0, "completions/mean_length": 101.88671875, "completions/mean_terminated_length": 621.0238037109375, "completions/min_length": 0.0, "completions/min_terminated_length": 21.0, "epoch": 0.1584, "grad_norm": 0.0, "learning_rate": 2.805555555555556e-06, "loss": 0.0, "num_tokens": 37305955.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 99 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.84375, "completions/max_length": 1505.0, "completions/max_terminated_length": 1505.0, "completions/mean_length": 97.10546875, "completions/mean_terminated_length": 621.4750366210938, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 2.7777777777777783e-06, "loss": 0.0, "num_tokens": 37700222.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 100 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.875, "completions/max_length": 1462.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 74.58984375, "completions/mean_terminated_length": 596.71875, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.1616, "grad_norm": 0.0, "learning_rate": 2.7500000000000004e-06, "loss": 0.0, "num_tokens": 38080061.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 101 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.83984375, "completions/max_length": 1391.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 82.86328125, "completions/mean_terminated_length": 517.3901977539062, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.1632, "grad_norm": 0.0, "learning_rate": 2.7222222222222224e-06, "loss": 0.0, "num_tokens": 38450642.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 102 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8671875, "completions/max_length": 1468.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 86.62109375, "completions/mean_terminated_length": 652.2058715820312, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.1648, "grad_norm": 0.0, "learning_rate": 2.6944444444444444e-06, "loss": 0.0, "num_tokens": 38838233.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 103 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8359375, "completions/max_length": 1517.0, "completions/max_terminated_length": 1517.0, "completions/mean_length": 101.23046875, "completions/mean_terminated_length": 617.0238037109375, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.1664, "grad_norm": 0.0, "learning_rate": 2.666666666666667e-06, "loss": 0.0, "num_tokens": 39211388.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 104 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.87890625, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 73.390625, "completions/mean_terminated_length": 606.0645141601562, "completions/min_length": 0.0, "completions/min_terminated_length": 17.0, "epoch": 0.168, "grad_norm": 0.0, "learning_rate": 2.6388888888888893e-06, "loss": 0.0, "num_tokens": 39591112.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 105 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.90625, "completions/max_length": 1518.0, "completions/max_terminated_length": 1518.0, "completions/mean_length": 62.890625, "completions/mean_terminated_length": 670.8333740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 30.0, "epoch": 0.1696, "grad_norm": 0.0, "learning_rate": 2.6111111111111113e-06, "loss": 0.0, "num_tokens": 39973988.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 106 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.86328125, "completions/max_length": 1433.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 77.8203125, "completions/mean_terminated_length": 569.2000122070312, "completions/min_length": 0.0, "completions/min_terminated_length": 18.0, "epoch": 0.1712, "grad_norm": 0.0, "learning_rate": 2.5833333333333337e-06, "loss": 0.0, "num_tokens": 40355750.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 107 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8125, "completions/max_length": 1531.0, "completions/max_terminated_length": 1531.0, "completions/mean_length": 108.99609375, "completions/mean_terminated_length": 581.3125, "completions/min_length": 0.0, "completions/min_terminated_length": 29.0, "epoch": 0.1728, "grad_norm": 0.0, "learning_rate": 2.5555555555555557e-06, "loss": 0.0, "num_tokens": 40736717.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 108 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8828125, "completions/max_length": 1423.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 64.22265625, "completions/mean_terminated_length": 548.0333862304688, "completions/min_length": 0.0, "completions/min_terminated_length": 28.0, "epoch": 0.1744, "grad_norm": 0.0, "learning_rate": 2.5277777777777778e-06, "loss": 0.0, "num_tokens": 41096742.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 109 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.87109375, "completions/max_length": 1469.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 71.94921875, "completions/mean_terminated_length": 558.1515502929688, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.176, "grad_norm": 0.0, "learning_rate": 2.5e-06, "loss": 0.0, "num_tokens": 41469713.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 110 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.87109375, "completions/max_length": 1479.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 82.984375, "completions/mean_terminated_length": 643.757568359375, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.1776, "grad_norm": 0.0, "learning_rate": 2.4722222222222226e-06, "loss": 0.0, "num_tokens": 41842861.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 111 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.859375, "completions/max_length": 1526.0, "completions/max_terminated_length": 1526.0, "completions/mean_length": 84.08203125, "completions/mean_terminated_length": 597.9166870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 13.0, "epoch": 0.1792, "grad_norm": 0.0, "learning_rate": 2.4444444444444447e-06, "loss": 0.0, "num_tokens": 42223010.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 112 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.87109375, "completions/max_length": 1479.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 78.984375, "completions/mean_terminated_length": 612.727294921875, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.1808, "grad_norm": 0.0, "learning_rate": 2.4166666666666667e-06, "loss": 0.0, "num_tokens": 42576774.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 113 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.90625, "completions/max_length": 1409.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 61.953125, "completions/mean_terminated_length": 660.8333740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 41.0, "epoch": 0.1824, "grad_norm": 0.0, "learning_rate": 2.388888888888889e-06, "loss": 0.0, "num_tokens": 42936170.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 114 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8515625, "completions/max_length": 1453.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 89.3828125, "completions/mean_terminated_length": 602.1578979492188, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.184, "grad_norm": 0.0, "learning_rate": 2.361111111111111e-06, "loss": 0.0, "num_tokens": 43311860.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 115 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8359375, "completions/max_length": 1511.0, "completions/max_terminated_length": 1511.0, "completions/mean_length": 105.31640625, "completions/mean_terminated_length": 641.9285888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 39.0, "epoch": 0.1856, "grad_norm": 0.0, "learning_rate": 2.3333333333333336e-06, "loss": 0.0, "num_tokens": 43691325.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 116 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8359375, "completions/max_length": 1358.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 92.14453125, "completions/mean_terminated_length": 561.6428833007812, "completions/min_length": 0.0, "completions/min_terminated_length": 43.0, "epoch": 0.1872, "grad_norm": 0.0, "learning_rate": 2.305555555555556e-06, "loss": 0.0, "num_tokens": 44055330.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 117 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.84765625, "completions/max_length": 1526.0, "completions/max_terminated_length": 1526.0, "completions/mean_length": 103.859375, "completions/mean_terminated_length": 681.7435913085938, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.1888, "grad_norm": 0.0, "learning_rate": 2.277777777777778e-06, "loss": 0.0, "num_tokens": 44424398.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 118 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8515625, "completions/max_length": 1475.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 92.75, "completions/mean_terminated_length": 624.8421020507812, "completions/min_length": 0.0, "completions/min_terminated_length": 42.0, "epoch": 0.1904, "grad_norm": 0.0, "learning_rate": 2.25e-06, "loss": 0.0, "num_tokens": 44795822.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 119 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.85546875, "completions/max_length": 1435.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 83.48046875, "completions/mean_terminated_length": 577.5946044921875, "completions/min_length": 0.0, "completions/min_terminated_length": 29.0, "epoch": 0.192, "grad_norm": 0.0, "learning_rate": 2.222222222222222e-06, "loss": 0.0, "num_tokens": 45151641.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 120 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.82421875, "completions/max_length": 1519.0, "completions/max_terminated_length": 1519.0, "completions/mean_length": 111.44140625, "completions/mean_terminated_length": 633.977783203125, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.1936, "grad_norm": 0.0, "learning_rate": 2.1944444444444445e-06, "loss": 0.0, "num_tokens": 45509442.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 121 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.86328125, "completions/max_length": 1426.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 84.8125, "completions/mean_terminated_length": 620.3428344726562, "completions/min_length": 0.0, "completions/min_terminated_length": 28.0, "epoch": 0.1952, "grad_norm": 0.0, "learning_rate": 2.166666666666667e-06, "loss": 0.0, "num_tokens": 45884586.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 122 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.875, "completions/max_length": 1483.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 74.0703125, "completions/mean_terminated_length": 592.5625, "completions/min_length": 0.0, "completions/min_terminated_length": 44.0, "epoch": 0.1968, "grad_norm": 0.0, "learning_rate": 2.138888888888889e-06, "loss": 0.0, "num_tokens": 46257996.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 123 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8671875, "completions/max_length": 1421.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 73.91015625, "completions/mean_terminated_length": 556.5, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.1984, "grad_norm": 0.0, "learning_rate": 2.1111111111111114e-06, "loss": 0.0, "num_tokens": 46626581.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 124 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.84765625, "completions/max_length": 1477.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 85.8671875, "completions/mean_terminated_length": 563.6410522460938, "completions/min_length": 0.0, "completions/min_terminated_length": 57.0, "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 2.0833333333333334e-06, "loss": 0.0, "num_tokens": 47005251.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 125 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.83984375, "completions/max_length": 1476.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 99.109375, "completions/mean_terminated_length": 618.8292236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 27.0, "epoch": 0.2016, "grad_norm": 0.0, "learning_rate": 2.0555555555555555e-06, "loss": 0.0, "num_tokens": 47389567.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 126 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8828125, "completions/max_length": 1502.0, "completions/max_terminated_length": 1502.0, "completions/mean_length": 80.375, "completions/mean_terminated_length": 685.86669921875, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.2032, "grad_norm": 0.0, "learning_rate": 2.027777777777778e-06, "loss": 0.0, "num_tokens": 47769455.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 127 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.87890625, "completions/max_length": 1480.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 65.86328125, "completions/mean_terminated_length": 543.9031982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 25.0, "epoch": 0.2048, "grad_norm": 0.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.0, "num_tokens": 48144572.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 128 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.921875, "completions/max_length": 1346.0, "completions/max_terminated_length": 1346.0, "completions/mean_length": 35.14453125, "completions/mean_terminated_length": 449.8500061035156, "completions/min_length": 0.0, "completions/min_terminated_length": 12.0, "epoch": 0.2064, "grad_norm": 0.0, "learning_rate": 1.9722222222222224e-06, "loss": 0.0, "num_tokens": 48511137.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 129 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8671875, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 82.51171875, "completions/mean_terminated_length": 621.2647094726562, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.208, "grad_norm": 0.0, "learning_rate": 1.944444444444445e-06, "loss": 0.0, "num_tokens": 48866972.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 130 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.88671875, "completions/max_length": 1513.0, "completions/max_terminated_length": 1513.0, "completions/mean_length": 78.640625, "completions/mean_terminated_length": 694.2069091796875, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.2096, "grad_norm": 0.0, "learning_rate": 1.916666666666667e-06, "loss": 0.0, "num_tokens": 49244216.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 131 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.890625, "completions/max_length": 1494.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 64.32421875, "completions/mean_terminated_length": 588.107177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 19.0, "epoch": 0.2112, "grad_norm": 0.0, "learning_rate": 1.888888888888889e-06, "loss": 0.0, "num_tokens": 49615747.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 132 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.84375, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 80.04296875, "completions/mean_terminated_length": 512.2750244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.2128, "grad_norm": 0.0, "learning_rate": 1.8611111111111113e-06, "loss": 0.0, "num_tokens": 49983462.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 133 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.84765625, "completions/max_length": 1423.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 105.18359375, "completions/mean_terminated_length": 690.4359130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.2144, "grad_norm": 0.0, "learning_rate": 1.8333333333333333e-06, "loss": 0.0, "num_tokens": 50355429.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 134 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.875, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 69.14453125, "completions/mean_terminated_length": 553.15625, "completions/min_length": 0.0, "completions/min_terminated_length": 23.0, "epoch": 0.216, "grad_norm": 0.0, "learning_rate": 1.8055555555555557e-06, "loss": 0.0, "num_tokens": 50716714.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 135 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.84765625, "completions/max_length": 1521.0, "completions/max_terminated_length": 1521.0, "completions/mean_length": 102.4140625, "completions/mean_terminated_length": 672.2564086914062, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.2176, "grad_norm": 0.0, "learning_rate": 1.777777777777778e-06, "loss": 0.0, "num_tokens": 51111204.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 136 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.87109375, "completions/max_length": 1533.0, "completions/max_terminated_length": 1533.0, "completions/mean_length": 88.69921875, "completions/mean_terminated_length": 688.0909423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 42.0, "epoch": 0.2192, "grad_norm": 0.0, "learning_rate": 1.75e-06, "loss": 0.0, "num_tokens": 51490127.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 137 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.84375, "completions/max_length": 1534.0, "completions/max_terminated_length": 1534.0, "completions/mean_length": 97.671875, "completions/mean_terminated_length": 625.1000366210938, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.2208, "grad_norm": 0.0, "learning_rate": 1.7222222222222224e-06, "loss": 0.0, "num_tokens": 51874467.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 138 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9140625, "completions/max_length": 1528.0, "completions/max_terminated_length": 1528.0, "completions/mean_length": 50.91796875, "completions/mean_terminated_length": 592.5, "completions/min_length": 0.0, "completions/min_terminated_length": 23.0, "epoch": 0.2224, "grad_norm": 0.0, "learning_rate": 1.6944444444444446e-06, "loss": 0.0, "num_tokens": 52239766.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 139 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.87890625, "completions/max_length": 1497.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 57.9453125, "completions/mean_terminated_length": 478.51611328125, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.224, "grad_norm": 0.0, "learning_rate": 1.6666666666666667e-06, "loss": 0.0, "num_tokens": 52613024.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 140 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.88671875, "completions/max_length": 1473.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 84.6171875, "completions/mean_terminated_length": 746.9655151367188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.2256, "grad_norm": 0.0, "learning_rate": 1.638888888888889e-06, "loss": 0.0, "num_tokens": 52993446.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 141 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.875, "completions/max_length": 1456.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 92.37109375, "completions/mean_terminated_length": 738.96875, "completions/min_length": 0.0, "completions/min_terminated_length": 36.0, "epoch": 0.2272, "grad_norm": 0.0, "learning_rate": 1.6111111111111113e-06, "loss": 0.0, "num_tokens": 53385989.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 142 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.890625, "completions/max_length": 1308.0, "completions/max_terminated_length": 1308.0, "completions/mean_length": 59.78515625, "completions/mean_terminated_length": 546.607177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 29.0, "epoch": 0.2288, "grad_norm": 0.0, "learning_rate": 1.5833333333333333e-06, "loss": 0.0, "num_tokens": 53769790.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 143 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.87890625, "completions/max_length": 1534.0, "completions/max_terminated_length": 1534.0, "completions/mean_length": 77.9921875, "completions/mean_terminated_length": 644.0645141601562, "completions/min_length": 0.0, "completions/min_terminated_length": 21.0, "epoch": 0.2304, "grad_norm": 0.0, "learning_rate": 1.5555555555555558e-06, "loss": 0.0, "num_tokens": 54138484.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 144 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.85546875, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 94.0390625, "completions/mean_terminated_length": 650.648681640625, "completions/min_length": 0.0, "completions/min_terminated_length": 35.0, "epoch": 0.232, "grad_norm": 0.0, "learning_rate": 1.527777777777778e-06, "loss": 0.0, "num_tokens": 54530294.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 145 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.85546875, "completions/max_length": 1375.0, "completions/max_terminated_length": 1375.0, "completions/mean_length": 95.390625, "completions/mean_terminated_length": 660.0, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.2336, "grad_norm": 0.0, "learning_rate": 1.5e-06, "loss": 0.0, "num_tokens": 54914330.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 146 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8671875, "completions/max_length": 1507.0, "completions/max_terminated_length": 1507.0, "completions/mean_length": 94.09375, "completions/mean_terminated_length": 708.4705810546875, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.2352, "grad_norm": 0.0, "learning_rate": 1.4722222222222225e-06, "loss": 0.0, "num_tokens": 55283858.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 147 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.83203125, "completions/max_length": 1458.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 100.16015625, "completions/mean_terminated_length": 596.3023071289062, "completions/min_length": 0.0, "completions/min_terminated_length": 33.0, "epoch": 0.2368, "grad_norm": 0.0, "learning_rate": 1.4444444444444445e-06, "loss": 0.0, "num_tokens": 55663283.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 148 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.85546875, "completions/max_length": 1480.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 95.8671875, "completions/mean_terminated_length": 663.2973022460938, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.2384, "grad_norm": 0.0, "learning_rate": 1.4166666666666667e-06, "loss": 0.0, "num_tokens": 56047041.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 149 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.86328125, "completions/max_length": 1289.0, "completions/max_terminated_length": 1289.0, "completions/mean_length": 66.51953125, "completions/mean_terminated_length": 486.5428466796875, "completions/min_length": 0.0, "completions/min_terminated_length": 33.0, "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.3888888888888892e-06, "loss": 0.0, "num_tokens": 56400558.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 150 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.82421875, "completions/max_length": 1489.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 105.6171875, "completions/mean_terminated_length": 600.844482421875, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.2416, "grad_norm": 0.0, "learning_rate": 1.3611111111111112e-06, "loss": 0.0, "num_tokens": 56778492.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 151 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8359375, "completions/max_length": 1448.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 94.42578125, "completions/mean_terminated_length": 575.547607421875, "completions/min_length": 0.0, "completions/min_terminated_length": 31.0, "epoch": 0.2432, "grad_norm": 0.0, "learning_rate": 1.3333333333333334e-06, "loss": 0.0, "num_tokens": 57140545.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 152 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8671875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1536.0, "completions/mean_length": 90.97265625, "completions/mean_terminated_length": 684.9705810546875, "completions/min_length": 0.0, "completions/min_terminated_length": 12.0, "epoch": 0.2448, "grad_norm": 0.0, "learning_rate": 1.3055555555555556e-06, "loss": 0.0, "num_tokens": 57516770.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 153 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8515625, "completions/max_length": 1535.0, "completions/max_terminated_length": 1535.0, "completions/mean_length": 89.671875, "completions/mean_terminated_length": 604.1052856445312, "completions/min_length": 0.0, "completions/min_terminated_length": 16.0, "epoch": 0.2464, "grad_norm": 0.0, "learning_rate": 1.2777777777777779e-06, "loss": 0.0, "num_tokens": 57892726.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 154 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.859375, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 88.7421875, "completions/mean_terminated_length": 631.0555419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 23.0, "epoch": 0.248, "grad_norm": 0.0, "learning_rate": 1.25e-06, "loss": 0.0, "num_tokens": 58273708.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 155 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8671875, "completions/max_length": 1470.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 86.375, "completions/mean_terminated_length": 650.3529663085938, "completions/min_length": 0.0, "completions/min_terminated_length": 17.0, "epoch": 0.2496, "grad_norm": 0.0, "learning_rate": 1.2222222222222223e-06, "loss": 0.0, "num_tokens": 58647788.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 156 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.859375, "completions/max_length": 1493.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 83.45703125, "completions/mean_terminated_length": 593.4722290039062, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.2512, "grad_norm": 0.0, "learning_rate": 1.1944444444444446e-06, "loss": 0.0, "num_tokens": 59015529.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 157 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8125, "completions/max_length": 1276.0, "completions/max_terminated_length": 1276.0, "completions/mean_length": 108.015625, "completions/mean_terminated_length": 576.0833740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 35.0, "epoch": 0.2528, "grad_norm": 0.0, "learning_rate": 1.1666666666666668e-06, "loss": 0.0, "num_tokens": 59412013.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 158 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8828125, "completions/max_length": 1390.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 65.76171875, "completions/mean_terminated_length": 561.1666870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 11.0, "epoch": 0.2544, "grad_norm": 0.0, "learning_rate": 1.138888888888889e-06, "loss": 0.0, "num_tokens": 59782528.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 159 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.78515625, "completions/max_length": 1319.0, "completions/max_terminated_length": 1319.0, "completions/mean_length": 120.625, "completions/mean_terminated_length": 561.4545288085938, "completions/min_length": 0.0, "completions/min_terminated_length": 33.0, "epoch": 0.256, "grad_norm": 0.0, "learning_rate": 1.111111111111111e-06, "loss": 0.0, "num_tokens": 60157520.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 160 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.86328125, "completions/max_length": 1412.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 100.26953125, "completions/mean_terminated_length": 733.4000244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 23.0, "epoch": 0.2576, "grad_norm": 0.0, "learning_rate": 1.0833333333333335e-06, "loss": 0.0, "num_tokens": 60532317.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 161 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.828125, "completions/max_length": 1395.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 98.61328125, "completions/mean_terminated_length": 573.75, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.2592, "grad_norm": 0.0, "learning_rate": 1.0555555555555557e-06, "loss": 0.0, "num_tokens": 60898842.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 162 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.859375, "completions/max_length": 1408.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 96.0625, "completions/mean_terminated_length": 683.1111450195312, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.2608, "grad_norm": 0.0, "learning_rate": 1.0277777777777777e-06, "loss": 0.0, "num_tokens": 61272290.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 163 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.84375, "completions/max_length": 1479.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 116.3671875, "completions/mean_terminated_length": 744.75, "completions/min_length": 0.0, "completions/min_terminated_length": 40.0, "epoch": 0.2624, "grad_norm": 0.0, "learning_rate": 1.0000000000000002e-06, "loss": 0.0, "num_tokens": 61660200.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 164 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8203125, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 129.41015625, "completions/mean_terminated_length": 720.1956787109375, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.264, "grad_norm": 0.0, "learning_rate": 9.722222222222224e-07, "loss": 0.0, "num_tokens": 62033737.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 165 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8671875, "completions/max_length": 1533.0, "completions/max_terminated_length": 1533.0, "completions/mean_length": 88.7734375, "completions/mean_terminated_length": 668.4117431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 75.0, "epoch": 0.2656, "grad_norm": 0.0, "learning_rate": 9.444444444444445e-07, "loss": 0.0, "num_tokens": 62408999.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 166 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8359375, "completions/max_length": 1532.0, "completions/max_terminated_length": 1532.0, "completions/mean_length": 100.5234375, "completions/mean_terminated_length": 612.7142944335938, "completions/min_length": 0.0, "completions/min_terminated_length": 23.0, "epoch": 0.2672, "grad_norm": 0.0, "learning_rate": 9.166666666666666e-07, "loss": 0.0, "num_tokens": 62801453.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 167 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.85546875, "completions/max_length": 1449.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 94.0234375, "completions/mean_terminated_length": 650.54052734375, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.2688, "grad_norm": 0.0, "learning_rate": 8.88888888888889e-07, "loss": 0.0, "num_tokens": 63175363.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 168 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8515625, "completions/max_length": 1249.0, "completions/max_terminated_length": 1249.0, "completions/mean_length": 91.1796875, "completions/mean_terminated_length": 614.26318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.2704, "grad_norm": 0.0, "learning_rate": 8.611111111111112e-07, "loss": 0.0, "num_tokens": 63556105.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 169 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.87109375, "completions/max_length": 1450.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 77.1328125, "completions/mean_terminated_length": 598.3636474609375, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.272, "grad_norm": 0.0, "learning_rate": 8.333333333333333e-07, "loss": 0.0, "num_tokens": 63907227.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 170 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.88671875, "completions/max_length": 1408.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 60.9375, "completions/mean_terminated_length": 537.9310302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.2736, "grad_norm": 0.0, "learning_rate": 8.055555555555557e-07, "loss": 0.0, "num_tokens": 64275131.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 171 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8671875, "completions/max_length": 1492.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 79.08203125, "completions/mean_terminated_length": 595.441162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 12.0, "epoch": 0.2752, "grad_norm": 0.0, "learning_rate": 7.777777777777779e-07, "loss": 0.0, "num_tokens": 64657800.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 172 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.859375, "completions/max_length": 1533.0, "completions/max_terminated_length": 1533.0, "completions/mean_length": 100.125, "completions/mean_terminated_length": 712.0, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.2768, "grad_norm": 0.0, "learning_rate": 7.5e-07, "loss": 0.0, "num_tokens": 65036304.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 173 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.84375, "completions/max_length": 1447.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 88.95703125, "completions/mean_terminated_length": 569.3250122070312, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.2784, "grad_norm": 0.0, "learning_rate": 7.222222222222222e-07, "loss": 0.0, "num_tokens": 65418037.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 174 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.84765625, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 94.8671875, "completions/mean_terminated_length": 622.7179565429688, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 6.944444444444446e-07, "loss": 0.0, "num_tokens": 65783035.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 175 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.91796875, "completions/max_length": 1308.0, "completions/max_terminated_length": 1308.0, "completions/mean_length": 38.36328125, "completions/mean_terminated_length": 467.66668701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 15.0, "epoch": 0.2816, "grad_norm": 0.0, "learning_rate": 6.666666666666667e-07, "loss": 0.0, "num_tokens": 66132056.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 176 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8359375, "completions/max_length": 1534.0, "completions/max_terminated_length": 1534.0, "completions/mean_length": 107.73046875, "completions/mean_terminated_length": 656.6428833007812, "completions/min_length": 0.0, "completions/min_terminated_length": 13.0, "epoch": 0.2832, "grad_norm": 0.0, "learning_rate": 6.388888888888889e-07, "loss": 0.0, "num_tokens": 66514435.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 177 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.85546875, "completions/max_length": 1408.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 84.62890625, "completions/mean_terminated_length": 585.54052734375, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.2848, "grad_norm": 0.0, "learning_rate": 6.111111111111112e-07, "loss": 0.0, "num_tokens": 66880156.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 178 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.828125, "completions/max_length": 1363.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 95.90234375, "completions/mean_terminated_length": 557.977294921875, "completions/min_length": 0.0, "completions/min_terminated_length": 21.0, "epoch": 0.2864, "grad_norm": 0.0, "learning_rate": 5.833333333333334e-07, "loss": 0.0, "num_tokens": 67255579.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 179 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.87890625, "completions/max_length": 1449.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 64.99609375, "completions/mean_terminated_length": 536.741943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 39.0, "epoch": 0.288, "grad_norm": 0.0, "learning_rate": 5.555555555555555e-07, "loss": 0.0, "num_tokens": 67609802.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 180 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.875, "completions/max_length": 1494.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 82.04296875, "completions/mean_terminated_length": 656.34375, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.2896, "grad_norm": 0.0, "learning_rate": 5.277777777777779e-07, "loss": 0.0, "num_tokens": 67981549.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 181 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.82421875, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 122.66015625, "completions/mean_terminated_length": 697.7999877929688, "completions/min_length": 0.0, "completions/min_terminated_length": 12.0, "epoch": 0.2912, "grad_norm": 0.0, "learning_rate": 5.000000000000001e-07, "loss": 0.0, "num_tokens": 68357534.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 182 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.83984375, "completions/max_length": 1523.0, "completions/max_terminated_length": 1523.0, "completions/mean_length": 107.69140625, "completions/mean_terminated_length": 672.4146118164062, "completions/min_length": 0.0, "completions/min_terminated_length": 29.0, "epoch": 0.2928, "grad_norm": 0.0, "learning_rate": 4.7222222222222226e-07, "loss": 0.0, "num_tokens": 68725335.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 183 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.86328125, "completions/max_length": 1514.0, "completions/max_terminated_length": 1514.0, "completions/mean_length": 88.5, "completions/mean_terminated_length": 647.3142700195312, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.2944, "grad_norm": 0.0, "learning_rate": 4.444444444444445e-07, "loss": 0.0, "num_tokens": 69098311.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 184 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.84765625, "completions/max_length": 1453.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 96.26953125, "completions/mean_terminated_length": 631.923095703125, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.296, "grad_norm": 0.0, "learning_rate": 4.1666666666666667e-07, "loss": 0.0, "num_tokens": 69474452.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 185 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.84375, "completions/max_length": 1505.0, "completions/max_terminated_length": 1505.0, "completions/mean_length": 95.234375, "completions/mean_terminated_length": 609.5, "completions/min_length": 0.0, "completions/min_terminated_length": 58.0, "epoch": 0.2976, "grad_norm": 0.0, "learning_rate": 3.8888888888888895e-07, "loss": 0.0, "num_tokens": 69841976.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 186 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.890625, "completions/max_length": 1480.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 68.9921875, "completions/mean_terminated_length": 630.7857666015625, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.2992, "grad_norm": 0.0, "learning_rate": 3.611111111111111e-07, "loss": 0.0, "num_tokens": 70207238.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 187 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8671875, "completions/max_length": 1457.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 72.6796875, "completions/mean_terminated_length": 547.2352905273438, "completions/min_length": 0.0, "completions/min_terminated_length": 14.0, "epoch": 0.3008, "grad_norm": 0.0, "learning_rate": 3.3333333333333335e-07, "loss": 0.0, "num_tokens": 70591916.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 188 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8203125, "completions/max_length": 1401.0, "completions/max_terminated_length": 1401.0, "completions/mean_length": 105.11328125, "completions/mean_terminated_length": 584.978271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.3024, "grad_norm": 0.0, "learning_rate": 3.055555555555556e-07, "loss": 0.0, "num_tokens": 70961801.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 189 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.83984375, "completions/max_length": 1490.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 98.6875, "completions/mean_terminated_length": 616.1951293945312, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.304, "grad_norm": 0.0, "learning_rate": 2.7777777777777776e-07, "loss": 0.0, "num_tokens": 71336625.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 190 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.81640625, "completions/max_length": 1498.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 109.30078125, "completions/mean_terminated_length": 595.3403930664062, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.3056, "grad_norm": 0.0, "learning_rate": 2.5000000000000004e-07, "loss": 0.0, "num_tokens": 71719838.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 191 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8515625, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 73.8125, "completions/mean_terminated_length": 497.2631530761719, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.3072, "grad_norm": 0.0, "learning_rate": 2.2222222222222224e-07, "loss": 0.0, "num_tokens": 72097886.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 192 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.8515625, "completions/max_length": 1516.0, "completions/max_terminated_length": 1516.0, "completions/mean_length": 109.515625, "completions/mean_terminated_length": 737.7894897460938, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.3088, "grad_norm": 0.0, "learning_rate": 1.9444444444444447e-07, "loss": 0.0, "num_tokens": 72485010.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 193 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.859375, "completions/max_length": 1482.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 95.6328125, "completions/mean_terminated_length": 680.0555419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 21.0, "epoch": 0.3104, "grad_norm": 0.0, "learning_rate": 1.6666666666666668e-07, "loss": 0.0, "num_tokens": 72866236.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 194 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.828125, "completions/max_length": 1469.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 98.37890625, "completions/mean_terminated_length": 572.3863525390625, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.312, "grad_norm": 0.0, "learning_rate": 1.3888888888888888e-07, "loss": 0.0, "num_tokens": 73242717.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 195 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.84765625, "completions/max_length": 1502.0, "completions/max_terminated_length": 1502.0, "completions/mean_length": 90.1328125, "completions/mean_terminated_length": 591.6410522460938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.3136, "grad_norm": 0.0, "learning_rate": 1.1111111111111112e-07, "loss": 0.0, "num_tokens": 73620487.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 196 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.84375, "completions/max_length": 1494.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 100.1953125, "completions/mean_terminated_length": 641.25, "completions/min_length": 0.0, "completions/min_terminated_length": 38.0, "epoch": 0.3152, "grad_norm": 0.0, "learning_rate": 8.333333333333334e-08, "loss": 0.0, "num_tokens": 74000257.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 197 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.83203125, "completions/max_length": 1489.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 108.84375, "completions/mean_terminated_length": 648.0, "completions/min_length": 0.0, "completions/min_terminated_length": 16.0, "epoch": 0.3168, "grad_norm": 0.0, "learning_rate": 5.555555555555556e-08, "loss": 0.0, "num_tokens": 74382729.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 198 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.859375, "completions/max_length": 1394.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 85.1484375, "completions/mean_terminated_length": 605.5, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.3184, "grad_norm": 0.0, "learning_rate": 2.777777777777778e-08, "loss": 0.0, "num_tokens": 74760231.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 199 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.89453125, "completions/max_length": 1493.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 72.171875, "completions/mean_terminated_length": 684.2963256835938, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 0.0, "num_tokens": 75104483.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 200 }, { "epoch": 0.32, "step": 200, "total_flos": 0.0, "train_loss": -0.004646302500041202, "train_runtime": 12222.6581, "train_samples_per_second": 4.189, "train_steps_per_second": 0.016 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 75104483, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }