{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.32, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.390625, "calib/avg_num_step_conf": 2.19921875, "calib/ece": 0.6720437956204381, "calib/final_conf_rate": 0.53515625, "calib/format_rate": 0.30859375, "calib/frac_conf_gt_0.9": 0.4744525547445255, "calib/gap": 0.09598290598290582, "calib/mean_conf": 0.8180291970802919, "calib/mu_c": 0.9, "calib/mu_w": 0.8040170940170942, "calib/nonempty_final_conf_rate": 0.53515625, "calib/nonempty_reasoning_rate": 0.51171875, "calib/nonempty_step_conf_rate": 0.4375, "calib/pce": 0.6720437956204381, "calib/std_conf": 0.25329605140720096, "calib/step_conf_rate": 0.4375, "calib/step_q_c": 0.7754022988505747, "calib/step_q_c_n": 87.0, "calib/step_q_gap": -0.031173331401526228, "calib/step_q_w": 0.8065756302521009, "calib/step_q_w_n": 476.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2501.0, "completions/max_terminated_length": 2501.0, "completions/mean_length": 332.60546875, "completions/mean_terminated_length": 333.9098205566406, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0016, "grad_norm": 47.9720344543457, "learning_rate": 0.0, "loss": -0.133, "num_tokens": 338475.0, "reward": 0.232421875, "reward_std": 0.3509902358055115, "rewards/accuracy_reward_step": 0.078125, "rewards/format_reward_step": 0.30859375, "step": 1 }, { "calib/answer_extract_rate": 0.40625, "calib/avg_num_step_conf": 1.8828125, "calib/ece": 0.657346153846154, "calib/final_conf_rate": 0.5078125, "calib/format_rate": 0.328125, "calib/frac_conf_gt_0.9": 0.5538461538461539, "calib/gap": 0.09180690399136981, "calib/mean_conf": 0.8650384615384616, "calib/mu_c": 0.9377777777777778, "calib/mu_w": 0.845970873786408, "calib/nonempty_final_conf_rate": 0.5078125, "calib/nonempty_reasoning_rate": 0.4921875, "calib/nonempty_step_conf_rate": 0.421875, "calib/pce": 0.657346153846154, "calib/std_conf": 0.1902169774775902, "calib/step_conf_rate": 0.421875, "calib/step_q_c": 0.8529457364341085, "calib/step_q_c_n": 129.0, "calib/step_q_gap": 0.04926301688736623, "calib/step_q_w": 0.8036827195467423, "calib/step_q_w_n": 353.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2868.0, "completions/max_terminated_length": 2868.0, "completions/mean_length": 369.18359375, "completions/mean_terminated_length": 370.63140869140625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 17.44351577758789, "learning_rate": 5e-08, "loss": -0.1458, "num_tokens": 693922.0, "reward": 0.26953125, "reward_std": 0.4115494191646576, "rewards/accuracy_reward_step": 0.10546875, "rewards/format_reward_step": 0.328125, "step": 2 }, { "calib/answer_extract_rate": 0.4453125, "calib/avg_num_step_conf": 2.08984375, "calib/ece": 0.6778014184397165, "calib/final_conf_rate": 0.55078125, "calib/format_rate": 0.32421875, "calib/frac_conf_gt_0.9": 0.45390070921985815, "calib/gap": 0.043058216654384696, "calib/mean_conf": 0.8409219858156027, "calib/mu_c": 0.8769565217391305, "calib/mu_w": 0.8338983050847458, "calib/nonempty_final_conf_rate": 0.55078125, "calib/nonempty_reasoning_rate": 0.52734375, "calib/nonempty_step_conf_rate": 0.4140625, "calib/pce": 0.6778014184397165, "calib/std_conf": 0.1980575914137701, "calib/step_conf_rate": 0.4140625, "calib/step_q_c": 0.7861038961038961, "calib/step_q_c_n": 77.0, "calib/step_q_gap": 0.025776385186865425, "calib/step_q_w": 0.7603275109170307, "calib/step_q_w_n": 458.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2866.0, "completions/max_terminated_length": 2866.0, "completions/mean_length": 308.23046875, "completions/mean_terminated_length": 310.657470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0048, "grad_norm": 39.398468017578125, "learning_rate": 1e-07, "loss": -0.0842, "num_tokens": 1029013.0, "reward": 0.251953125, "reward_std": 0.37410488724708557, "rewards/accuracy_reward_step": 0.08984375, "rewards/format_reward_step": 0.32421875, "step": 3 }, { "calib/answer_extract_rate": 0.4453125, "calib/avg_num_step_conf": 2.72265625, "calib/ece": 0.6189041095890411, "calib/final_conf_rate": 0.5703125, "calib/format_rate": 0.33984375, "calib/frac_conf_gt_0.9": 0.4178082191780822, "calib/gap": 0.11410256410256403, "calib/mean_conf": 0.7969863013698631, "calib/mu_c": 0.8907692307692306, "calib/mu_w": 0.7766666666666666, "calib/nonempty_final_conf_rate": 0.5703125, "calib/nonempty_reasoning_rate": 0.58984375, "calib/nonempty_step_conf_rate": 0.5, "calib/pce": 0.6189041095890411, "calib/std_conf": 0.2410262586246447, "calib/step_conf_rate": 0.5, "calib/step_q_c": 0.8159722222222222, "calib/step_q_c_n": 72.0, "calib/step_q_gap": 0.04115622222222226, "calib/step_q_w": 0.774816, "calib/step_q_w_n": 625.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2706.0, "completions/max_terminated_length": 2706.0, "completions/mean_length": 356.44140625, "completions/mean_terminated_length": 357.8392333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 15.860779762268066, "learning_rate": 1.5e-07, "loss": -0.0715, "num_tokens": 1378878.0, "reward": 0.271484375, "reward_std": 0.3853558599948883, "rewards/accuracy_reward_step": 0.1015625, "rewards/format_reward_step": 0.33984375, "step": 4 }, { "calib/answer_extract_rate": 0.4140625, "calib/avg_num_step_conf": 2.06640625, "calib/ece": 0.6523021582733812, "calib/final_conf_rate": 0.54296875, "calib/format_rate": 0.3203125, "calib/frac_conf_gt_0.9": 0.5107913669064749, "calib/gap": 0.024090909090909052, "calib/mean_conf": 0.8609352517985612, "calib/mu_c": 0.88, "calib/mu_w": 0.855909090909091, "calib/nonempty_final_conf_rate": 0.54296875, "calib/nonempty_reasoning_rate": 0.51953125, "calib/nonempty_step_conf_rate": 0.4453125, "calib/pce": 0.6523021582733812, "calib/std_conf": 0.17768654028664443, "calib/step_conf_rate": 0.4453125, "calib/step_q_c": 0.8280733944954128, "calib/step_q_c_n": 109.0, "calib/step_q_gap": 0.05551969521140332, "calib/step_q_w": 0.7725536992840095, "calib/step_q_w_n": 419.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2552.0, "completions/max_terminated_length": 2552.0, "completions/mean_length": 311.7421875, "completions/mean_terminated_length": 314.19683837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008, "grad_norm": 21.699703216552734, "learning_rate": 2e-07, "loss": -0.0928, "num_tokens": 1720028.0, "reward": 0.27734375, "reward_std": 0.39154917001724243, "rewards/accuracy_reward_step": 0.1171875, "rewards/format_reward_step": 0.3203125, "step": 5 }, { "calib/answer_extract_rate": 0.42578125, "calib/avg_num_step_conf": 2.34375, "calib/ece": 0.689857142857143, "calib/final_conf_rate": 0.546875, "calib/format_rate": 0.33203125, "calib/frac_conf_gt_0.9": 0.5857142857142857, "calib/gap": 0.06312499999999999, "calib/mean_conf": 0.8898571428571428, "calib/mu_c": 0.9403571428571429, "calib/mu_w": 0.8772321428571429, "calib/nonempty_final_conf_rate": 0.546875, "calib/nonempty_reasoning_rate": 0.53515625, "calib/nonempty_step_conf_rate": 0.45703125, "calib/pce": 0.689857142857143, "calib/std_conf": 0.15006658386142047, "calib/step_conf_rate": 0.45703125, "calib/step_q_c": 0.7554385964912281, "calib/step_q_c_n": 114.0, "calib/step_q_gap": -0.06482889322070595, "calib/step_q_w": 0.8202674897119341, "calib/step_q_w_n": 486.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2843.0, "completions/max_terminated_length": 2843.0, "completions/mean_length": 334.62890625, "completions/mean_terminated_length": 337.2637634277344, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 60.161705017089844, "learning_rate": 2.5e-07, "loss": -0.1094, "num_tokens": 2067637.0, "reward": 0.279296875, "reward_std": 0.4157405495643616, "rewards/accuracy_reward_step": 0.11328125, "rewards/format_reward_step": 0.33203125, "step": 6 }, { "calib/answer_extract_rate": 0.4609375, "calib/avg_num_step_conf": 2.84375, "calib/ece": 0.6351578947368423, "calib/final_conf_rate": 0.59375, "calib/format_rate": 0.33984375, "calib/frac_conf_gt_0.9": 0.46710526315789475, "calib/gap": 0.07446974140229257, "calib/mean_conf": 0.8391052631578948, "calib/mu_c": 0.8983870967741934, "calib/mu_w": 0.8239173553719008, "calib/nonempty_final_conf_rate": 0.59375, "calib/nonempty_reasoning_rate": 0.546875, "calib/nonempty_step_conf_rate": 0.43359375, "calib/pce": 0.6351578947368423, "calib/std_conf": 0.19963740677244204, "calib/step_conf_rate": 0.43359375, "calib/step_q_c": 0.835945945945946, "calib/step_q_c_n": 111.0, "calib/step_q_gap": 0.06098646458451962, "calib/step_q_w": 0.7749594813614263, "calib/step_q_w_n": 617.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2738.0, "completions/max_terminated_length": 2738.0, "completions/mean_length": 373.12109375, "completions/mean_terminated_length": 377.54547119140625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0112, "grad_norm": 17.492589950561523, "learning_rate": 3e-07, "loss": -0.0997, "num_tokens": 2424132.0, "reward": 0.294921875, "reward_std": 0.3952005207538605, "rewards/accuracy_reward_step": 0.125, "rewards/format_reward_step": 0.33984375, "step": 7 }, { "calib/answer_extract_rate": 0.47265625, "calib/avg_num_step_conf": 2.17578125, "calib/ece": 0.6398734177215191, "calib/final_conf_rate": 0.6171875, "calib/format_rate": 0.34375, "calib/frac_conf_gt_0.9": 0.5063291139240507, "calib/gap": 0.09216724738675963, "calib/mean_conf": 0.8613924050632913, "calib/mu_c": 0.9331428571428573, "calib/mu_w": 0.8409756097560976, "calib/nonempty_final_conf_rate": 0.6171875, "calib/nonempty_reasoning_rate": 0.58203125, "calib/nonempty_step_conf_rate": 0.46875, "calib/pce": 0.6398734177215191, "calib/std_conf": 0.16952080024135338, "calib/step_conf_rate": 0.46875, "calib/step_q_c": 0.8074380165289258, "calib/step_q_c_n": 121.0, "calib/step_q_gap": 0.02860774129956789, "calib/step_q_w": 0.7788302752293579, "calib/step_q_w_n": 436.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2909.0, "completions/max_terminated_length": 2909.0, "completions/mean_length": 349.1953125, "completions/mean_terminated_length": 351.94488525390625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 11.481454849243164, "learning_rate": 3.5e-07, "loss": 0.008, "num_tokens": 2775366.0, "reward": 0.3125, "reward_std": 0.4172360301017761, "rewards/accuracy_reward_step": 0.140625, "rewards/format_reward_step": 0.34375, "step": 8 }, { "calib/answer_extract_rate": 0.4765625, "calib/avg_num_step_conf": 2.3203125, "calib/ece": 0.7459019108280255, "calib/final_conf_rate": 0.61328125, "calib/format_rate": 0.375, "calib/frac_conf_gt_0.9": 0.39490445859872614, "calib/gap": 0.018681028368794372, "calib/mean_conf": 0.8363477707006369, "calib/mu_c": 0.853125, "calib/mu_w": 0.8344439716312056, "calib/nonempty_final_conf_rate": 0.61328125, "calib/nonempty_reasoning_rate": 0.59765625, "calib/nonempty_step_conf_rate": 0.51171875, "calib/pce": 0.7401694267515924, "calib/std_conf": 0.19627053621659557, "calib/step_conf_rate": 0.51171875, "calib/step_q_c": 0.8187037037037037, "calib/step_q_c_n": 54.0, "calib/step_q_gap": 0.026277777777777844, "calib/step_q_w": 0.7924259259259259, "calib/step_q_w_n": 540.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2760.0, "completions/max_terminated_length": 2760.0, "completions/mean_length": 354.1796875, "completions/mean_terminated_length": 355.5686340332031, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0144, "grad_norm": 17.99668312072754, "learning_rate": 4e-07, "loss": -0.0382, "num_tokens": 3127356.0, "reward": 0.25390625, "reward_std": 0.328166127204895, "rewards/accuracy_reward_step": 0.06640625, "rewards/format_reward_step": 0.375, "step": 9 }, { "calib/answer_extract_rate": 0.56640625, "calib/avg_num_step_conf": 2.5234375, "calib/ece": 0.7055191256830601, "calib/final_conf_rate": 0.71484375, "calib/format_rate": 0.47265625, "calib/frac_conf_gt_0.9": 0.5027322404371585, "calib/gap": 0.05723646723646714, "calib/mean_conf": 0.8530601092896175, "calib/mu_c": 0.9018518518518519, "calib/mu_w": 0.8446153846153848, "calib/nonempty_final_conf_rate": 0.71484375, "calib/nonempty_reasoning_rate": 0.62109375, "calib/nonempty_step_conf_rate": 0.55078125, "calib/pce": 0.7055191256830601, "calib/std_conf": 0.188480932618246, "calib/step_conf_rate": 0.55078125, "calib/step_q_c": 0.8683146067415732, "calib/step_q_c_n": 89.0, "calib/step_q_gap": 0.07136667137353003, "calib/step_q_w": 0.7969479353680432, "calib/step_q_w_n": 557.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2785.0, "completions/max_terminated_length": 2785.0, "completions/mean_length": 322.42578125, "completions/mean_terminated_length": 323.6902160644531, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 10.932899475097656, "learning_rate": 4.5e-07, "loss": -0.0028, "num_tokens": 3470017.0, "reward": 0.345703125, "reward_std": 0.3697780966758728, "rewards/accuracy_reward_step": 0.109375, "rewards/format_reward_step": 0.47265625, "step": 10 }, { "calib/answer_extract_rate": 0.515625, "calib/avg_num_step_conf": 2.28125, "calib/ece": 0.6898159509202453, "calib/final_conf_rate": 0.63671875, "calib/format_rate": 0.37890625, "calib/frac_conf_gt_0.9": 0.5276073619631901, "calib/gap": 0.06452937649880097, "calib/mean_conf": 0.8370552147239264, "calib/mu_c": 0.8920833333333333, "calib/mu_w": 0.8275539568345324, "calib/nonempty_final_conf_rate": 0.63671875, "calib/nonempty_reasoning_rate": 0.609375, "calib/nonempty_step_conf_rate": 0.48828125, "calib/pce": 0.6898159509202453, "calib/std_conf": 0.2278276512391015, "calib/step_conf_rate": 0.48828125, "calib/step_q_c": 0.833896103896104, "calib/step_q_c_n": 77.0, "calib/step_q_gap": 0.056894131509516166, "calib/step_q_w": 0.7770019723865879, "calib/step_q_w_n": 507.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2631.0, "completions/max_terminated_length": 2631.0, "completions/mean_length": 315.140625, "completions/mean_terminated_length": 317.6220397949219, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0176, "grad_norm": 11.573603630065918, "learning_rate": 5e-07, "loss": -0.0437, "num_tokens": 3807693.0, "reward": 0.283203125, "reward_std": 0.37644994258880615, "rewards/accuracy_reward_step": 0.09375, "rewards/format_reward_step": 0.37890625, "step": 11 }, { "calib/answer_extract_rate": 0.50390625, "calib/avg_num_step_conf": 2.5234375, "calib/ece": 0.6290196078431373, "calib/final_conf_rate": 0.6640625, "calib/format_rate": 0.36328125, "calib/frac_conf_gt_0.9": 0.47058823529411764, "calib/gap": 0.05849759533970078, "calib/mean_conf": 0.8466666666666667, "calib/mu_c": 0.8924324324324325, "calib/mu_w": 0.8339348370927318, "calib/nonempty_final_conf_rate": 0.6640625, "calib/nonempty_reasoning_rate": 0.6484375, "calib/nonempty_step_conf_rate": 0.5234375, "calib/pce": 0.6290196078431373, "calib/std_conf": 0.18737121939137066, "calib/step_conf_rate": 0.5234375, "calib/step_q_c": 0.7443396226415094, "calib/step_q_c_n": 159.0, "calib/step_q_gap": -0.07715524388826467, "calib/step_q_w": 0.8214948665297741, "calib/step_q_w_n": 487.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2822.0, "completions/max_terminated_length": 2822.0, "completions/mean_length": 367.65234375, "completions/mean_terminated_length": 369.0941467285156, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0192, "grad_norm": 13.031610488891602, "learning_rate": 5.5e-07, "loss": -0.0015, "num_tokens": 4162716.0, "reward": 0.337890625, "reward_std": 0.39855268597602844, "rewards/accuracy_reward_step": 0.15625, "rewards/format_reward_step": 0.36328125, "step": 12 }, { "calib/answer_extract_rate": 0.55078125, "calib/avg_num_step_conf": 2.5625, "calib/ece": 0.6973684210526316, "calib/final_conf_rate": 0.66796875, "calib/format_rate": 0.453125, "calib/frac_conf_gt_0.9": 0.5146198830409356, "calib/gap": 0.045510638297872275, "calib/mean_conf": 0.8728070175438597, "calib/mu_c": 0.9103333333333332, "calib/mu_w": 0.8648226950354609, "calib/nonempty_final_conf_rate": 0.66796875, "calib/nonempty_reasoning_rate": 0.66015625, "calib/nonempty_step_conf_rate": 0.57421875, "calib/pce": 0.6973684210526316, "calib/std_conf": 0.16450266411356892, "calib/step_conf_rate": 0.57421875, "calib/step_q_c": 0.8602912621359223, "calib/step_q_c_n": 103.0, "calib/step_q_gap": 0.08663845924261304, "calib/step_q_w": 0.7736528028933093, "calib/step_q_w_n": 553.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2592.0, "completions/max_terminated_length": 2592.0, "completions/mean_length": 302.15625, "completions/mean_terminated_length": 304.5354309082031, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0208, "grad_norm": 20.21171760559082, "learning_rate": 6e-07, "loss": -0.0033, "num_tokens": 4502212.0, "reward": 0.3515625, "reward_std": 0.4020528793334961, "rewards/accuracy_reward_step": 0.125, "rewards/format_reward_step": 0.453125, "step": 13 }, { "calib/answer_extract_rate": 0.55078125, "calib/avg_num_step_conf": 2.96875, "calib/ece": 0.5569461077844313, "calib/final_conf_rate": 0.65234375, "calib/format_rate": 0.4453125, "calib/frac_conf_gt_0.9": 0.48502994011976047, "calib/gap": 0.10652485994397765, "calib/mean_conf": 0.8326347305389222, "calib/mu_c": 0.9085416666666667, "calib/mu_w": 0.802016806722689, "calib/nonempty_final_conf_rate": 0.65234375, "calib/nonempty_reasoning_rate": 0.65234375, "calib/nonempty_step_conf_rate": 0.5625, "calib/pce": 0.5510778443113774, "calib/std_conf": 0.2198589738323819, "calib/step_conf_rate": 0.5625, "calib/step_q_c": 0.8128994082840236, "calib/step_q_c_n": 169.0, "calib/step_q_gap": 0.08107199711651092, "calib/step_q_w": 0.7318274111675127, "calib/step_q_w_n": 591.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2400.0, "completions/max_terminated_length": 2400.0, "completions/mean_length": 316.21875, "completions/mean_terminated_length": 318.7086486816406, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0224, "grad_norm": 7.095739364624023, "learning_rate": 6.5e-07, "loss": -0.0308, "num_tokens": 4844676.0, "reward": 0.4140625, "reward_std": 0.40704041719436646, "rewards/accuracy_reward_step": 0.19140625, "rewards/format_reward_step": 0.4453125, "step": 14 }, { "calib/answer_extract_rate": 0.53515625, "calib/avg_num_step_conf": 2.64453125, "calib/ece": 0.7221173184357543, "calib/final_conf_rate": 0.69921875, "calib/format_rate": 0.4375, "calib/frac_conf_gt_0.9": 0.45251396648044695, "calib/gap": 0.047625942684766054, "calib/mean_conf": 0.867368715083799, "calib/mu_c": 0.9080769230769229, "calib/mu_w": 0.8604509803921568, "calib/nonempty_final_conf_rate": 0.69921875, "calib/nonempty_reasoning_rate": 0.6484375, "calib/nonempty_step_conf_rate": 0.55859375, "calib/pce": 0.7221173184357543, "calib/std_conf": 0.15471728884704963, "calib/step_conf_rate": 0.55859375, "calib/step_q_c": 0.8536585365853658, "calib/step_q_c_n": 82.0, "calib/step_q_gap": 0.033087108013937216, "calib/step_q_w": 0.8205714285714286, "calib/step_q_w_n": 595.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2624.0, "completions/max_terminated_length": 2624.0, "completions/mean_length": 330.5859375, "completions/mean_terminated_length": 333.18896484375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.024, "grad_norm": 15.420191764831543, "learning_rate": 7e-07, "loss": -0.0287, "num_tokens": 5190482.0, "reward": 0.32421875, "reward_std": 0.38361692428588867, "rewards/accuracy_reward_step": 0.10546875, "rewards/format_reward_step": 0.4375, "step": 15 }, { "calib/answer_extract_rate": 0.64453125, "calib/avg_num_step_conf": 3.0078125, "calib/ece": 0.6353367875647669, "calib/final_conf_rate": 0.75390625, "calib/format_rate": 0.5, "calib/frac_conf_gt_0.9": 0.5077720207253886, "calib/gap": 0.021999408459035807, "calib/mean_conf": 0.8736787564766839, "calib/mu_c": 0.8904347826086957, "calib/mu_w": 0.8684353741496599, "calib/nonempty_final_conf_rate": 0.75390625, "calib/nonempty_reasoning_rate": 0.75390625, "calib/nonempty_step_conf_rate": 0.63671875, "calib/pce": 0.6353367875647669, "calib/std_conf": 0.15623630440125438, "calib/step_conf_rate": 0.63671875, "calib/step_q_c": 0.8060789473684209, "calib/step_q_c_n": 190.0, "calib/step_q_gap": -0.03312794918330331, "calib/step_q_w": 0.8392068965517242, "calib/step_q_w_n": 580.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2530.0, "completions/max_terminated_length": 2530.0, "completions/mean_length": 299.28515625, "completions/mean_terminated_length": 300.4588317871094, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0256, "grad_norm": 30.280681610107422, "learning_rate": 7.5e-07, "loss": -0.049, "num_tokens": 5528243.0, "reward": 0.4296875, "reward_std": 0.421233594417572, "rewards/accuracy_reward_step": 0.1796875, "rewards/format_reward_step": 0.5, "step": 16 }, { "calib/answer_extract_rate": 0.5859375, "calib/avg_num_step_conf": 3.3828125, "calib/ece": 0.680291208791209, "calib/final_conf_rate": 0.7109375, "calib/format_rate": 0.48046875, "calib/frac_conf_gt_0.9": 0.4945054945054945, "calib/gap": 0.054295505389464926, "calib/mean_conf": 0.8616098901098903, "calib/mu_c": 0.906060606060606, "calib/mu_w": 0.8517651006711411, "calib/nonempty_final_conf_rate": 0.7109375, "calib/nonempty_reasoning_rate": 0.70703125, "calib/nonempty_step_conf_rate": 0.6171875, "calib/pce": 0.680291208791209, "calib/std_conf": 0.17178502573571683, "calib/step_conf_rate": 0.6171875, "calib/step_q_c": 0.8084076433121017, "calib/step_q_c_n": 157.0, "calib/step_q_gap": 0.051073369687277936, "calib/step_q_w": 0.7573342736248238, "calib/step_q_w_n": 709.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2685.0, "completions/max_terminated_length": 2685.0, "completions/mean_length": 325.03125, "completions/mean_terminated_length": 327.5905456542969, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0272, "grad_norm": 35.424591064453125, "learning_rate": 8e-07, "loss": -0.0219, "num_tokens": 5862803.0, "reward": 0.369140625, "reward_std": 0.41405531764030457, "rewards/accuracy_reward_step": 0.12890625, "rewards/format_reward_step": 0.48046875, "step": 17 }, { "calib/answer_extract_rate": 0.59375, "calib/avg_num_step_conf": 3.09765625, "calib/ece": 0.7170000000000001, "calib/final_conf_rate": 0.703125, "calib/format_rate": 0.50390625, "calib/frac_conf_gt_0.9": 0.48333333333333334, "calib/gap": 0.039259259259259216, "calib/mean_conf": 0.8603333333333334, "calib/mu_c": 0.8937037037037038, "calib/mu_w": 0.8544444444444446, "calib/nonempty_final_conf_rate": 0.703125, "calib/nonempty_reasoning_rate": 0.6953125, "calib/nonempty_step_conf_rate": 0.6328125, "calib/pce": 0.7136666666666668, "calib/std_conf": 0.16531081835943667, "calib/step_conf_rate": 0.6328125, "calib/step_q_c": 0.8222962962962964, "calib/step_q_c_n": 135.0, "calib/step_q_gap": 0.037478667116965014, "calib/step_q_w": 0.7848176291793314, "calib/step_q_w_n": 658.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2649.0, "completions/max_terminated_length": 2649.0, "completions/mean_length": 348.58203125, "completions/mean_terminated_length": 351.3267822265625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0288, "grad_norm": 98.14373779296875, "learning_rate": 8.499999999999999e-07, "loss": -0.0782, "num_tokens": 6213768.0, "reward": 0.369140625, "reward_std": 0.40399375557899475, "rewards/accuracy_reward_step": 0.1171875, "rewards/format_reward_step": 0.50390625, "step": 18 }, { "calib/answer_extract_rate": 0.53515625, "calib/avg_num_step_conf": 2.47265625, "calib/ece": 0.6429113924050631, "calib/final_conf_rate": 0.6171875, "calib/format_rate": 0.41015625, "calib/frac_conf_gt_0.9": 0.5, "calib/gap": 0.08930424242424251, "calib/mean_conf": 0.8517721518987342, "calib/mu_c": 0.9224242424242424, "calib/mu_w": 0.8331199999999999, "calib/nonempty_final_conf_rate": 0.6171875, "calib/nonempty_reasoning_rate": 0.6328125, "calib/nonempty_step_conf_rate": 0.5234375, "calib/pce": 0.6429113924050631, "calib/std_conf": 0.18752727729803034, "calib/step_conf_rate": 0.5234375, "calib/step_q_c": 0.8583458646616541, "calib/step_q_c_n": 133.0, "calib/step_q_gap": 0.08488586466165404, "calib/step_q_w": 0.77346, "calib/step_q_w_n": 500.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2535.0, "completions/max_terminated_length": 2535.0, "completions/mean_length": 258.1328125, "completions/mean_terminated_length": 260.16534423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0304, "grad_norm": 32.44593811035156, "learning_rate": 9e-07, "loss": -0.1044, "num_tokens": 6540946.0, "reward": 0.333984375, "reward_std": 0.4141427278518677, "rewards/accuracy_reward_step": 0.12890625, "rewards/format_reward_step": 0.41015625, "step": 19 }, { "calib/answer_extract_rate": 0.57421875, "calib/avg_num_step_conf": 2.9765625, "calib/ece": 0.7031005917159765, "calib/final_conf_rate": 0.66015625, "calib/format_rate": 0.46875, "calib/frac_conf_gt_0.9": 0.5739644970414202, "calib/gap": -0.00785918003565067, "calib/mean_conf": 0.885112426035503, "calib/mu_c": 0.8787878787878787, "calib/mu_w": 0.8866470588235293, "calib/nonempty_final_conf_rate": 0.66015625, "calib/nonempty_reasoning_rate": 0.6953125, "calib/nonempty_step_conf_rate": 0.60546875, "calib/pce": 0.6964733727810652, "calib/std_conf": 0.15612767616829498, "calib/step_conf_rate": 0.60546875, "calib/step_q_c": 0.80135593220339, "calib/step_q_c_n": 118.0, "calib/step_q_gap": -0.021159595746920612, "calib/step_q_w": 0.8225155279503106, "calib/step_q_w_n": 644.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2500.0, "completions/max_terminated_length": 2500.0, "completions/mean_length": 297.640625, "completions/mean_terminated_length": 299.9842529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.032, "grad_norm": 7.341494560241699, "learning_rate": 9.499999999999999e-07, "loss": -0.0809, "num_tokens": 6878446.0, "reward": 0.36328125, "reward_std": 0.38229066133499146, "rewards/accuracy_reward_step": 0.12890625, "rewards/format_reward_step": 0.46875, "step": 20 }, { "calib/answer_extract_rate": 0.6640625, "calib/avg_num_step_conf": 2.97265625, "calib/ece": 0.6631351351351351, "calib/final_conf_rate": 0.72265625, "calib/format_rate": 0.52734375, "calib/frac_conf_gt_0.9": 0.5297297297297298, "calib/gap": 0.06178123881131381, "calib/mean_conf": 0.8685405405405405, "calib/mu_c": 0.9176315789473684, "calib/mu_w": 0.8558503401360545, "calib/nonempty_final_conf_rate": 0.72265625, "calib/nonempty_reasoning_rate": 0.734375, "calib/nonempty_step_conf_rate": 0.609375, "calib/pce": 0.6631351351351351, "calib/std_conf": 0.1669844780931214, "calib/step_conf_rate": 0.609375, "calib/step_q_c": 0.8349693251533742, "calib/step_q_c_n": 163.0, "calib/step_q_gap": 0.027761967293842393, "calib/step_q_w": 0.8072073578595318, "calib/step_q_w_n": 598.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2633.0, "completions/max_terminated_length": 2633.0, "completions/mean_length": 319.4296875, "completions/mean_terminated_length": 320.682373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0336, "grad_norm": 12.134476661682129, "learning_rate": 1e-06, "loss": -0.1004, "num_tokens": 7218700.0, "reward": 0.419921875, "reward_std": 0.4154573082923889, "rewards/accuracy_reward_step": 0.15625, "rewards/format_reward_step": 0.52734375, "step": 21 }, { "calib/answer_extract_rate": 0.71484375, "calib/avg_num_step_conf": 3.5625, "calib/ece": 0.7179272727272729, "calib/final_conf_rate": 0.7734375, "calib/format_rate": 0.62109375, "calib/frac_conf_gt_0.9": 0.5909090909090909, "calib/gap": 0.031584218077475024, "calib/mean_conf": 0.8856040404040405, "calib/mu_c": 0.9117647058823529, "calib/mu_w": 0.8801804878048779, "calib/nonempty_final_conf_rate": 0.7734375, "calib/nonempty_reasoning_rate": 0.78125, "calib/nonempty_step_conf_rate": 0.703125, "calib/pce": 0.7159070707070709, "calib/std_conf": 0.15533752622718763, "calib/step_conf_rate": 0.703125, "calib/step_q_c": 0.8201428571428571, "calib/step_q_c_n": 140.0, "calib/step_q_gap": -0.0040799407846040525, "calib/step_q_w": 0.8242227979274611, "calib/step_q_w_n": 772.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2740.0, "completions/max_terminated_length": 2740.0, "completions/mean_length": 298.83203125, "completions/mean_terminated_length": 301.1850280761719, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0352, "grad_norm": 25.838075637817383, "learning_rate": 9.944444444444444e-07, "loss": -0.0722, "num_tokens": 7557345.0, "reward": 0.447265625, "reward_std": 0.4125175476074219, "rewards/accuracy_reward_step": 0.13671875, "rewards/format_reward_step": 0.62109375, "step": 22 }, { "calib/answer_extract_rate": 0.6953125, "calib/avg_num_step_conf": 3.41015625, "calib/ece": 0.6334895833333334, "calib/final_conf_rate": 0.75, "calib/format_rate": 0.58984375, "calib/frac_conf_gt_0.9": 0.640625, "calib/gap": 0.04990498167503732, "calib/mean_conf": 0.90953125, "calib/mu_c": 0.9456603773584905, "calib/mu_w": 0.8957553956834532, "calib/nonempty_final_conf_rate": 0.75, "calib/nonempty_reasoning_rate": 0.75390625, "calib/nonempty_step_conf_rate": 0.671875, "calib/pce": 0.6334895833333334, "calib/std_conf": 0.11774482058008964, "calib/step_conf_rate": 0.671875, "calib/step_q_c": 0.881795918367347, "calib/step_q_c_n": 245.0, "calib/step_q_gap": 0.036557064864162436, "calib/step_q_w": 0.8452388535031846, "calib/step_q_w_n": 628.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2497.0, "completions/max_terminated_length": 2497.0, "completions/mean_length": 287.34375, "completions/mean_terminated_length": 288.4706115722656, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0368, "grad_norm": 32.517234802246094, "learning_rate": 9.88888888888889e-07, "loss": -0.0535, "num_tokens": 7891513.0, "reward": 0.505859375, "reward_std": 0.4474440813064575, "rewards/accuracy_reward_step": 0.2109375, "rewards/format_reward_step": 0.58984375, "step": 23 }, { "calib/answer_extract_rate": 0.71875, "calib/avg_num_step_conf": 3.5390625, "calib/ece": 0.6369543147208123, "calib/final_conf_rate": 0.76953125, "calib/format_rate": 0.63671875, "calib/frac_conf_gt_0.9": 0.6649746192893401, "calib/gap": 0.05553333333333332, "calib/mean_conf": 0.8907614213197971, "calib/mu_c": 0.9322, "calib/mu_w": 0.8766666666666667, "calib/nonempty_final_conf_rate": 0.76953125, "calib/nonempty_reasoning_rate": 0.79296875, "calib/nonempty_step_conf_rate": 0.71875, "calib/pce": 0.6369543147208123, "calib/std_conf": 0.1714729527857561, "calib/step_conf_rate": 0.71875, "calib/step_q_c": 0.8439673913043478, "calib/step_q_c_n": 184.0, "calib/step_q_gap": 0.03170977357581595, "calib/step_q_w": 0.8122576177285319, "calib/step_q_w_n": 722.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2468.0, "completions/max_terminated_length": 2468.0, "completions/mean_length": 285.83203125, "completions/mean_terminated_length": 288.0826721191406, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0384, "grad_norm": 28.229576110839844, "learning_rate": 9.833333333333332e-07, "loss": -0.052, "num_tokens": 8224238.0, "reward": 0.513671875, "reward_std": 0.42473044991493225, "rewards/accuracy_reward_step": 0.1953125, "rewards/format_reward_step": 0.63671875, "step": 24 }, { "calib/answer_extract_rate": 0.72265625, "calib/avg_num_step_conf": 3.98046875, "calib/ece": 0.6131979695431471, "calib/final_conf_rate": 0.76953125, "calib/format_rate": 0.6484375, "calib/frac_conf_gt_0.9": 0.5634517766497462, "calib/gap": 0.05753561253561279, "calib/mean_conf": 0.8873096446700508, "calib/mu_c": 0.9290740740740744, "calib/mu_w": 0.8715384615384616, "calib/nonempty_final_conf_rate": 0.76953125, "calib/nonempty_reasoning_rate": 0.78515625, "calib/nonempty_step_conf_rate": 0.72265625, "calib/pce": 0.6131979695431471, "calib/std_conf": 0.14102010713951482, "calib/step_conf_rate": 0.72265625, "calib/step_q_c": 0.8701724137931034, "calib/step_q_c_n": 232.0, "calib/step_q_gap": 0.05313302370415807, "calib/step_q_w": 0.8170393900889453, "calib/step_q_w_n": 787.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2438.0, "completions/max_terminated_length": 2438.0, "completions/mean_length": 258.58203125, "completions/mean_terminated_length": 260.61810302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.04, "grad_norm": 22.257802963256836, "learning_rate": 9.777777777777778e-07, "loss": -0.085, "num_tokens": 8552115.0, "reward": 0.5390625, "reward_std": 0.4408751428127289, "rewards/accuracy_reward_step": 0.21484375, "rewards/format_reward_step": 0.6484375, "step": 25 }, { "calib/answer_extract_rate": 0.76171875, "calib/avg_num_step_conf": 3.8515625, "calib/ece": 0.7327488151658769, "calib/final_conf_rate": 0.82421875, "calib/format_rate": 0.66015625, "calib/frac_conf_gt_0.9": 0.6161137440758294, "calib/gap": 0.041826984126984046, "calib/mean_conf": 0.9033649289099527, "calib/mu_c": 0.9380555555555556, "calib/mu_w": 0.8962285714285716, "calib/nonempty_final_conf_rate": 0.82421875, "calib/nonempty_reasoning_rate": 0.80859375, "calib/nonempty_step_conf_rate": 0.7265625, "calib/pce": 0.7327488151658769, "calib/std_conf": 0.1236090554144201, "calib/step_conf_rate": 0.7265625, "calib/step_q_c": 0.8673885350318472, "calib/step_q_c_n": 157.0, "calib/step_q_gap": 0.028088173150061957, "calib/step_q_w": 0.8393003618817853, "calib/step_q_w_n": 829.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2222.0, "completions/max_terminated_length": 2222.0, "completions/mean_length": 257.65234375, "completions/mean_terminated_length": 258.6627502441406, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0416, "grad_norm": 25.09646987915039, "learning_rate": 9.722222222222222e-07, "loss": -0.1375, "num_tokens": 8878442.0, "reward": 0.470703125, "reward_std": 0.407234251499176, "rewards/accuracy_reward_step": 0.140625, "rewards/format_reward_step": 0.66015625, "step": 26 }, { "calib/answer_extract_rate": 0.73828125, "calib/avg_num_step_conf": 3.6875, "calib/ece": 0.70248730964467, "calib/final_conf_rate": 0.76953125, "calib/format_rate": 0.6171875, "calib/frac_conf_gt_0.9": 0.5076142131979695, "calib/gap": 0.06638036809815939, "calib/mean_conf": 0.8750761421319798, "calib/mu_c": 0.9299999999999999, "calib/mu_w": 0.8636196319018405, "calib/nonempty_final_conf_rate": 0.76953125, "calib/nonempty_reasoning_rate": 0.796875, "calib/nonempty_step_conf_rate": 0.703125, "calib/pce": 0.70248730964467, "calib/std_conf": 0.162401455600176, "calib/step_conf_rate": 0.703125, "calib/step_q_c": 0.8438848920863309, "calib/step_q_c_n": 139.0, "calib/step_q_gap": 0.02372340140310103, "calib/step_q_w": 0.8201614906832299, "calib/step_q_w_n": 805.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2806.0, "completions/max_terminated_length": 2806.0, "completions/mean_length": 278.7109375, "completions/mean_terminated_length": 280.905517578125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0432, "grad_norm": 45.72233963012695, "learning_rate": 9.666666666666666e-07, "loss": -0.0227, "num_tokens": 9211936.0, "reward": 0.44140625, "reward_std": 0.37693342566490173, "rewards/accuracy_reward_step": 0.1328125, "rewards/format_reward_step": 0.6171875, "step": 27 }, { "calib/answer_extract_rate": 0.79296875, "calib/avg_num_step_conf": 3.9375, "calib/ece": 0.6911004784688995, "calib/final_conf_rate": 0.81640625, "calib/format_rate": 0.69140625, "calib/frac_conf_gt_0.9": 0.5933014354066986, "calib/gap": 0.030397793263646777, "calib/mean_conf": 0.8872727272727272, "calib/mu_c": 0.9117073170731707, "calib/mu_w": 0.8813095238095239, "calib/nonempty_final_conf_rate": 0.81640625, "calib/nonempty_reasoning_rate": 0.83984375, "calib/nonempty_step_conf_rate": 0.75390625, "calib/pce": 0.6911004784688995, "calib/std_conf": 0.1610543231267796, "calib/step_conf_rate": 0.75390625, "calib/step_q_c": 0.8209389671361503, "calib/step_q_c_n": 213.0, "calib/step_q_gap": -0.023287447958189267, "calib/step_q_w": 0.8442264150943396, "calib/step_q_w_n": 795.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1805.0, "completions/max_terminated_length": 1805.0, "completions/mean_length": 260.96484375, "completions/mean_terminated_length": 261.9882507324219, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0448, "grad_norm": 27.64651107788086, "learning_rate": 9.61111111111111e-07, "loss": -0.1104, "num_tokens": 9535415.0, "reward": 0.505859375, "reward_std": 0.41910263895988464, "rewards/accuracy_reward_step": 0.16015625, "rewards/format_reward_step": 0.69140625, "step": 28 }, { "calib/answer_extract_rate": 0.7265625, "calib/avg_num_step_conf": 3.7578125, "calib/ece": 0.7486528497409327, "calib/final_conf_rate": 0.75390625, "calib/format_rate": 0.62890625, "calib/frac_conf_gt_0.9": 0.694300518134715, "calib/gap": 0.031744953416149024, "calib/mean_conf": 0.9144559585492228, "calib/mu_c": 0.9409375, "calib/mu_w": 0.909192546583851, "calib/nonempty_final_conf_rate": 0.75390625, "calib/nonempty_reasoning_rate": 0.80078125, "calib/nonempty_step_conf_rate": 0.73046875, "calib/pce": 0.7486528497409327, "calib/std_conf": 0.11464222055563011, "calib/step_conf_rate": 0.73046875, "calib/step_q_c": 0.8688235294117648, "calib/step_q_c_n": 153.0, "calib/step_q_gap": 0.028020068348723948, "calib/step_q_w": 0.8408034610630408, "calib/step_q_w_n": 809.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2102.0, "completions/max_terminated_length": 2102.0, "completions/mean_length": 298.55859375, "completions/mean_terminated_length": 300.9094543457031, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0464, "grad_norm": 28.71442413330078, "learning_rate": 9.555555555555556e-07, "loss": -0.08, "num_tokens": 9873230.0, "reward": 0.447265625, "reward_std": 0.38781794905662537, "rewards/accuracy_reward_step": 0.1328125, "rewards/format_reward_step": 0.62890625, "step": 29 }, { "calib/answer_extract_rate": 0.80078125, "calib/avg_num_step_conf": 4.0703125, "calib/ece": 0.5768047619047619, "calib/final_conf_rate": 0.8203125, "calib/format_rate": 0.72265625, "calib/frac_conf_gt_0.9": 0.6142857142857143, "calib/gap": 0.04852389762565534, "calib/mean_conf": 0.9053761904761906, "calib/mu_c": 0.9379565217391306, "calib/mu_w": 0.8894326241134752, "calib/nonempty_final_conf_rate": 0.8203125, "calib/nonempty_reasoning_rate": 0.8359375, "calib/nonempty_step_conf_rate": 0.765625, "calib/pce": 0.5768047619047619, "calib/std_conf": 0.10518082572300542, "calib/step_conf_rate": 0.765625, "calib/step_q_c": 0.8512857142857142, "calib/step_q_c_n": 315.0, "calib/step_q_gap": 0.030652977009235527, "calib/step_q_w": 0.8206327372764787, "calib/step_q_w_n": 727.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2438.0, "completions/max_terminated_length": 2438.0, "completions/mean_length": 250.2265625, "completions/mean_terminated_length": 252.1968536376953, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.048, "grad_norm": 48.1530647277832, "learning_rate": 9.499999999999999e-07, "loss": -0.1343, "num_tokens": 10198696.0, "reward": 0.630859375, "reward_std": 0.416718989610672, "rewards/accuracy_reward_step": 0.26953125, "rewards/format_reward_step": 0.72265625, "step": 30 }, { "calib/answer_extract_rate": 0.7890625, "calib/avg_num_step_conf": 3.76171875, "calib/ece": 0.5877522935779818, "calib/final_conf_rate": 0.8515625, "calib/format_rate": 0.71875, "calib/frac_conf_gt_0.9": 0.6559633027522935, "calib/gap": 0.04655675517945701, "calib/mean_conf": 0.9042660550458715, "calib/mu_c": 0.936086956521739, "calib/mu_w": 0.889530201342282, "calib/nonempty_final_conf_rate": 0.8515625, "calib/nonempty_reasoning_rate": 0.81640625, "calib/nonempty_step_conf_rate": 0.7578125, "calib/pce": 0.5877522935779818, "calib/std_conf": 0.13482119604419016, "calib/step_conf_rate": 0.7578125, "calib/step_q_c": 0.85013201320132, "calib/step_q_c_n": 303.0, "calib/step_q_gap": 0.004071407140713856, "calib/step_q_w": 0.8460606060606062, "calib/step_q_w_n": 660.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1196.0, "completions/max_terminated_length": 1196.0, "completions/mean_length": 233.57421875, "completions/mean_terminated_length": 233.57421875, "completions/min_length": 1.0, "completions/min_terminated_length": 1.0, "epoch": 0.0496, "grad_norm": 30.72689437866211, "learning_rate": 9.444444444444444e-07, "loss": -0.1023, "num_tokens": 10518179.0, "reward": 0.62890625, "reward_std": 0.42506512999534607, "rewards/accuracy_reward_step": 0.26953125, "rewards/format_reward_step": 0.71875, "step": 31 }, { "calib/answer_extract_rate": 0.81640625, "calib/avg_num_step_conf": 4.25, "calib/ece": 0.6657651162790699, "calib/final_conf_rate": 0.83984375, "calib/format_rate": 0.75, "calib/frac_conf_gt_0.9": 0.6186046511627907, "calib/gap": 0.027463636363636357, "calib/mean_conf": 0.8983232558139534, "calib/mu_c": 0.9194000000000001, "calib/mu_w": 0.8919363636363637, "calib/nonempty_final_conf_rate": 0.83984375, "calib/nonempty_reasoning_rate": 0.8671875, "calib/nonempty_step_conf_rate": 0.80859375, "calib/pce": 0.6657651162790699, "calib/std_conf": 0.11949634194500217, "calib/step_conf_rate": 0.80859375, "calib/step_q_c": 0.8575824175824176, "calib/step_q_c_n": 273.0, "calib/step_q_gap": 0.03659836850266285, "calib/step_q_w": 0.8209840490797548, "calib/step_q_w_n": 815.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 819.0, "completions/max_terminated_length": 819.0, "completions/mean_length": 249.30859375, "completions/mean_terminated_length": 251.2716522216797, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0512, "grad_norm": 56.229164123535156, "learning_rate": 9.388888888888888e-07, "loss": -0.1638, "num_tokens": 10844042.0, "reward": 0.57421875, "reward_std": 0.3676033020019531, "rewards/accuracy_reward_step": 0.19921875, "rewards/format_reward_step": 0.75, "step": 32 }, { "calib/answer_extract_rate": 0.91796875, "calib/avg_num_step_conf": 4.984375, "calib/ece": 0.6375208333333335, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.8125, "calib/frac_conf_gt_0.9": 0.5833333333333334, "calib/gap": 0.04897517216382752, "calib/mean_conf": 0.8958541666666666, "calib/mu_c": 0.9321774193548388, "calib/mu_w": 0.8832022471910113, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.953125, "calib/nonempty_step_conf_rate": 0.8671875, "calib/pce": 0.6375208333333335, "calib/std_conf": 0.13658025564714282, "calib/step_conf_rate": 0.8671875, "calib/step_q_c": 0.8521617647058823, "calib/step_q_c_n": 340.0, "calib/step_q_gap": 0.028700226244343874, "calib/step_q_w": 0.8234615384615385, "calib/step_q_w_n": 936.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 893.0, "completions/max_terminated_length": 893.0, "completions/mean_length": 258.7109375, "completions/mean_terminated_length": 260.7480163574219, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0528, "grad_norm": 33.666343688964844, "learning_rate": 9.333333333333333e-07, "loss": -0.0623, "num_tokens": 11171512.0, "reward": 0.6484375, "reward_std": 0.3970039486885071, "rewards/accuracy_reward_step": 0.2421875, "rewards/format_reward_step": 0.8125, "step": 33 }, { "calib/answer_extract_rate": 0.9140625, "calib/avg_num_step_conf": 4.79296875, "calib/ece": 0.7196583333333335, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.84765625, "calib/frac_conf_gt_0.9": 0.6416666666666667, "calib/gap": 0.04740976739058533, "calib/mean_conf": 0.9154916666666666, "calib/mu_c": 0.9536170212765959, "calib/mu_w": 0.9062072538860105, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.9375, "calib/nonempty_step_conf_rate": 0.890625, "calib/pce": 0.7196583333333335, "calib/std_conf": 0.09544143019266958, "calib/step_conf_rate": 0.890625, "calib/step_q_c": 0.87369918699187, "calib/step_q_c_n": 246.0, "calib/step_q_gap": 0.038581959672807864, "calib/step_q_w": 0.8351172273190621, "calib/step_q_w_n": 981.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1319.0, "completions/max_terminated_length": 1319.0, "completions/mean_length": 261.5859375, "completions/mean_terminated_length": 262.6117858886719, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0544, "grad_norm": 18.248090744018555, "learning_rate": 9.277777777777777e-07, "loss": -0.0844, "num_tokens": 11499734.0, "reward": 0.607421875, "reward_std": 0.3113514184951782, "rewards/accuracy_reward_step": 0.18359375, "rewards/format_reward_step": 0.84765625, "step": 34 }, { "calib/answer_extract_rate": 0.9453125, "calib/avg_num_step_conf": 4.90625, "calib/ece": 0.5461216326530611, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.8828125, "calib/frac_conf_gt_0.9": 0.6530612244897959, "calib/gap": 0.02993523728331904, "calib/mean_conf": 0.9216318367346938, "calib/mu_c": 0.9403260869565218, "calib/mu_w": 0.9103908496732027, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.91796875, "calib/pce": 0.5461216326530611, "calib/std_conf": 0.0916808045623392, "calib/step_conf_rate": 0.91796875, "calib/step_q_c": 0.8607843137254901, "calib/step_q_c_n": 408.0, "calib/step_q_gap": 0.011596224102848662, "calib/step_q_w": 0.8491880896226415, "calib/step_q_w_n": 848.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1752.0, "completions/max_terminated_length": 1752.0, "completions/mean_length": 253.953125, "completions/mean_terminated_length": 254.94903564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 19.0, "epoch": 0.056, "grad_norm": 10.243550300598145, "learning_rate": 9.222222222222222e-07, "loss": -0.0095, "num_tokens": 11826890.0, "reward": 0.80078125, "reward_std": 0.39892858266830444, "rewards/accuracy_reward_step": 0.359375, "rewards/format_reward_step": 0.8828125, "step": 35 }, { "calib/answer_extract_rate": 0.9296875, "calib/avg_num_step_conf": 4.875, "calib/ece": 0.5691666666666667, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.88671875, "calib/frac_conf_gt_0.9": 0.7208333333333333, "calib/gap": 0.04074383301707796, "calib/mean_conf": 0.9233333333333332, "calib/mu_c": 0.9496470588235295, "calib/mu_w": 0.9089032258064516, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.953125, "calib/nonempty_step_conf_rate": 0.921875, "calib/pce": 0.5691666666666667, "calib/std_conf": 0.09852946541799339, "calib/step_conf_rate": 0.921875, "calib/step_q_c": 0.8680975609756097, "calib/step_q_c_n": 410.0, "calib/step_q_gap": 0.022238372431456987, "calib/step_q_w": 0.8458591885441528, "calib/step_q_w_n": 838.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 898.0, "completions/max_terminated_length": 898.0, "completions/mean_length": 238.45703125, "completions/mean_terminated_length": 239.3921661376953, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0576, "grad_norm": 20.548439025878906, "learning_rate": 9.166666666666665e-07, "loss": -0.0271, "num_tokens": 12141415.0, "reward": 0.779296875, "reward_std": 0.4399545192718506, "rewards/accuracy_reward_step": 0.3359375, "rewards/format_reward_step": 0.88671875, "step": 36 }, { "calib/answer_extract_rate": 0.9296875, "calib/avg_num_step_conf": 4.98046875, "calib/ece": 0.644792531120332, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.8671875, "calib/frac_conf_gt_0.9": 0.7053941908713693, "calib/gap": 0.04945061188811184, "calib/mean_conf": 0.9145020746887966, "calib/mu_c": 0.9506153846153846, "calib/mu_w": 0.9011647727272728, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.95703125, "calib/nonempty_step_conf_rate": 0.91796875, "calib/pce": 0.644792531120332, "calib/std_conf": 0.1280647568455238, "calib/step_conf_rate": 0.91796875, "calib/step_q_c": 0.8601038062283737, "calib/step_q_c_n": 289.0, "calib/step_q_gap": 0.0183695263095095, "calib/step_q_w": 0.8417342799188642, "calib/step_q_w_n": 986.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 845.0, "completions/max_terminated_length": 845.0, "completions/mean_length": 247.51953125, "completions/mean_terminated_length": 249.468505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0592, "grad_norm": 16.210430145263672, "learning_rate": 9.11111111111111e-07, "loss": -0.0462, "num_tokens": 12466204.0, "reward": 0.6875, "reward_std": 0.4034990072250366, "rewards/accuracy_reward_step": 0.25390625, "rewards/format_reward_step": 0.8671875, "step": 37 }, { "calib/answer_extract_rate": 0.9296875, "calib/avg_num_step_conf": 5.1640625, "calib/ece": 0.6612863070539419, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.875, "calib/frac_conf_gt_0.9": 0.7012448132780082, "calib/gap": 0.04826795580110521, "calib/mean_conf": 0.9102489626556016, "calib/mu_c": 0.9465000000000001, "calib/mu_w": 0.8982320441988949, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.95703125, "calib/nonempty_step_conf_rate": 0.9296875, "calib/pce": 0.6612863070539419, "calib/std_conf": 0.13660958488150282, "calib/step_conf_rate": 0.9296875, "calib/step_q_c": 0.84876582278481, "calib/step_q_c_n": 316.0, "calib/step_q_gap": 0.006747930140674763, "calib/step_q_w": 0.8420178926441353, "calib/step_q_w_n": 1006.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 583.0, "completions/max_terminated_length": 583.0, "completions/mean_length": 237.515625, "completions/mean_terminated_length": 239.3858184814453, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0608, "grad_norm": 10.232465744018555, "learning_rate": 9.055555555555556e-07, "loss": -0.0463, "num_tokens": 12789000.0, "reward": 0.671875, "reward_std": 0.30752283334732056, "rewards/accuracy_reward_step": 0.234375, "rewards/format_reward_step": 0.875, "step": 38 }, { "calib/answer_extract_rate": 0.94140625, "calib/avg_num_step_conf": 5.71875, "calib/ece": 0.7620164609053499, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.88671875, "calib/frac_conf_gt_0.9": 0.6337448559670782, "calib/gap": 0.04123720808186837, "calib/mean_conf": 0.9109876543209877, "calib/mu_c": 0.9459459459459462, "calib/mu_w": 0.9047087378640778, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.93359375, "calib/pce": 0.7603703703703705, "calib/std_conf": 0.11771422049891216, "calib/step_conf_rate": 0.93359375, "calib/step_q_c": 0.8639444444444444, "calib/step_q_c_n": 180.0, "calib/step_q_gap": 0.023850913657227024, "calib/step_q_w": 0.8400935307872174, "calib/step_q_w_n": 1283.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 800.0, "completions/max_terminated_length": 800.0, "completions/mean_length": 272.58203125, "completions/mean_terminated_length": 274.72833251953125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0624, "grad_norm": 63.97758483886719, "learning_rate": 9e-07, "loss": -0.0187, "num_tokens": 13119877.0, "reward": 0.591796875, "reward_std": 0.2877320349216461, "rewards/accuracy_reward_step": 0.1484375, "rewards/format_reward_step": 0.88671875, "step": 39 }, { "calib/answer_extract_rate": 0.92578125, "calib/avg_num_step_conf": 5.28125, "calib/ece": 0.6455833333333334, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.85546875, "calib/frac_conf_gt_0.9": 0.6583333333333333, "calib/gap": 0.03305120167189124, "calib/mean_conf": 0.9205833333333333, "calib/mu_c": 0.9445454545454546, "calib/mu_w": 0.9114942528735633, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.91796875, "calib/pce": 0.6455833333333334, "calib/std_conf": 0.09383758871345509, "calib/step_conf_rate": 0.91796875, "calib/step_q_c": 0.8645266272189348, "calib/step_q_c_n": 338.0, "calib/step_q_gap": 0.01657790927021685, "calib/step_q_w": 0.847948717948718, "calib/step_q_w_n": 1014.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1705.0, "completions/max_terminated_length": 1705.0, "completions/mean_length": 262.23046875, "completions/mean_terminated_length": 263.25885009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.064, "grad_norm": 25.175500869750977, "learning_rate": 8.944444444444445e-07, "loss": -0.0202, "num_tokens": 13449152.0, "reward": 0.685546875, "reward_std": 0.3665532171726227, "rewards/accuracy_reward_step": 0.2578125, "rewards/format_reward_step": 0.85546875, "step": 40 }, { "calib/answer_extract_rate": 0.9375, "calib/avg_num_step_conf": 5.37890625, "calib/ece": 0.7596265145228215, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.8984375, "calib/frac_conf_gt_0.9": 0.7427385892116183, "calib/gap": 0.02382273510409194, "calib/mean_conf": 0.9339003734439835, "calib/mu_c": 0.9535714285714286, "calib/mu_w": 0.9297486934673367, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.95703125, "calib/nonempty_step_conf_rate": 0.9375, "calib/pce": 0.7596265145228215, "calib/std_conf": 0.06590362283175459, "calib/step_conf_rate": 0.9375, "calib/step_q_c": 0.8828634361233482, "calib/step_q_c_n": 227.0, "calib/step_q_gap": 0.030634436123348152, "calib/step_q_w": 0.852229, "calib/step_q_w_n": 1150.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 590.0, "completions/max_terminated_length": 590.0, "completions/mean_length": 244.40234375, "completions/mean_terminated_length": 246.32676696777344, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0656, "grad_norm": 14.0829496383667, "learning_rate": 8.888888888888888e-07, "loss": -0.0629, "num_tokens": 13773863.0, "reward": 0.61328125, "reward_std": 0.25447797775268555, "rewards/accuracy_reward_step": 0.1640625, "rewards/format_reward_step": 0.8984375, "step": 41 }, { "calib/answer_extract_rate": 0.953125, "calib/avg_num_step_conf": 5.6640625, "calib/ece": 0.5372064777327934, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.6923076923076923, "calib/gap": 0.021238272626931343, "calib/mean_conf": 0.9258704453441295, "calib/mu_c": 0.9388541666666667, "calib/mu_w": 0.9176158940397353, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.5372064777327934, "calib/std_conf": 0.08506220287114333, "calib/step_conf_rate": 0.94140625, "calib/step_q_c": 0.8620292504570385, "calib/step_q_c_n": 547.0, "calib/step_q_gap": 0.0074999038346685865, "calib/step_q_w": 0.8545293466223699, "calib/step_q_w_n": 903.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 779.0, "completions/max_terminated_length": 779.0, "completions/mean_length": 261.890625, "completions/mean_terminated_length": 263.9527587890625, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0672, "grad_norm": 17.002763748168945, "learning_rate": 8.833333333333333e-07, "loss": -0.0442, "num_tokens": 14099395.0, "reward": 0.830078125, "reward_std": 0.3919033706188202, "rewards/accuracy_reward_step": 0.375, "rewards/format_reward_step": 0.91015625, "step": 42 }, { "calib/answer_extract_rate": 0.94921875, "calib/avg_num_step_conf": 6.32421875, "calib/ece": 0.6638367346938774, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.88671875, "calib/frac_conf_gt_0.9": 0.7061224489795919, "calib/gap": 0.029949930939226488, "calib/mean_conf": 0.9250612244897959, "calib/mu_c": 0.9471875000000001, "calib/mu_w": 0.9172375690607736, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.6638367346938774, "calib/std_conf": 0.09329181956903092, "calib/step_conf_rate": 0.953125, "calib/step_q_c": 0.858123324396783, "calib/step_q_c_n": 373.0, "calib/step_q_gap": 0.01197565184461602, "calib/step_q_w": 0.8461476725521669, "calib/step_q_w_n": 1246.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2620.0, "completions/max_terminated_length": 2620.0, "completions/mean_length": 312.546875, "completions/mean_terminated_length": 313.7725524902344, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0688, "grad_norm": 7.766086578369141, "learning_rate": 8.777777777777777e-07, "loss": -0.0113, "num_tokens": 14441551.0, "reward": 0.693359375, "reward_std": 0.30214864015579224, "rewards/accuracy_reward_step": 0.25, "rewards/format_reward_step": 0.88671875, "step": 43 }, { "calib/answer_extract_rate": 0.921875, "calib/avg_num_step_conf": 7.21875, "calib/ece": 0.6732780082987552, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.87890625, "calib/frac_conf_gt_0.9": 0.6804979253112033, "calib/gap": 0.03413479052823298, "calib/mean_conf": 0.9197510373443984, "calib/mu_c": 0.9452459016393441, "calib/mu_w": 0.9111111111111111, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.6699585062240664, "calib/std_conf": 0.10817584160242431, "calib/step_conf_rate": 0.94140625, "calib/step_q_c": 0.8699236641221374, "calib/step_q_c_n": 393.0, "calib/step_q_gap": 0.0193394716822749, "calib/step_q_w": 0.8505841924398625, "calib/step_q_w_n": 1455.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1817.0, "completions/max_terminated_length": 1817.0, "completions/mean_length": 301.06640625, "completions/mean_terminated_length": 303.43701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0704, "grad_norm": 22.831974029541016, "learning_rate": 8.722222222222222e-07, "loss": -0.0944, "num_tokens": 14778840.0, "reward": 0.681640625, "reward_std": 0.35068172216415405, "rewards/accuracy_reward_step": 0.2421875, "rewards/format_reward_step": 0.87890625, "step": 44 }, { "calib/answer_extract_rate": 0.90234375, "calib/avg_num_step_conf": 7.21875, "calib/ece": 0.6819246861924687, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.83203125, "calib/frac_conf_gt_0.9": 0.6569037656903766, "calib/gap": 0.02893930523028876, "calib/mean_conf": 0.916234309623431, "calib/mu_c": 0.9383928571428571, "calib/mu_w": 0.9094535519125684, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.9453125, "calib/pce": 0.6819246861924687, "calib/std_conf": 0.10689240824307658, "calib/step_conf_rate": 0.9453125, "calib/step_q_c": 0.8586650485436893, "calib/step_q_c_n": 412.0, "calib/step_q_gap": 0.028442207318062573, "calib/step_q_w": 0.8302228412256267, "calib/step_q_w_n": 1436.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2664.0, "completions/max_terminated_length": 2664.0, "completions/mean_length": 333.71875, "completions/mean_terminated_length": 336.3464660644531, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.072, "grad_norm": 33.78303527832031, "learning_rate": 8.666666666666667e-07, "loss": -0.0129, "num_tokens": 15119352.0, "reward": 0.634765625, "reward_std": 0.34433940052986145, "rewards/accuracy_reward_step": 0.21875, "rewards/format_reward_step": 0.83203125, "step": 45 }, { "calib/answer_extract_rate": 0.78125, "calib/avg_num_step_conf": 8.2578125, "calib/ece": 0.7925333333333335, "calib/final_conf_rate": 0.87890625, "calib/format_rate": 0.71484375, "calib/frac_conf_gt_0.9": 0.6355555555555555, "calib/gap": 0.008950000000000014, "calib/mean_conf": 0.9036444444444444, "calib/mu_c": 0.9116, "calib/mu_w": 0.90265, "calib/nonempty_final_conf_rate": 0.87890625, "calib/nonempty_reasoning_rate": 0.921875, "calib/nonempty_step_conf_rate": 0.89453125, "calib/pce": 0.7925333333333335, "calib/std_conf": 0.11397488135658186, "calib/step_conf_rate": 0.89453125, "calib/step_q_c": 0.8361176470588234, "calib/step_q_c_n": 170.0, "calib/step_q_gap": 0.022300774630839904, "calib/step_q_w": 0.8138168724279835, "calib/step_q_w_n": 1944.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2833.0, "completions/max_terminated_length": 2833.0, "completions/mean_length": 390.0625, "completions/mean_terminated_length": 393.13385009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0736, "grad_norm": 54.98508071899414, "learning_rate": 8.611111111111111e-07, "loss": -0.0464, "num_tokens": 15475712.0, "reward": 0.458984375, "reward_std": 0.34532803297042847, "rewards/accuracy_reward_step": 0.1015625, "rewards/format_reward_step": 0.71484375, "step": 46 }, { "calib/answer_extract_rate": 0.51953125, "calib/avg_num_step_conf": 8.9296875, "calib/ece": 0.8392434210526315, "calib/final_conf_rate": 0.59375, "calib/format_rate": 0.40625, "calib/frac_conf_gt_0.9": 0.6118421052631579, "calib/gap": 0.013880281690141105, "calib/mean_conf": 0.9050328947368422, "calib/mu_c": 0.9180000000000001, "calib/mu_w": 0.904119718309859, "calib/nonempty_final_conf_rate": 0.59375, "calib/nonempty_reasoning_rate": 0.73046875, "calib/nonempty_step_conf_rate": 0.69921875, "calib/pce": 0.8392434210526315, "calib/std_conf": 0.1176657519463254, "calib/step_conf_rate": 0.69921875, "calib/step_q_c": 0.8893055555555556, "calib/step_q_c_n": 72.0, "calib/step_q_gap": 0.09709462511291789, "calib/step_q_w": 0.7922109304426377, "calib/step_q_w_n": 2214.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2930.0, "completions/max_terminated_length": 2930.0, "completions/mean_length": 585.80859375, "completions/mean_terminated_length": 590.4212646484375, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0752, "grad_norm": 21.259794235229492, "learning_rate": 8.555555555555555e-07, "loss": -0.0241, "num_tokens": 15886935.0, "reward": 0.24609375, "reward_std": 0.3053395748138428, "rewards/accuracy_reward_step": 0.04296875, "rewards/format_reward_step": 0.40625, "step": 47 }, { "calib/answer_extract_rate": 0.1640625, "calib/avg_num_step_conf": 2.48046875, "calib/ece": 0.8633076923076923, "calib/final_conf_rate": 0.15234375, "calib/format_rate": 0.09765625, "calib/frac_conf_gt_0.9": 0.6153846153846154, "calib/gap": 0.005702702702702567, "calib/mean_conf": 0.9145897435897435, "calib/mu_c": 0.9199999999999999, "calib/mu_w": 0.9142972972972974, "calib/nonempty_final_conf_rate": 0.15234375, "calib/nonempty_reasoning_rate": 0.29296875, "calib/nonempty_step_conf_rate": 0.26953125, "calib/pce": 0.8633076923076923, "calib/std_conf": 0.07260702726063477, "calib/step_conf_rate": 0.26953125, "calib/step_q_c": 0.76, "calib/step_q_c_n": 10.0, "calib/step_q_gap": -0.0807663999999999, "calib/step_q_w": 0.8407663999999999, "calib/step_q_w_n": 625.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 3006.0, "completions/max_terminated_length": 3006.0, "completions/mean_length": 958.6015625, "completions/mean_terminated_length": 977.697265625, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0768, "grad_norm": 26.788244247436523, "learning_rate": 8.499999999999999e-07, "loss": 0.0328, "num_tokens": 16386185.0, "reward": 0.060546875, "reward_std": 0.12249716371297836, "rewards/accuracy_reward_step": 0.01171875, "rewards/format_reward_step": 0.09765625, "step": 48 }, { "calib/answer_extract_rate": 0.0234375, "calib/avg_num_step_conf": 0.984375, "calib/ece": 0.9020000000000001, "calib/final_conf_rate": 0.01953125, "calib/format_rate": 0.015625, "calib/frac_conf_gt_0.9": 0.8, "calib/mean_conf": 0.9020000000000001, "calib/mu_c": NaN, "calib/mu_w": 0.9020000000000001, "calib/nonempty_final_conf_rate": 0.01953125, "calib/nonempty_reasoning_rate": 0.0859375, "calib/nonempty_step_conf_rate": 0.078125, "calib/pce": 0.9020000000000001, "calib/std_conf": 0.12734205903785284, "calib/step_conf_rate": 0.078125, "calib/step_q_w": 0.7507420634920635, "calib/step_q_w_n": 252.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2898.0, "completions/max_terminated_length": 2898.0, "completions/mean_length": 1243.87890625, "completions/mean_terminated_length": 1278.8472900390625, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0784, "grad_norm": 5.6303815841674805, "learning_rate": 8.444444444444444e-07, "loss": -0.0025, "num_tokens": 16963690.0, "reward": 0.0078125, "reward_std": 0.018281511962413788, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.015625, "step": 49 }, { "calib/answer_extract_rate": 0.03515625, "calib/avg_num_step_conf": 0.49609375, "calib/ece": 0.8109999999999999, "calib/final_conf_rate": 0.01953125, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 0.2, "calib/mean_conf": 0.8109999999999999, "calib/mu_c": NaN, "calib/mu_w": 0.8109999999999999, "calib/nonempty_final_conf_rate": 0.01953125, "calib/nonempty_reasoning_rate": 0.0625, "calib/nonempty_step_conf_rate": 0.046875, "calib/pce": 0.8109999999999999, "calib/std_conf": 0.11226753760548948, "calib/step_conf_rate": 0.046875, "calib/step_q_w": 0.7346020997375329, "calib/step_q_w_n": 127.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3000.0, "completions/max_terminated_length": 3000.0, "completions/mean_length": 1146.8671875, "completions/mean_terminated_length": 1165.071533203125, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.08, "grad_norm": 0.5443186163902283, "learning_rate": 8.388888888888888e-07, "loss": 0.0011, "num_tokens": 17519288.0, "reward": 0.00390625, "reward_std": 0.007232969626784325, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0078125, "step": 50 }, { "calib/answer_extract_rate": 0.0390625, "calib/avg_num_step_conf": 0.5859375, "calib/ece": 0.7766666666666667, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.5, "calib/mean_conf": 0.7766666666666667, "calib/mu_c": NaN, "calib/mu_w": 0.7766666666666667, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.11328125, "calib/nonempty_step_conf_rate": 0.10546875, "calib/pce": 0.7766666666666667, "calib/std_conf": 0.2907843798341918, "calib/step_conf_rate": 0.10546875, "calib/step_q_w": 0.8009333333333334, "calib/step_q_w_n": 150.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3005.0, "completions/max_terminated_length": 3005.0, "completions/mean_length": 1167.09765625, "completions/mean_terminated_length": 1185.623046875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0816, "grad_norm": 13.37551498413086, "learning_rate": 8.333333333333333e-07, "loss": 0.0419, "num_tokens": 18075633.0, "reward": 0.013671875, "reward_std": 0.03485432639718056, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.02734375, "step": 51 }, { "calib/answer_extract_rate": 0.03125, "calib/avg_num_step_conf": 0.27734375, "calib/ece": 0.7934549999999999, "calib/final_conf_rate": 0.0234375, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.3333333333333333, "calib/mean_conf": 0.7934549999999999, "calib/mu_c": NaN, "calib/mu_w": 0.7934549999999999, "calib/nonempty_final_conf_rate": 0.0234375, "calib/nonempty_reasoning_rate": 0.0703125, "calib/nonempty_step_conf_rate": 0.046875, "calib/pce": 0.7934549999999999, "calib/std_conf": 0.25778263154254594, "calib/step_conf_rate": 0.046875, "calib/step_q_w": 0.771830985915493, "calib/step_q_w_n": 71.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3034.0, "completions/max_terminated_length": 3034.0, "completions/mean_length": 1342.6015625, "completions/mean_terminated_length": 1353.1732177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0832, "grad_norm": 0.47544148564338684, "learning_rate": 8.277777777777777e-07, "loss": 0.0118, "num_tokens": 18681411.0, "reward": 0.001953125, "reward_std": 0.005524271633476019, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.00390625, "step": 52 }, { "calib/answer_extract_rate": 0.0078125, "calib/avg_num_step_conf": 0.15234375, "calib/ece": 0.98, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 0.98, "calib/mu_c": NaN, "calib/mu_w": 0.98, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.04296875, "calib/nonempty_step_conf_rate": 0.04296875, "calib/pce": 0.98, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.04296875, "calib/step_q_w": 0.7835897435897436, "calib/step_q_w_n": 39.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2905.0, "completions/max_terminated_length": 2905.0, "completions/mean_length": 1348.04296875, "completions/mean_terminated_length": 1385.939697265625, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0848, "grad_norm": 0.9600926637649536, "learning_rate": 8.222222222222221e-07, "loss": 0.0017, "num_tokens": 19288406.0, "reward": 0.001953125, "reward_std": 0.005524271633476019, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.00390625, "step": 53 }, { "calib/answer_extract_rate": 0.03125, "calib/avg_num_step_conf": 0.328125, "calib/ece": 0.8422222222222223, "calib/final_conf_rate": 0.03515625, "calib/format_rate": 0.015625, "calib/frac_conf_gt_0.9": 0.5555555555555556, "calib/mean_conf": 0.8422222222222222, "calib/mu_c": NaN, "calib/mu_w": 0.8422222222222222, "calib/nonempty_final_conf_rate": 0.03515625, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.8422222222222223, "calib/std_conf": 0.15324474091231954, "calib/step_conf_rate": 0.09375, "calib/step_q_w": 0.8053571428571425, "calib/step_q_w_n": 84.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2953.0, "completions/max_terminated_length": 2953.0, "completions/mean_length": 1340.8671875, "completions/mean_terminated_length": 1389.7247314453125, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.0864, "grad_norm": 1.3612425327301025, "learning_rate": 8.166666666666666e-07, "loss": -0.0042, "num_tokens": 19893140.0, "reward": 0.0078125, "reward_std": 0.018281511962413788, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.015625, "step": 54 }, { "calib/answer_extract_rate": 0.0078125, "calib/avg_num_step_conf": 0.06640625, "calib/ece": 0.9, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.9, "calib/mu_c": NaN, "calib/mu_w": 0.9, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0390625, "calib/nonempty_step_conf_rate": 0.03515625, "calib/pce": 0.9, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.03515625, "calib/step_q_w": 0.7994117647058824, "calib/step_q_w_n": 17.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2943.0, "completions/max_terminated_length": 2943.0, "completions/mean_length": 1346.0625, "completions/mean_terminated_length": 1378.3680419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.088, "grad_norm": 0.0, "learning_rate": 8.11111111111111e-07, "loss": 0.0, "num_tokens": 20499572.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 55 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.03125, "calib/ece": 0.75, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.75, "calib/mu_c": NaN, "calib/mu_w": 0.75, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0078125, "calib/nonempty_step_conf_rate": 0.0078125, "calib/pce": 0.75, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0078125, "calib/step_q_w": 0.577815, "calib/step_q_w_n": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2960.0, "completions/max_terminated_length": 2960.0, "completions/mean_length": 1211.46484375, "completions/mean_terminated_length": 1221.00390625, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0896, "grad_norm": 0.0, "learning_rate": 8.055555555555556e-07, "loss": 0.0, "num_tokens": 21071851.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 56 }, { "calib/answer_extract_rate": 0.015625, "calib/avg_num_step_conf": 0.09375, "calib/ece": 0.8666666666666667, "calib/final_conf_rate": 0.01171875, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.3333333333333333, "calib/mean_conf": 0.8666666666666667, "calib/mu_c": NaN, "calib/mu_w": 0.8666666666666667, "calib/nonempty_final_conf_rate": 0.01171875, "calib/nonempty_reasoning_rate": 0.0546875, "calib/nonempty_step_conf_rate": 0.046875, "calib/pce": 0.8666666666666667, "calib/std_conf": 0.08498365855987973, "calib/step_conf_rate": 0.046875, "calib/step_q_w": 0.7087499999999999, "calib/step_q_w_n": 24.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2996.0, "completions/max_terminated_length": 2996.0, "completions/mean_length": 1441.05078125, "completions/mean_terminated_length": 1458.138427734375, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.0912, "grad_norm": 0.0, "learning_rate": 8e-07, "loss": 0.0, "num_tokens": 21697264.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 57 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.00390625, "calib/ece": 0.89, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.89, "calib/mu_c": NaN, "calib/mu_w": 0.89, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.00390625, "calib/pce": 0.89, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.00390625, "calib/step_q_w": 0.83, "calib/step_q_w_n": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3042.0, "completions/max_terminated_length": 3042.0, "completions/mean_length": 1447.109375, "completions/mean_terminated_length": 1464.268798828125, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.0928, "grad_norm": 0.07886432111263275, "learning_rate": 7.944444444444444e-07, "loss": 0.0072, "num_tokens": 22327828.0, "reward": 0.001953125, "reward_std": 0.005524271633476019, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.00390625, "step": 58 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2983.0, "completions/max_terminated_length": 2983.0, "completions/mean_length": 1470.5390625, "completions/mean_terminated_length": 1487.976318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0944, "grad_norm": 0.0, "learning_rate": 7.888888888888889e-07, "loss": 0.0, "num_tokens": 22966430.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 59 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/ece": 0.982, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 0.982, "calib/mu_c": NaN, "calib/mu_w": 0.982, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/pce": 0.982, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2929.0, "completions/max_terminated_length": 2929.0, "completions/mean_length": 1242.18359375, "completions/mean_terminated_length": 1261.90087890625, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.096, "grad_norm": 0.0, "learning_rate": 7.833333333333333e-07, "loss": 0.0, "num_tokens": 23546573.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 60 }, { "calib/answer_extract_rate": 0.015625, "calib/avg_num_step_conf": 0.05078125, "calib/ece": 0.9033333333333333, "calib/final_conf_rate": 0.01171875, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/mean_conf": 0.9033333333333333, "calib/mu_c": NaN, "calib/mu_w": 0.9033333333333333, "calib/nonempty_final_conf_rate": 0.01171875, "calib/nonempty_reasoning_rate": 0.0234375, "calib/nonempty_step_conf_rate": 0.01953125, "calib/pce": 0.9033333333333333, "calib/std_conf": 0.06599663291074438, "calib/step_conf_rate": 0.01953125, "calib/step_q_w": 0.7952777777777778, "calib/step_q_w_n": 12.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2940.0, "completions/max_terminated_length": 2940.0, "completions/mean_length": 1263.74609375, "completions/mean_terminated_length": 1273.6968994140625, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.0976, "grad_norm": 2.4402859210968018, "learning_rate": 7.777777777777778e-07, "loss": 0.0114, "num_tokens": 24132108.0, "reward": 0.00390625, "reward_std": 0.011048543266952038, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0078125, "step": 61 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/ece": 0.99, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 0.99, "calib/mu_c": NaN, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/pce": 0.99, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 3004.0, "completions/max_terminated_length": 3004.0, "completions/mean_length": 1362.46484375, "completions/mean_terminated_length": 1389.6055908203125, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0992, "grad_norm": 0.0, "learning_rate": 7.722222222222222e-07, "loss": 0.0, "num_tokens": 24742483.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 62 }, { "calib/answer_extract_rate": 0.02734375, "calib/avg_num_step_conf": 0.33984375, "calib/ece": 0.716, "calib/final_conf_rate": 0.01953125, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.716, "calib/mu_c": NaN, "calib/mu_w": 0.716, "calib/nonempty_final_conf_rate": 0.01953125, "calib/nonempty_reasoning_rate": 0.0625, "calib/nonempty_step_conf_rate": 0.046875, "calib/pce": 0.716, "calib/std_conf": 0.23711600536446292, "calib/step_conf_rate": 0.046875, "calib/step_q_w": 0.6549770114942529, "calib/step_q_w_n": 87.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2984.0, "completions/max_terminated_length": 2984.0, "completions/mean_length": 1299.87890625, "completions/mean_terminated_length": 1325.77294921875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.1008, "grad_norm": 3.530916929244995, "learning_rate": 7.666666666666667e-07, "loss": 0.0124, "num_tokens": 25330612.0, "reward": 0.005859375, "reward_std": 0.01657281443476677, "rewards/accuracy_reward_step": 0.00390625, "rewards/format_reward_step": 0.00390625, "step": 63 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 1257.80859375, "completions/mean_terminated_length": 1277.77392578125, "completions/min_length": 0.0, "completions/min_terminated_length": 11.0, "epoch": 0.1024, "grad_norm": 0.0, "learning_rate": 7.61111111111111e-07, "loss": 0.0, "num_tokens": 25914755.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 64 }, { "calib/answer_extract_rate": 0.01953125, "calib/avg_num_step_conf": 0.0859375, "calib/ece": 0.65, "calib/final_conf_rate": 0.0078125, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.65, "calib/mu_c": NaN, "calib/mu_w": 0.65, "calib/nonempty_final_conf_rate": 0.0078125, "calib/nonempty_reasoning_rate": 0.02734375, "calib/nonempty_step_conf_rate": 0.01171875, "calib/pce": 0.65, "calib/std_conf": 0.15000000000000002, "calib/step_conf_rate": 0.01171875, "calib/step_q_w": 0.7795121212121212, "calib/step_q_w_n": 22.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2931.0, "completions/max_terminated_length": 2931.0, "completions/mean_length": 1273.6953125, "completions/mean_terminated_length": 1314.7822265625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.104, "grad_norm": 3.730851173400879, "learning_rate": 7.555555555555555e-07, "loss": 0.0111, "num_tokens": 26498589.0, "reward": 0.001953125, "reward_std": 0.005524271633476019, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.00390625, "step": 65 }, { "calib/answer_extract_rate": 0.015625, "calib/avg_num_step_conf": 0.015625, "calib/ece": 0.3, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.3, "calib/mu_c": NaN, "calib/mu_w": 0.3, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.02734375, "calib/nonempty_step_conf_rate": 0.01171875, "calib/pce": 0.3, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.01171875, "calib/step_q_w": 0.5225, "calib/step_q_w_n": 4.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 3018.0, "completions/max_terminated_length": 3018.0, "completions/mean_length": 1236.9765625, "completions/mean_terminated_length": 1261.6175537109375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.1056, "grad_norm": 0.0, "learning_rate": 7.5e-07, "loss": 0.0, "num_tokens": 27076695.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 66 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0078125, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.00390625, "calib/step_conf_rate": 0.00390625, "calib/step_q_w": 1.0, "calib/step_q_w_n": 2.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2984.0, "completions/max_terminated_length": 2984.0, "completions/mean_length": 1154.9296875, "completions/mean_terminated_length": 1177.936279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.1072, "grad_norm": 0.0, "learning_rate": 7.444444444444444e-07, "loss": 0.0, "num_tokens": 27634109.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 67 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3021.0, "completions/max_terminated_length": 3021.0, "completions/mean_length": 1266.05859375, "completions/mean_terminated_length": 1271.0235595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.1088, "grad_norm": 0.0, "learning_rate": 7.388888888888889e-07, "loss": 0.0, "num_tokens": 28219700.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 68 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3004.0, "completions/max_terminated_length": 3004.0, "completions/mean_length": 1211.1015625, "completions/mean_terminated_length": 1225.4625244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 13.0, "epoch": 0.1104, "grad_norm": 0.0, "learning_rate": 7.333333333333332e-07, "loss": 0.0, "num_tokens": 28791558.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 69 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2970.0, "completions/max_terminated_length": 2970.0, "completions/mean_length": 1100.359375, "completions/mean_terminated_length": 1109.0235595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.112, "grad_norm": 0.0, "learning_rate": 7.277777777777777e-07, "loss": 0.0, "num_tokens": 29334970.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 70 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2980.0, "completions/max_terminated_length": 2980.0, "completions/mean_length": 1057.43359375, "completions/mean_terminated_length": 1078.498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.1136, "grad_norm": 0.0, "learning_rate": 7.222222222222221e-07, "loss": 0.0, "num_tokens": 29863345.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 71 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.01171875, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.01171875, "calib/nonempty_step_conf_rate": 0.0078125, "calib/step_conf_rate": 0.0078125, "calib/step_q_w": 0.7032999999999999, "calib/step_q_w_n": 3.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2981.0, "completions/max_terminated_length": 2981.0, "completions/mean_length": 1086.828125, "completions/mean_terminated_length": 1108.4781494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 22.0, "epoch": 0.1152, "grad_norm": 0.0, "learning_rate": 7.166666666666667e-07, "loss": 0.0, "num_tokens": 30399453.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 72 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2895.0, "completions/max_terminated_length": 2895.0, "completions/mean_length": 1068.90234375, "completions/mean_terminated_length": 1090.1953125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.1168, "grad_norm": 0.0, "learning_rate": 7.111111111111111e-07, "loss": 0.0, "num_tokens": 30933660.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 73 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.00390625, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.00390625, "calib/step_conf_rate": 0.00390625, "calib/step_q_w": 0.45, "calib/step_q_w_n": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2999.0, "completions/max_terminated_length": 2999.0, "completions/mean_length": 1020.0390625, "completions/mean_terminated_length": 1044.52001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.1184, "grad_norm": 0.0, "learning_rate": 7.055555555555556e-07, "loss": 0.0, "num_tokens": 31451646.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 74 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3006.0, "completions/max_terminated_length": 3006.0, "completions/mean_length": 1156.68359375, "completions/mean_terminated_length": 1170.3992919921875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 7e-07, "loss": 0.0, "num_tokens": 32007253.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 75 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2926.0, "completions/max_terminated_length": 2926.0, "completions/mean_length": 1067.0234375, "completions/mean_terminated_length": 1075.4251708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 17.0, "epoch": 0.1216, "grad_norm": 0.0, "learning_rate": 6.944444444444444e-07, "loss": 0.0, "num_tokens": 32541139.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 76 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2959.0, "completions/max_terminated_length": 2959.0, "completions/mean_length": 1015.3671875, "completions/mean_terminated_length": 1027.4071044921875, "completions/min_length": 0.0, "completions/min_terminated_length": 17.0, "epoch": 0.1232, "grad_norm": 0.0, "learning_rate": 6.888888888888889e-07, "loss": 0.0, "num_tokens": 33061681.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 77 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.00390625, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0078125, "calib/nonempty_step_conf_rate": 0.00390625, "calib/step_conf_rate": 0.00390625, "calib/step_q_w": 0.9, "calib/step_q_w_n": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3017.0, "completions/max_terminated_length": 3017.0, "completions/mean_length": 1039.0, "completions/mean_terminated_length": 1051.3201904296875, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.1248, "grad_norm": 0.0, "learning_rate": 6.833333333333333e-07, "loss": 0.0, "num_tokens": 33584465.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 78 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2929.0, "completions/max_terminated_length": 2929.0, "completions/mean_length": 1074.19140625, "completions/mean_terminated_length": 1099.9720458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 11.0, "epoch": 0.1264, "grad_norm": 0.0, "learning_rate": 6.777777777777778e-07, "loss": 0.0, "num_tokens": 34114298.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 79 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2836.0, "completions/max_terminated_length": 2836.0, "completions/mean_length": 1014.15625, "completions/mean_terminated_length": 1022.1417236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.128, "grad_norm": 0.0, "learning_rate": 6.722222222222222e-07, "loss": 0.0, "num_tokens": 34635874.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 80 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2940.0, "completions/max_terminated_length": 2940.0, "completions/mean_length": 1188.2421875, "completions/mean_terminated_length": 1197.598388671875, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.1296, "grad_norm": 0.0, "learning_rate": 6.666666666666666e-07, "loss": 0.0, "num_tokens": 35196288.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 81 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.00390625, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.00390625, "calib/step_conf_rate": 0.00390625, "calib/step_q_w": 0.8, "calib/step_q_w_n": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2950.0, "completions/max_terminated_length": 2950.0, "completions/mean_length": 1147.44921875, "completions/mean_terminated_length": 1170.3067626953125, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.1312, "grad_norm": 0.0, "learning_rate": 6.611111111111111e-07, "loss": 0.0, "num_tokens": 35750803.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 82 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2885.0, "completions/max_terminated_length": 2885.0, "completions/mean_length": 1118.296875, "completions/mean_terminated_length": 1136.0477294921875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.1328, "grad_norm": 0.0, "learning_rate": 6.555555555555555e-07, "loss": 0.0, "num_tokens": 36295047.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 83 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2844.0, "completions/max_terminated_length": 2844.0, "completions/mean_length": 895.1328125, "completions/mean_terminated_length": 902.1810913085938, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.1344, "grad_norm": 0.0, "learning_rate": 6.5e-07, "loss": 0.0, "num_tokens": 36785433.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 84 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2964.0, "completions/max_terminated_length": 2964.0, "completions/mean_length": 1084.10546875, "completions/mean_terminated_length": 1096.9605712890625, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.136, "grad_norm": 0.0, "learning_rate": 6.444444444444444e-07, "loss": 0.0, "num_tokens": 37322388.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 85 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3048.0, "completions/max_terminated_length": 3048.0, "completions/mean_length": 1126.015625, "completions/mean_terminated_length": 1139.36767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.1376, "grad_norm": 0.0, "learning_rate": 6.388888888888888e-07, "loss": 0.0, "num_tokens": 37871904.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 86 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3023.0, "completions/max_terminated_length": 3023.0, "completions/mean_length": 1228.83984375, "completions/mean_terminated_length": 1238.5157470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.1392, "grad_norm": 0.0, "learning_rate": 6.333333333333332e-07, "loss": 0.0, "num_tokens": 38445119.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 87 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2965.0, "completions/max_terminated_length": 2965.0, "completions/mean_length": 1121.98046875, "completions/mean_terminated_length": 1130.81494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 11.0, "epoch": 0.1408, "grad_norm": 0.0, "learning_rate": 6.277777777777777e-07, "loss": 0.0, "num_tokens": 38992386.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 88 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2963.0, "completions/max_terminated_length": 2963.0, "completions/mean_length": 1134.3515625, "completions/mean_terminated_length": 1147.8023681640625, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.1424, "grad_norm": 0.0, "learning_rate": 6.222222222222223e-07, "loss": 0.0, "num_tokens": 39544348.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 89 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2895.0, "completions/max_terminated_length": 2895.0, "completions/mean_length": 1057.27734375, "completions/mean_terminated_length": 1069.8143310546875, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.144, "grad_norm": 0.0, "learning_rate": 6.166666666666667e-07, "loss": 0.0, "num_tokens": 40077155.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 90 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3025.0, "completions/max_terminated_length": 3025.0, "completions/mean_length": 1146.1484375, "completions/mean_terminated_length": 1155.1732177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 35.0, "epoch": 0.1456, "grad_norm": 0.0, "learning_rate": 6.111111111111112e-07, "loss": 0.0, "num_tokens": 40626425.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 91 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2963.0, "completions/max_terminated_length": 2963.0, "completions/mean_length": 1144.28125, "completions/mean_terminated_length": 1162.4444580078125, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.1472, "grad_norm": 0.0, "learning_rate": 6.055555555555555e-07, "loss": 0.0, "num_tokens": 41180273.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 92 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2994.0, "completions/max_terminated_length": 2994.0, "completions/mean_length": 1154.6796875, "completions/mean_terminated_length": 1163.7716064453125, "completions/min_length": 0.0, "completions/min_terminated_length": 12.0, "epoch": 0.1488, "grad_norm": 0.0, "learning_rate": 6e-07, "loss": 0.0, "num_tokens": 41737943.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 93 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2941.0, "completions/max_terminated_length": 2941.0, "completions/mean_length": 1111.44921875, "completions/mean_terminated_length": 1129.09130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.1504, "grad_norm": 0.0, "learning_rate": 5.944444444444444e-07, "loss": 0.0, "num_tokens": 42279618.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 94 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2985.0, "completions/max_terminated_length": 2985.0, "completions/mean_length": 1065.7578125, "completions/mean_terminated_length": 1086.988037109375, "completions/min_length": 0.0, "completions/min_terminated_length": 17.0, "epoch": 0.152, "grad_norm": 0.0, "learning_rate": 5.888888888888889e-07, "loss": 0.0, "num_tokens": 42813884.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 95 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2945.0, "completions/max_terminated_length": 2945.0, "completions/mean_length": 1039.0, "completions/mean_terminated_length": 1047.18115234375, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.1536, "grad_norm": 0.0, "learning_rate": 5.833333333333334e-07, "loss": 0.0, "num_tokens": 43341308.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 96 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2984.0, "completions/max_terminated_length": 2984.0, "completions/mean_length": 1063.22265625, "completions/mean_terminated_length": 1071.594482421875, "completions/min_length": 0.0, "completions/min_terminated_length": 21.0, "epoch": 0.1552, "grad_norm": 0.0, "learning_rate": 5.777777777777777e-07, "loss": 0.0, "num_tokens": 43874309.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 97 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3007.0, "completions/max_terminated_length": 3007.0, "completions/mean_length": 1113.98828125, "completions/mean_terminated_length": 1127.1976318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.1568, "grad_norm": 0.0, "learning_rate": 5.722222222222222e-07, "loss": 0.0, "num_tokens": 44420186.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 98 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2906.0, "completions/max_terminated_length": 2906.0, "completions/mean_length": 1128.171875, "completions/mean_terminated_length": 1141.5494384765625, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.1584, "grad_norm": 0.0, "learning_rate": 5.666666666666666e-07, "loss": 0.0, "num_tokens": 44971142.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 99 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2922.0, "completions/max_terminated_length": 2922.0, "completions/mean_length": 1054.0546875, "completions/mean_terminated_length": 1070.7857666015625, "completions/min_length": 0.0, "completions/min_terminated_length": 15.0, "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 5.611111111111111e-07, "loss": 0.0, "num_tokens": 45503124.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 100 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2832.0, "completions/max_terminated_length": 2832.0, "completions/mean_length": 938.03125, "completions/mean_terminated_length": 949.1541748046875, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.1616, "grad_norm": 0.0, "learning_rate": 5.555555555555555e-07, "loss": 0.0, "num_tokens": 46005404.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 101 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2908.0, "completions/max_terminated_length": 2908.0, "completions/mean_length": 1130.16015625, "completions/mean_terminated_length": 1143.561279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 11.0, "epoch": 0.1632, "grad_norm": 0.0, "learning_rate": 5.5e-07, "loss": 0.0, "num_tokens": 46556869.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 102 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2822.0, "completions/max_terminated_length": 2822.0, "completions/mean_length": 972.79296875, "completions/mean_terminated_length": 984.328125, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.1648, "grad_norm": 0.0, "learning_rate": 5.444444444444443e-07, "loss": 0.0, "num_tokens": 47067784.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 103 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.00390625, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0078125, "calib/nonempty_step_conf_rate": 0.00390625, "calib/step_conf_rate": 0.00390625, "calib/step_q_w": 0.8, "calib/step_q_w_n": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3061.0, "completions/max_terminated_length": 3061.0, "completions/mean_length": 1063.31640625, "completions/mean_terminated_length": 1071.68896484375, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.1664, "grad_norm": 0.0, "learning_rate": 5.388888888888888e-07, "loss": 0.0, "num_tokens": 47595849.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 104 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2919.0, "completions/max_terminated_length": 2919.0, "completions/mean_length": 1130.78515625, "completions/mean_terminated_length": 1135.2197265625, "completions/min_length": 0.0, "completions/min_terminated_length": 14.0, "epoch": 0.168, "grad_norm": 0.0, "learning_rate": 5.333333333333333e-07, "loss": 0.0, "num_tokens": 48147322.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 105 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2956.0, "completions/max_terminated_length": 2956.0, "completions/mean_length": 993.98828125, "completions/mean_terminated_length": 1001.81494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 16.0, "epoch": 0.1696, "grad_norm": 0.0, "learning_rate": 5.277777777777777e-07, "loss": 0.0, "num_tokens": 48663927.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 106 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2848.0, "completions/max_terminated_length": 2848.0, "completions/mean_length": 1050.23046875, "completions/mean_terminated_length": 1062.683837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 18.0, "epoch": 0.1712, "grad_norm": 0.0, "learning_rate": 5.222222222222223e-07, "loss": 0.0, "num_tokens": 49194834.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 107 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3048.0, "completions/max_terminated_length": 3048.0, "completions/mean_length": 1131.703125, "completions/mean_terminated_length": 1145.12255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.1728, "grad_norm": 0.0, "learning_rate": 5.166666666666667e-07, "loss": 0.0, "num_tokens": 49745774.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 108 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.00390625, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.00390625, "calib/step_conf_rate": 0.00390625, "calib/step_q_w": 0.98, "calib/step_q_w_n": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 1086.59765625, "completions/mean_terminated_length": 1108.2430419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.1744, "grad_norm": 0.0, "learning_rate": 5.111111111111111e-07, "loss": 0.0, "num_tokens": 50281599.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 109 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2829.0, "completions/max_terminated_length": 2829.0, "completions/mean_length": 987.63671875, "completions/mean_terminated_length": 999.3478393554688, "completions/min_length": 0.0, "completions/min_terminated_length": 20.0, "epoch": 0.176, "grad_norm": 0.0, "learning_rate": 5.055555555555555e-07, "loss": 0.0, "num_tokens": 50796578.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 110 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2823.0, "completions/max_terminated_length": 2823.0, "completions/mean_length": 1035.81640625, "completions/mean_terminated_length": 1043.972412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 25.0, "epoch": 0.1776, "grad_norm": 0.0, "learning_rate": 5e-07, "loss": 0.0, "num_tokens": 51322723.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 111 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2861.0, "completions/max_terminated_length": 2861.0, "completions/mean_length": 1091.7734375, "completions/mean_terminated_length": 1100.3701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.1792, "grad_norm": 0.0, "learning_rate": 4.944444444444445e-07, "loss": 0.0, "num_tokens": 51864361.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 112 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2962.0, "completions/max_terminated_length": 2962.0, "completions/mean_length": 1121.0234375, "completions/mean_terminated_length": 1129.850341796875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.1808, "grad_norm": 0.0, "learning_rate": 4.888888888888889e-07, "loss": 0.0, "num_tokens": 52408191.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 113 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2949.0, "completions/max_terminated_length": 2949.0, "completions/mean_length": 1183.1328125, "completions/mean_terminated_length": 1211.528076171875, "completions/min_length": 0.0, "completions/min_terminated_length": 17.0, "epoch": 0.1824, "grad_norm": 0.0, "learning_rate": 4.833333333333333e-07, "loss": 0.0, "num_tokens": 52972689.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 114 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2974.0, "completions/max_terminated_length": 2974.0, "completions/mean_length": 1078.66796875, "completions/mean_terminated_length": 1082.8980712890625, "completions/min_length": 0.0, "completions/min_terminated_length": 15.0, "epoch": 0.184, "grad_norm": 0.0, "learning_rate": 4.777777777777778e-07, "loss": 0.0, "num_tokens": 53510836.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 115 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2849.0, "completions/max_terminated_length": 2849.0, "completions/mean_length": 1002.87890625, "completions/mean_terminated_length": 1010.7755737304688, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.1856, "grad_norm": 0.0, "learning_rate": 4.722222222222222e-07, "loss": 0.0, "num_tokens": 54028253.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 116 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 3010.0, "completions/max_terminated_length": 3010.0, "completions/mean_length": 1162.40625, "completions/mean_terminated_length": 1185.561767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 12.0, "epoch": 0.1872, "grad_norm": 0.0, "learning_rate": 4.6666666666666666e-07, "loss": 0.0, "num_tokens": 54583869.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 117 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 3053.0, "completions/max_terminated_length": 3053.0, "completions/mean_length": 1087.0, "completions/mean_terminated_length": 1117.5582275390625, "completions/min_length": 0.0, "completions/min_terminated_length": 15.0, "epoch": 0.1888, "grad_norm": 0.0, "learning_rate": 4.611111111111111e-07, "loss": 0.0, "num_tokens": 55122637.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 118 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3070.0, "completions/max_terminated_length": 3070.0, "completions/mean_length": 1022.1484375, "completions/mean_terminated_length": 1034.268798828125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.1904, "grad_norm": 0.0, "learning_rate": 4.555555555555555e-07, "loss": 0.0, "num_tokens": 55640435.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 119 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3062.0, "completions/max_terminated_length": 3062.0, "completions/mean_length": 1080.94921875, "completions/mean_terminated_length": 1098.107177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.192, "grad_norm": 0.0, "learning_rate": 4.5e-07, "loss": 0.0, "num_tokens": 56174926.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 120 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3038.0, "completions/max_terminated_length": 3038.0, "completions/mean_length": 1094.5078125, "completions/mean_terminated_length": 1107.4862060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.1936, "grad_norm": 0.0, "learning_rate": 4.444444444444444e-07, "loss": 0.0, "num_tokens": 56712568.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 121 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2988.0, "completions/max_terminated_length": 2988.0, "completions/mean_length": 1165.0625, "completions/mean_terminated_length": 1174.2362060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 12.0, "epoch": 0.1952, "grad_norm": 0.0, "learning_rate": 4.3888888888888884e-07, "loss": 0.0, "num_tokens": 57272272.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 122 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 3016.0, "completions/max_terminated_length": 3016.0, "completions/mean_length": 1099.51953125, "completions/mean_terminated_length": 1125.9080810546875, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.1968, "grad_norm": 0.0, "learning_rate": 4.3333333333333335e-07, "loss": 0.0, "num_tokens": 57810981.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 123 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2870.0, "completions/max_terminated_length": 2870.0, "completions/mean_length": 968.6015625, "completions/mean_terminated_length": 976.2283325195312, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.1984, "grad_norm": 0.0, "learning_rate": 4.2777777777777775e-07, "loss": 0.0, "num_tokens": 58319463.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 124 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2826.0, "completions/max_terminated_length": 2826.0, "completions/mean_length": 1017.83203125, "completions/mean_terminated_length": 1025.846435546875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 4.222222222222222e-07, "loss": 0.0, "num_tokens": 58841228.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 125 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2903.0, "completions/max_terminated_length": 2903.0, "completions/mean_length": 1040.59375, "completions/mean_terminated_length": 1052.932861328125, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.2016, "grad_norm": 0.0, "learning_rate": 4.1666666666666667e-07, "loss": 0.0, "num_tokens": 59369388.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 126 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2942.0, "completions/max_terminated_length": 2942.0, "completions/mean_length": 1140.87109375, "completions/mean_terminated_length": 1158.980224609375, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.2032, "grad_norm": 0.0, "learning_rate": 4.1111111111111107e-07, "loss": 0.0, "num_tokens": 59923595.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 127 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2932.0, "completions/max_terminated_length": 2932.0, "completions/mean_length": 1121.69921875, "completions/mean_terminated_length": 1130.531494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.2048, "grad_norm": 0.0, "learning_rate": 4.055555555555555e-07, "loss": 0.0, "num_tokens": 60471974.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 128 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2974.0, "completions/max_terminated_length": 2974.0, "completions/mean_length": 1096.81640625, "completions/mean_terminated_length": 1109.8221435546875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.2064, "grad_norm": 0.0, "learning_rate": 4e-07, "loss": 0.0, "num_tokens": 61011023.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 129 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3031.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 1042.7421875, "completions/mean_terminated_length": 1055.1068115234375, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.208, "grad_norm": 0.0, "learning_rate": 3.9444444444444444e-07, "loss": 0.0, "num_tokens": 61535589.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 130 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2905.0, "completions/max_terminated_length": 2905.0, "completions/mean_length": 1132.5625, "completions/mean_terminated_length": 1150.539794921875, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.2096, "grad_norm": 0.0, "learning_rate": 3.888888888888889e-07, "loss": 0.0, "num_tokens": 62087669.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 131 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2914.0, "completions/max_terminated_length": 2914.0, "completions/mean_length": 1072.91796875, "completions/mean_terminated_length": 1081.3660888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.2112, "grad_norm": 0.0, "learning_rate": 3.8333333333333335e-07, "loss": 0.0, "num_tokens": 62623632.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 132 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 3017.0, "completions/max_terminated_length": 3017.0, "completions/mean_length": 958.64453125, "completions/mean_terminated_length": 977.7410888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.2128, "grad_norm": 0.0, "learning_rate": 3.7777777777777775e-07, "loss": 0.0, "num_tokens": 63127029.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 133 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2972.0, "completions/max_terminated_length": 2972.0, "completions/mean_length": 1142.4140625, "completions/mean_terminated_length": 1151.409423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.2144, "grad_norm": 0.0, "learning_rate": 3.722222222222222e-07, "loss": 0.0, "num_tokens": 63681503.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 134 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2936.0, "completions/max_terminated_length": 2936.0, "completions/mean_length": 1090.01171875, "completions/mean_terminated_length": 1107.3135986328125, "completions/min_length": 0.0, "completions/min_terminated_length": 15.0, "epoch": 0.216, "grad_norm": 0.0, "learning_rate": 3.666666666666666e-07, "loss": 0.0, "num_tokens": 64222690.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 135 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2860.0, "completions/max_terminated_length": 2860.0, "completions/mean_length": 961.3125, "completions/mean_terminated_length": 968.8818969726562, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.2176, "grad_norm": 0.0, "learning_rate": 3.6111111111111107e-07, "loss": 0.0, "num_tokens": 64730930.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 136 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2900.0, "completions/max_terminated_length": 2900.0, "completions/mean_length": 1112.10546875, "completions/mean_terminated_length": 1125.29248046875, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.2192, "grad_norm": 0.0, "learning_rate": 3.5555555555555553e-07, "loss": 0.0, "num_tokens": 65277773.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 137 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2918.0, "completions/max_terminated_length": 2918.0, "completions/mean_length": 1041.1484375, "completions/mean_terminated_length": 1049.346435546875, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.2208, "grad_norm": 0.0, "learning_rate": 3.5e-07, "loss": 0.0, "num_tokens": 65805883.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 138 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2932.0, "completions/max_terminated_length": 2932.0, "completions/mean_length": 998.8046875, "completions/mean_terminated_length": 1006.6693115234375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.2224, "grad_norm": 0.0, "learning_rate": 3.4444444444444444e-07, "loss": 0.0, "num_tokens": 66323385.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 139 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2837.0, "completions/max_terminated_length": 2837.0, "completions/mean_length": 1101.73046875, "completions/mean_terminated_length": 1114.7945556640625, "completions/min_length": 0.0, "completions/min_terminated_length": 23.0, "epoch": 0.224, "grad_norm": 0.0, "learning_rate": 3.388888888888889e-07, "loss": 0.0, "num_tokens": 66867004.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 140 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2936.0, "completions/max_terminated_length": 2936.0, "completions/mean_length": 1091.921875, "completions/mean_terminated_length": 1096.2039794921875, "completions/min_length": 0.0, "completions/min_terminated_length": 14.0, "epoch": 0.2256, "grad_norm": 0.0, "learning_rate": 3.333333333333333e-07, "loss": 0.0, "num_tokens": 67408680.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 141 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3022.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 1006.89453125, "completions/mean_terminated_length": 1014.8228149414062, "completions/min_length": 0.0, "completions/min_terminated_length": 18.0, "epoch": 0.2272, "grad_norm": 0.0, "learning_rate": 3.2777777777777776e-07, "loss": 0.0, "num_tokens": 67927597.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 142 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2758.0, "completions/max_terminated_length": 2758.0, "completions/mean_length": 1120.359375, "completions/mean_terminated_length": 1133.644287109375, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.2288, "grad_norm": 0.0, "learning_rate": 3.222222222222222e-07, "loss": 0.0, "num_tokens": 68474529.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 143 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3004.0, "completions/max_terminated_length": 3004.0, "completions/mean_length": 1132.0234375, "completions/mean_terminated_length": 1145.4466552734375, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.2304, "grad_norm": 0.0, "learning_rate": 3.166666666666666e-07, "loss": 0.0, "num_tokens": 69024839.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 144 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3041.0, "completions/max_terminated_length": 3041.0, "completions/mean_length": 1088.08984375, "completions/mean_terminated_length": 1096.657470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.232, "grad_norm": 0.0, "learning_rate": 3.111111111111111e-07, "loss": 0.0, "num_tokens": 69565406.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 145 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2842.0, "completions/max_terminated_length": 2842.0, "completions/mean_length": 991.71484375, "completions/mean_terminated_length": 1003.474365234375, "completions/min_length": 0.0, "completions/min_terminated_length": 25.0, "epoch": 0.2336, "grad_norm": 0.0, "learning_rate": 3.055555555555556e-07, "loss": 0.0, "num_tokens": 70081429.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 146 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3022.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 1031.70703125, "completions/mean_terminated_length": 1039.8306884765625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.2352, "grad_norm": 0.0, "learning_rate": 3e-07, "loss": 0.0, "num_tokens": 70606626.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 147 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2918.0, "completions/max_terminated_length": 2918.0, "completions/mean_length": 1101.3203125, "completions/mean_terminated_length": 1118.8016357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.2368, "grad_norm": 0.0, "learning_rate": 2.9444444444444444e-07, "loss": 0.0, "num_tokens": 71146876.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 148 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2798.0, "completions/max_terminated_length": 2798.0, "completions/mean_length": 1010.1484375, "completions/mean_terminated_length": 1018.1023559570312, "completions/min_length": 0.0, "completions/min_terminated_length": 22.0, "epoch": 0.2384, "grad_norm": 0.0, "learning_rate": 2.8888888888888885e-07, "loss": 0.0, "num_tokens": 71667082.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 149 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2975.0, "completions/max_terminated_length": 2975.0, "completions/mean_length": 1086.33203125, "completions/mean_terminated_length": 1094.8858642578125, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 2.833333333333333e-07, "loss": 0.0, "num_tokens": 72205943.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 150 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2901.0, "completions/max_terminated_length": 2901.0, "completions/mean_length": 1060.27734375, "completions/mean_terminated_length": 1068.6259765625, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.2416, "grad_norm": 0.0, "learning_rate": 2.7777777777777776e-07, "loss": 0.0, "num_tokens": 72739254.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 151 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2894.0, "completions/max_terminated_length": 2894.0, "completions/mean_length": 1052.00390625, "completions/mean_terminated_length": 1060.287353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.2432, "grad_norm": 0.0, "learning_rate": 2.7222222222222216e-07, "loss": 0.0, "num_tokens": 73268375.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 152 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 1053.90234375, "completions/mean_terminated_length": 1062.2008056640625, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.2448, "grad_norm": 0.0, "learning_rate": 2.6666666666666667e-07, "loss": 0.0, "num_tokens": 73799910.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 153 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2934.0, "completions/max_terminated_length": 2934.0, "completions/mean_length": 1075.3828125, "completions/mean_terminated_length": 1083.850341796875, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.2464, "grad_norm": 0.0, "learning_rate": 2.6111111111111113e-07, "loss": 0.0, "num_tokens": 74337352.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 154 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2837.0, "completions/max_terminated_length": 2837.0, "completions/mean_length": 1098.609375, "completions/mean_terminated_length": 1107.2598876953125, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.248, "grad_norm": 0.0, "learning_rate": 2.5555555555555553e-07, "loss": 0.0, "num_tokens": 74880468.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 155 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2976.0, "completions/max_terminated_length": 2976.0, "completions/mean_length": 1070.65234375, "completions/mean_terminated_length": 1083.347900390625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.2496, "grad_norm": 0.0, "learning_rate": 2.5e-07, "loss": 0.0, "num_tokens": 75416699.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 156 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2859.0, "completions/max_terminated_length": 2859.0, "completions/mean_length": 1022.59375, "completions/mean_terminated_length": 1042.9642333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.2512, "grad_norm": 0.0, "learning_rate": 2.4444444444444445e-07, "loss": 0.0, "num_tokens": 75939691.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 157 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2893.0, "completions/max_terminated_length": 2893.0, "completions/mean_length": 1023.33984375, "completions/mean_terminated_length": 1031.3975830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.2528, "grad_norm": 0.0, "learning_rate": 2.388888888888889e-07, "loss": 0.0, "num_tokens": 76463618.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 158 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2825.0, "completions/max_terminated_length": 2825.0, "completions/mean_length": 1109.16796875, "completions/mean_terminated_length": 1117.901611328125, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.2544, "grad_norm": 0.0, "learning_rate": 2.3333333333333333e-07, "loss": 0.0, "num_tokens": 77009709.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 159 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2907.0, "completions/max_terminated_length": 2907.0, "completions/mean_length": 1154.60546875, "completions/mean_terminated_length": 1163.6968994140625, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.256, "grad_norm": 0.0, "learning_rate": 2.2777777777777776e-07, "loss": 0.0, "num_tokens": 77563432.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 160 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3018.0, "completions/max_terminated_length": 3018.0, "completions/mean_length": 1136.26953125, "completions/mean_terminated_length": 1154.3056640625, "completions/min_length": 0.0, "completions/min_terminated_length": 22.0, "epoch": 0.2576, "grad_norm": 0.0, "learning_rate": 2.222222222222222e-07, "loss": 0.0, "num_tokens": 78112093.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 161 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 3016.0, "completions/max_terminated_length": 3016.0, "completions/mean_length": 1101.1328125, "completions/mean_terminated_length": 1123.0677490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.2592, "grad_norm": 0.0, "learning_rate": 2.1666666666666667e-07, "loss": 0.0, "num_tokens": 78651087.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 162 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3014.0, "completions/max_terminated_length": 3014.0, "completions/mean_length": 1056.65625, "completions/mean_terminated_length": 1069.185791015625, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.2608, "grad_norm": 0.0, "learning_rate": 2.111111111111111e-07, "loss": 0.0, "num_tokens": 79180167.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 163 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2888.0, "completions/max_terminated_length": 2888.0, "completions/mean_length": 1163.8125, "completions/mean_terminated_length": 1172.976318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.2624, "grad_norm": 0.0, "learning_rate": 2.0555555555555553e-07, "loss": 0.0, "num_tokens": 79739663.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 164 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2948.0, "completions/max_terminated_length": 2948.0, "completions/mean_length": 1224.3828125, "completions/mean_terminated_length": 1238.9012451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 11.0, "epoch": 0.264, "grad_norm": 0.0, "learning_rate": 2e-07, "loss": 0.0, "num_tokens": 80314225.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 165 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3022.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 1024.66015625, "completions/mean_terminated_length": 1032.7283935546875, "completions/min_length": 0.0, "completions/min_terminated_length": 25.0, "epoch": 0.2656, "grad_norm": 0.0, "learning_rate": 1.9444444444444445e-07, "loss": 0.0, "num_tokens": 80835002.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 166 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2798.0, "completions/max_terminated_length": 2798.0, "completions/mean_length": 1034.26171875, "completions/mean_terminated_length": 1042.405517578125, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.2672, "grad_norm": 0.0, "learning_rate": 1.8888888888888888e-07, "loss": 0.0, "num_tokens": 81361653.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 167 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2841.0, "completions/max_terminated_length": 2841.0, "completions/mean_length": 1116.234375, "completions/mean_terminated_length": 1133.952392578125, "completions/min_length": 0.0, "completions/min_terminated_length": 18.0, "epoch": 0.2688, "grad_norm": 0.0, "learning_rate": 1.833333333333333e-07, "loss": 0.0, "num_tokens": 81909553.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 168 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2944.0, "completions/max_terminated_length": 2944.0, "completions/mean_length": 1004.765625, "completions/mean_terminated_length": 1016.6798706054688, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.2704, "grad_norm": 0.0, "learning_rate": 1.7777777777777776e-07, "loss": 0.0, "num_tokens": 82428069.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 169 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.00390625, "calib/ece": 0.8, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.8, "calib/mu_c": NaN, "calib/mu_w": 0.8, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0078125, "calib/nonempty_step_conf_rate": 0.00390625, "calib/pce": 0.8, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.00390625, "calib/step_q_w": 0.75, "calib/step_q_w_n": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2963.0, "completions/max_terminated_length": 2963.0, "completions/mean_length": 1091.875, "completions/mean_terminated_length": 1104.8221435546875, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.272, "grad_norm": 0.0, "learning_rate": 1.7222222222222222e-07, "loss": 0.0, "num_tokens": 82960605.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 170 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3012.0, "completions/max_terminated_length": 3012.0, "completions/mean_length": 960.78515625, "completions/mean_terminated_length": 968.3504028320312, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.2736, "grad_norm": 0.0, "learning_rate": 1.6666666666666665e-07, "loss": 0.0, "num_tokens": 83465222.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 171 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3059.0, "completions/max_terminated_length": 3059.0, "completions/mean_length": 1178.6875, "completions/mean_terminated_length": 1192.6640625, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.2752, "grad_norm": 0.0, "learning_rate": 1.611111111111111e-07, "loss": 0.0, "num_tokens": 84027150.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 172 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3009.0, "completions/max_terminated_length": 3009.0, "completions/mean_length": 1210.18359375, "completions/mean_terminated_length": 1224.53369140625, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.2768, "grad_norm": 0.0, "learning_rate": 1.5555555555555556e-07, "loss": 0.0, "num_tokens": 84597773.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 173 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2929.0, "completions/max_terminated_length": 2929.0, "completions/mean_length": 1143.296875, "completions/mean_terminated_length": 1156.853759765625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.2784, "grad_norm": 0.0, "learning_rate": 1.5e-07, "loss": 0.0, "num_tokens": 85152601.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 174 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2915.0, "completions/max_terminated_length": 2915.0, "completions/mean_length": 1112.11328125, "completions/mean_terminated_length": 1129.7659912109375, "completions/min_length": 0.0, "completions/min_terminated_length": 14.0, "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.4444444444444442e-07, "loss": 0.0, "num_tokens": 85697078.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 175 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 3021.0, "completions/max_terminated_length": 3021.0, "completions/mean_length": 1048.34375, "completions/mean_terminated_length": 1073.5040283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.2816, "grad_norm": 0.0, "learning_rate": 1.3888888888888888e-07, "loss": 0.0, "num_tokens": 86224110.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 176 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2962.0, "completions/max_terminated_length": 2962.0, "completions/mean_length": 1140.140625, "completions/mean_terminated_length": 1149.1181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.2832, "grad_norm": 0.0, "learning_rate": 1.3333333333333334e-07, "loss": 0.0, "num_tokens": 86778130.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 177 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2815.0, "completions/max_terminated_length": 2815.0, "completions/mean_length": 1183.52734375, "completions/mean_terminated_length": 1192.846435546875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.2848, "grad_norm": 0.0, "learning_rate": 1.2777777777777777e-07, "loss": 0.0, "num_tokens": 87343257.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 178 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2986.0, "completions/max_terminated_length": 2986.0, "completions/mean_length": 1141.56640625, "completions/mean_terminated_length": 1159.6865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.2864, "grad_norm": 0.0, "learning_rate": 1.2222222222222222e-07, "loss": 0.0, "num_tokens": 87891170.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 179 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2974.0, "completions/max_terminated_length": 2974.0, "completions/mean_length": 1196.1484375, "completions/mean_terminated_length": 1210.33203125, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.288, "grad_norm": 0.0, "learning_rate": 1.1666666666666667e-07, "loss": 0.0, "num_tokens": 88455328.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 180 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2921.0, "completions/max_terminated_length": 2921.0, "completions/mean_length": 1195.22265625, "completions/mean_terminated_length": 1204.6339111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 28.0, "epoch": 0.2896, "grad_norm": 0.0, "learning_rate": 1.111111111111111e-07, "loss": 0.0, "num_tokens": 89018161.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 181 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2984.0, "completions/max_terminated_length": 2984.0, "completions/mean_length": 1099.7421875, "completions/mean_terminated_length": 1108.401611328125, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.2912, "grad_norm": 0.0, "learning_rate": 1.0555555555555555e-07, "loss": 0.0, "num_tokens": 89561495.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 182 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3012.0, "completions/max_terminated_length": 3012.0, "completions/mean_length": 1136.4921875, "completions/mean_terminated_length": 1140.9490966796875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.2928, "grad_norm": 0.0, "learning_rate": 1e-07, "loss": 0.0, "num_tokens": 90113261.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 183 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2984.0, "completions/max_terminated_length": 2984.0, "completions/mean_length": 1074.80078125, "completions/mean_terminated_length": 1083.2637939453125, "completions/min_length": 0.0, "completions/min_terminated_length": 23.0, "epoch": 0.2944, "grad_norm": 0.0, "learning_rate": 9.444444444444444e-08, "loss": 0.0, "num_tokens": 90648722.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 184 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2894.0, "completions/max_terminated_length": 2894.0, "completions/mean_length": 1060.1875, "completions/mean_terminated_length": 1072.7589111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.296, "grad_norm": 0.0, "learning_rate": 8.888888888888888e-08, "loss": 0.0, "num_tokens": 91181714.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 185 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2904.0, "completions/max_terminated_length": 2904.0, "completions/mean_length": 1102.87890625, "completions/mean_terminated_length": 1115.95654296875, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.2976, "grad_norm": 0.0, "learning_rate": 8.333333333333333e-08, "loss": 0.0, "num_tokens": 91726195.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 186 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2932.0, "completions/max_terminated_length": 2932.0, "completions/mean_length": 1163.03515625, "completions/mean_terminated_length": 1172.19287109375, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.2992, "grad_norm": 0.0, "learning_rate": 7.777777777777778e-08, "loss": 0.0, "num_tokens": 92285100.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 187 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2896.0, "completions/max_terminated_length": 2896.0, "completions/mean_length": 1062.48828125, "completions/mean_terminated_length": 1079.353271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.3008, "grad_norm": 0.0, "learning_rate": 7.222222222222221e-08, "loss": 0.0, "num_tokens": 92814521.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 188 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3057.0, "completions/max_terminated_length": 3057.0, "completions/mean_length": 1209.0078125, "completions/mean_terminated_length": 1213.7491455078125, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.3024, "grad_norm": 0.0, "learning_rate": 6.666666666666667e-08, "loss": 0.0, "num_tokens": 93383731.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 189 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2900.0, "completions/max_terminated_length": 2900.0, "completions/mean_length": 1079.34375, "completions/mean_terminated_length": 1096.4761962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 12.0, "epoch": 0.304, "grad_norm": 0.0, "learning_rate": 6.111111111111111e-08, "loss": 0.0, "num_tokens": 93911275.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 190 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3032.0, "completions/max_terminated_length": 3032.0, "completions/mean_length": 1036.34765625, "completions/mean_terminated_length": 1044.5078125, "completions/min_length": 0.0, "completions/min_terminated_length": 19.0, "epoch": 0.3056, "grad_norm": 0.0, "learning_rate": 5.555555555555555e-08, "loss": 0.0, "num_tokens": 94438436.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 191 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2859.0, "completions/max_terminated_length": 2859.0, "completions/mean_length": 1163.59765625, "completions/mean_terminated_length": 1177.395263671875, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.3072, "grad_norm": 0.0, "learning_rate": 5e-08, "loss": 0.0, "num_tokens": 94998461.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 192 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2913.0, "completions/max_terminated_length": 2913.0, "completions/mean_length": 1036.12890625, "completions/mean_terminated_length": 1044.287353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.3088, "grad_norm": 0.0, "learning_rate": 4.444444444444444e-08, "loss": 0.0, "num_tokens": 95525406.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 193 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2952.0, "completions/max_terminated_length": 2952.0, "completions/mean_length": 1050.99609375, "completions/mean_terminated_length": 1055.11767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 12.0, "epoch": 0.3104, "grad_norm": 0.0, "learning_rate": 3.888888888888889e-08, "loss": 0.0, "num_tokens": 96055093.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 194 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 1149.95703125, "completions/mean_terminated_length": 1163.5928955078125, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.312, "grad_norm": 0.0, "learning_rate": 3.3333333333333334e-08, "loss": 0.0, "num_tokens": 96608714.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 195 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3033.0, "completions/max_terminated_length": 3033.0, "completions/mean_length": 1134.48828125, "completions/mean_terminated_length": 1138.9373779296875, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.3136, "grad_norm": 0.0, "learning_rate": 2.7777777777777774e-08, "loss": 0.0, "num_tokens": 97161287.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 196 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2964.0, "completions/max_terminated_length": 2964.0, "completions/mean_length": 1116.45703125, "completions/mean_terminated_length": 1129.6956787109375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.3152, "grad_norm": 0.0, "learning_rate": 2.222222222222222e-08, "loss": 0.0, "num_tokens": 97707196.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 197 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2785.0, "completions/max_terminated_length": 2785.0, "completions/mean_length": 1100.80859375, "completions/mean_terminated_length": 1113.8616943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.3168, "grad_norm": 0.0, "learning_rate": 1.6666666666666667e-08, "loss": 0.0, "num_tokens": 98251147.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 198 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2881.0, "completions/max_terminated_length": 2881.0, "completions/mean_length": 1063.796875, "completions/mean_terminated_length": 1080.6826171875, "completions/min_length": 0.0, "completions/min_terminated_length": 11.0, "epoch": 0.3184, "grad_norm": 0.0, "learning_rate": 1.111111111111111e-08, "loss": 0.0, "num_tokens": 98785623.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 199 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3065.0, "completions/max_terminated_length": 3065.0, "completions/mean_length": 1158.99609375, "completions/mean_terminated_length": 1172.7391357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 5.555555555555555e-09, "loss": 0.0, "num_tokens": 99341446.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0, "step": 200 }, { "epoch": 0.32, "step": 200, "total_flos": 0.0, "train_loss": -0.01405992767540738, "train_runtime": 12749.9065, "train_samples_per_second": 4.016, "train_steps_per_second": 0.016 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 99341446, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }