{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.32, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7847598522167488, "calib/avg_num_step_conf": 3.85546875, "calib/ece": 0.31390625000000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.53515625, "calib/gap": 0.16190147783251252, "calib/mean_conf": 0.86078125, "calib/mu_c": 0.9341428571428574, "calib/mu_w": 0.7722413793103449, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.31390625000000005, "calib/std_conf": 0.18074762003533407, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8456213017751478, "calib/step_q_c_n": 507.0, "calib/step_q_gap": 0.12451713510848117, "calib/step_q_w": 0.7211041666666667, "calib/step_q_w_n": 480.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0016, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 0.0, "num_tokens": 252032.0, "reward": 1.046875, "reward_std": 0.15702980756759644, "rewards/accuracy_reward_step": 0.546875, "rewards/format_reward_step": 1.0, "step": 1 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6568917668825163, "calib/avg_num_step_conf": 4.16015625, "calib/ece": 0.4085156250000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.51953125, "calib/gap": 0.10610299105766285, "calib/mean_conf": 0.8577343749999999, "calib/mu_c": 0.9161739130434784, "calib/mu_w": 0.8100709219858155, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4085156250000001, "calib/std_conf": 0.1895294949166471, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7985778781038374, "calib/step_q_c_n": 443.0, "calib/step_q_gap": 0.05455858549933579, "calib/step_q_w": 0.7440192926045016, "calib/step_q_w_n": 622.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0032, "grad_norm": 0.0, "learning_rate": 5e-08, "loss": 0.0, "num_tokens": 511736.0, "reward": 0.9453125, "reward_std": 0.17519709467887878, "rewards/accuracy_reward_step": 0.44921875, "rewards/format_reward_step": 0.9921875, "step": 2 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6860458015267176, "calib/avg_num_step_conf": 3.94921875, "calib/ece": 0.40558593750000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.625, "calib/gap": 0.08264854961832058, "calib/mean_conf": 0.8938671874999999, "calib/mu_c": 0.93616, "calib/mu_w": 0.8535114503816794, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.40558593750000005, "calib/std_conf": 0.14239951891014183, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8439232409381665, "calib/step_q_c_n": 469.0, "calib/step_q_gap": 0.05460589776473479, "calib/step_q_w": 0.7893173431734317, "calib/step_q_w_n": 542.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0048, "grad_norm": 0.0, "learning_rate": 1e-07, "loss": 0.0, "num_tokens": 767128.0, "reward": 0.986328125, "reward_std": 0.15006008744239807, "rewards/accuracy_reward_step": 0.48828125, "rewards/format_reward_step": 0.99609375, "step": 3 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7235286798556676, "calib/avg_num_step_conf": 4.0, "calib/ece": 0.3897647058823531, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.5529411764705883, "calib/gap": 0.17866368047779035, "calib/mean_conf": 0.8368235294117646, "calib/mu_c": 0.9356140350877193, "calib/mu_w": 0.756950354609929, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3897647058823531, "calib/std_conf": 0.2336053909096557, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8339447236180904, "calib/step_q_c_n": 398.0, "calib/step_q_gap": 0.12412044246793053, "calib/step_q_w": 0.7098242811501598, "calib/step_q_w_n": 626.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0064, "grad_norm": 0.0, "learning_rate": 1.5e-07, "loss": 0.0, "num_tokens": 1024896.0, "reward": 0.94140625, "reward_std": 0.16598647832870483, "rewards/accuracy_reward_step": 0.4453125, "rewards/format_reward_step": 0.9921875, "step": 4 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7077480042081812, "calib/avg_num_step_conf": 4.08203125, "calib/ece": 0.4300390625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.640625, "calib/gap": 0.1347676217587721, "calib/mean_conf": 0.8714453124999999, "calib/mu_c": 0.9467256637168141, "calib/mu_w": 0.811958041958042, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4300390625, "calib/std_conf": 0.20415379116435076, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8484027777777777, "calib/step_q_c_n": 432.0, "calib/step_q_gap": 0.07176329980061613, "calib/step_q_w": 0.7766394779771616, "calib/step_q_w_n": 613.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.008, "grad_norm": 0.0, "learning_rate": 2e-07, "loss": 0.0, "num_tokens": 1285536.0, "reward": 0.94140625, "reward_std": 0.12164628505706787, "rewards/accuracy_reward_step": 0.44140625, "rewards/format_reward_step": 1.0, "step": 5 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6461634047840944, "calib/avg_num_step_conf": 4.109375, "calib/ece": 0.35417968750000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.71875, "calib/gap": 0.04493383038210619, "calib/mean_conf": 0.9205859374999998, "calib/mu_c": 0.9400689655172414, "calib/mu_w": 0.8951351351351352, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.35417968750000006, "calib/std_conf": 0.10330877771150955, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8379241877256318, "calib/step_q_c_n": 554.0, "calib/step_q_gap": 0.04374748089832259, "calib/step_q_w": 0.7941767068273092, "calib/step_q_w_n": 498.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0096, "grad_norm": 0.0, "learning_rate": 2.5e-07, "loss": 0.0, "num_tokens": 1547088.0, "reward": 1.064453125, "reward_std": 0.23002484440803528, "rewards/accuracy_reward_step": 0.56640625, "rewards/format_reward_step": 0.99609375, "step": 6 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7235908921239268, "calib/avg_num_step_conf": 3.8828125, "calib/ece": 0.28709803921568644, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.6235294117647059, "calib/gap": 0.21042926465098943, "calib/mean_conf": 0.8400392156862744, "calib/mu_c": 0.9341134751773053, "calib/mu_w": 0.7236842105263158, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.28709803921568644, "calib/std_conf": 0.24094177644941636, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8277351247600768, "calib/step_q_c_n": 521.0, "calib/step_q_gap": 0.17913047359728618, "calib/step_q_w": 0.6486046511627906, "calib/step_q_w_n": 473.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 87.0, "completions/max_terminated_length": 87.0, "completions/mean_length": 0.33984375, "completions/mean_terminated_length": 87.0, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.0112, "grad_norm": 2.3416316509246826, "learning_rate": 3e-07, "loss": 0.0211, "num_tokens": 1807055.0, "reward": 1.044921875, "reward_std": 0.19384272396564484, "rewards/accuracy_reward_step": 0.55078125, "rewards/format_reward_step": 0.98828125, "step": 7 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5795014208214931, "calib/avg_num_step_conf": 3.76953125, "calib/ece": 0.3051953124999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.73046875, "calib/gap": 0.04023378971841929, "calib/mean_conf": 0.9223828124999999, "calib/mu_c": 0.9377848101265823, "calib/mu_w": 0.897551020408163, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3051953124999999, "calib/std_conf": 0.08503782440531885, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8459363957597174, "calib/step_q_c_n": 566.0, "calib/step_q_gap": 0.05295393961936656, "calib/step_q_w": 0.7929824561403508, "calib/step_q_w_n": 399.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0128, "grad_norm": 0.0, "learning_rate": 3.5e-07, "loss": 0.0, "num_tokens": 2068111.0, "reward": 1.1171875, "reward_std": 0.13743899762630463, "rewards/accuracy_reward_step": 0.6171875, "rewards/format_reward_step": 1.0, "step": 8 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6410148584244462, "calib/avg_num_step_conf": 4.30078125, "calib/ece": 0.4741406250000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.45703125, "calib/gap": 0.16461732548359975, "calib/mean_conf": 0.794453125, "calib/mu_c": 0.9063414634146343, "calib/mu_w": 0.7417241379310345, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4741406250000001, "calib/std_conf": 0.26864331776118006, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8122873900293255, "calib/step_q_c_n": 341.0, "calib/step_q_gap": 0.11049791634511497, "calib/step_q_w": 0.7017894736842105, "calib/step_q_w_n": 760.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0144, "grad_norm": 0.0, "learning_rate": 4e-07, "loss": 0.0, "num_tokens": 2329039.0, "reward": 0.818359375, "reward_std": 0.13821910321712494, "rewards/accuracy_reward_step": 0.3203125, "rewards/format_reward_step": 0.99609375, "step": 9 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6659653195966044, "calib/avg_num_step_conf": 4.1171875, "calib/ece": 0.5887500000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.59375, "calib/gap": 0.06976420227816882, "calib/mean_conf": 0.88953125, "calib/mu_c": 0.9383116883116885, "calib/mu_w": 0.8685474860335197, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.5887500000000001, "calib/std_conf": 0.14614504447102372, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8195847750865053, "calib/step_q_c_n": 289.0, "calib/step_q_gap": 0.056578239138792785, "calib/step_q_w": 0.7630065359477125, "calib/step_q_w_n": 765.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.016, "grad_norm": 0.0, "learning_rate": 4.5e-07, "loss": 0.0, "num_tokens": 2588767.0, "reward": 0.798828125, "reward_std": 0.20692811906337738, "rewards/accuracy_reward_step": 0.30078125, "rewards/format_reward_step": 0.99609375, "step": 10 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6704088050314465, "calib/avg_num_step_conf": 4.36328125, "calib/ece": 0.4537890625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.5703125, "calib/gap": 0.11170188679245274, "calib/mean_conf": 0.8678515625, "calib/mu_c": 0.9333018867924529, "calib/mu_w": 0.8216000000000001, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4537890625, "calib/std_conf": 0.1801704743883098, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8169249394673123, "calib/step_q_c_n": 413.0, "calib/step_q_gap": 0.05183971219458494, "calib/step_q_w": 0.7650852272727273, "calib/step_q_w_n": 704.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0176, "grad_norm": 0.0, "learning_rate": 5e-07, "loss": 0.0, "num_tokens": 2845111.0, "reward": 0.912109375, "reward_std": 0.1373104453086853, "rewards/accuracy_reward_step": 0.4140625, "rewards/format_reward_step": 0.99609375, "step": 11 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7028461019444274, "calib/avg_num_step_conf": 4.05078125, "calib/ece": 0.315859375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.62109375, "calib/gap": 0.1911145188002208, "calib/mean_conf": 0.8510156249999998, "calib/mu_c": 0.9398540145985402, "calib/mu_w": 0.7487394957983194, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.315859375, "calib/std_conf": 0.24001249916589634, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8274908424908425, "calib/step_q_c_n": 546.0, "calib/step_q_gap": 0.10502648403870396, "calib/step_q_w": 0.7224643584521385, "calib/step_q_w_n": 491.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0192, "grad_norm": 0.0, "learning_rate": 5.5e-07, "loss": 0.0, "num_tokens": 3104999.0, "reward": 1.033203125, "reward_std": 0.2238508015871048, "rewards/accuracy_reward_step": 0.53515625, "rewards/format_reward_step": 0.99609375, "step": 12 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6854516386890488, "calib/avg_num_step_conf": 3.96484375, "calib/ece": 0.37953125000000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.65625, "calib/gap": 0.19506856053618626, "calib/mean_conf": 0.8365624999999999, "calib/mu_c": 0.9424786324786323, "calib/mu_w": 0.747410071942446, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.37953125000000004, "calib/std_conf": 0.27608857698888956, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8363274336283185, "calib/step_q_c_n": 452.0, "calib/step_q_gap": 0.12551038211854937, "calib/step_q_w": 0.7108170515097691, "calib/step_q_w_n": 563.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0208, "grad_norm": 0.0, "learning_rate": 6e-07, "loss": 0.0, "num_tokens": 3366951.0, "reward": 0.95703125, "reward_std": 0.16584046185016632, "rewards/accuracy_reward_step": 0.45703125, "rewards/format_reward_step": 1.0, "step": 13 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.744658708295072, "calib/avg_num_step_conf": 4.12890625, "calib/ece": 0.3450781250000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.6015625, "calib/gap": 0.14579614325068901, "calib/mean_conf": 0.8724218749999999, "calib/mu_c": 0.9413333333333335, "calib/mu_w": 0.7955371900826445, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3450781250000001, "calib/std_conf": 0.17242637058026936, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8381312127236581, "calib/step_q_c_n": 503.0, "calib/step_q_gap": 0.10291460622546311, "calib/step_q_w": 0.735216606498195, "calib/step_q_w_n": 554.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0224, "grad_norm": 0.0, "learning_rate": 6.5e-07, "loss": 0.0, "num_tokens": 3628071.0, "reward": 1.0234375, "reward_std": 0.08982988446950912, "rewards/accuracy_reward_step": 0.52734375, "rewards/format_reward_step": 0.9921875, "step": 14 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7537163645487048, "calib/avg_num_step_conf": 3.9296875, "calib/ece": 0.4614453125000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.62109375, "calib/gap": 0.11260427773944692, "calib/mean_conf": 0.8794140625, "calib/mu_c": 0.9449532710280375, "calib/mu_w": 0.8323489932885906, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4614453125000001, "calib/std_conf": 0.1775507421900739, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.851424802110818, "calib/step_q_c_n": 379.0, "calib/step_q_gap": 0.0865603682990157, "calib/step_q_w": 0.7648644338118022, "calib/step_q_w_n": 627.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.024, "grad_norm": 0.0, "learning_rate": 7e-07, "loss": 0.0, "num_tokens": 3888863.0, "reward": 0.916015625, "reward_std": 0.14187777042388916, "rewards/accuracy_reward_step": 0.41796875, "rewards/format_reward_step": 0.99609375, "step": 15 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6735380474675801, "calib/avg_num_step_conf": 4.2734375, "calib/ece": 0.358984375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.64453125, "calib/gap": 0.09122339124051859, "calib/mean_conf": 0.8824218749999999, "calib/mu_c": 0.9258955223880597, "calib/mu_w": 0.8346721311475411, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.358984375, "calib/std_conf": 0.15622372826009617, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8350915750915752, "calib/step_q_c_n": 546.0, "calib/step_q_gap": 0.04899668458062634, "calib/step_q_w": 0.7860948905109488, "calib/step_q_w_n": 548.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0256, "grad_norm": 0.0, "learning_rate": 7.5e-07, "loss": 0.0, "num_tokens": 4149623.0, "reward": 1.021484375, "reward_std": 0.16464470326900482, "rewards/accuracy_reward_step": 0.5234375, "rewards/format_reward_step": 0.99609375, "step": 16 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6859403729971106, "calib/avg_num_step_conf": 4.03515625, "calib/ece": 0.4659375000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.4921875, "calib/gap": 0.14501707381140017, "calib/mean_conf": 0.8331249999999999, "calib/mu_c": 0.9248936170212767, "calib/mu_w": 0.7798765432098765, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4659375000000001, "calib/std_conf": 0.22050279901851585, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8268684210526317, "calib/step_q_c_n": 380.0, "calib/step_q_gap": 0.10181482227774652, "calib/step_q_w": 0.7250535987748852, "calib/step_q_w_n": 653.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0272, "grad_norm": 0.0, "learning_rate": 8e-07, "loss": 0.0, "num_tokens": 4398623.0, "reward": 0.8671875, "reward_std": 0.1536148637533188, "rewards/accuracy_reward_step": 0.3671875, "rewards/format_reward_step": 1.0, "step": 17 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6764777798153921, "calib/avg_num_step_conf": 4.45703125, "calib/ece": 0.367109375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.52734375, "calib/gap": 0.12922672534996027, "calib/mean_conf": 0.847578125, "calib/mu_c": 0.9147154471544716, "calib/mu_w": 0.7854887218045113, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.367109375, "calib/std_conf": 0.18590045258547483, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8189463220675944, "calib/step_q_c_n": 503.0, "calib/step_q_gap": 0.07509052269455374, "calib/step_q_w": 0.7438557993730407, "calib/step_q_w_n": 638.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0288, "grad_norm": 0.0, "learning_rate": 8.499999999999999e-07, "loss": 0.0, "num_tokens": 4659959.0, "reward": 0.98046875, "reward_std": 0.22134128212928772, "rewards/accuracy_reward_step": 0.48046875, "rewards/format_reward_step": 1.0, "step": 18 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7079899896233901, "calib/avg_num_step_conf": 3.94921875, "calib/ece": 0.367421875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.58984375, "calib/gap": 0.1344381370933284, "calib/mean_conf": 0.8635156249999999, "calib/mu_c": 0.9312598425196851, "calib/mu_w": 0.7968217054263567, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.367421875, "calib/std_conf": 0.20059322117374598, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8341860465116279, "calib/step_q_c_n": 473.0, "calib/step_q_gap": 0.0985169015302153, "calib/step_q_w": 0.7356691449814126, "calib/step_q_w_n": 538.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0304, "grad_norm": 0.0, "learning_rate": 9e-07, "loss": 0.0, "num_tokens": 4920623.0, "reward": 0.994140625, "reward_std": 0.18043741583824158, "rewards/accuracy_reward_step": 0.49609375, "rewards/format_reward_step": 0.99609375, "step": 19 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6436936936936936, "calib/avg_num_step_conf": 3.98046875, "calib/ece": 0.4508695652173912, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.5731225296442688, "calib/gap": 0.08956949806949799, "calib/mean_conf": 0.865889328063241, "calib/mu_c": 0.9182857142857143, "calib/mu_w": 0.8287162162162163, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.4508695652173912, "calib/std_conf": 0.17095552089144098, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8316372795969772, "calib/step_q_c_n": 397.0, "calib/step_q_gap": 0.06138004487028903, "calib/step_q_w": 0.7702572347266882, "calib/step_q_w_n": 622.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.98828125, "completions/max_length": 280.0, "completions/max_terminated_length": 280.0, "completions/mean_length": 1.80859375, "completions/mean_terminated_length": 154.33334350585938, "completions/min_length": 0.0, "completions/min_terminated_length": 38.0, "epoch": 0.032, "grad_norm": 4.076399326324463, "learning_rate": 9.499999999999999e-07, "loss": 0.0987, "num_tokens": 5181646.0, "reward": 0.900390625, "reward_std": 0.2040051966905594, "rewards/accuracy_reward_step": 0.41015625, "rewards/format_reward_step": 0.98046875, "step": 20 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6870080064589921, "calib/avg_num_step_conf": 4.359375, "calib/ece": 0.497578125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.55078125, "calib/gap": 0.12012245172576197, "calib/mean_conf": 0.845234375, "calib/mu_c": 0.9235955056179775, "calib/mu_w": 0.8034730538922156, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.497578125, "calib/std_conf": 0.21170241807395437, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8163478260869564, "calib/step_q_c_n": 345.0, "calib/step_q_gap": 0.06533615293520545, "calib/step_q_w": 0.751011673151751, "calib/step_q_w_n": 771.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0336, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 5439662.0, "reward": 0.84765625, "reward_std": 0.11139655113220215, "rewards/accuracy_reward_step": 0.34765625, "rewards/format_reward_step": 1.0, "step": 21 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6988704867060093, "calib/avg_num_step_conf": 4.0625, "calib/ece": 0.4556640625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.53515625, "calib/gap": 0.1274427311377626, "calib/mean_conf": 0.8580078124999999, "calib/mu_c": 0.9341747572815535, "calib/mu_w": 0.8067320261437909, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4556640625, "calib/std_conf": 0.18236386172694644, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8324668435013264, "calib/step_q_c_n": 377.0, "calib/step_q_gap": 0.10385447547719373, "calib/step_q_w": 0.7286123680241327, "calib/step_q_w_n": 663.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0352, "grad_norm": 0.0, "learning_rate": 9.944444444444444e-07, "loss": 0.0, "num_tokens": 5701750.0, "reward": 0.900390625, "reward_std": 0.15003961324691772, "rewards/accuracy_reward_step": 0.40234375, "rewards/format_reward_step": 0.99609375, "step": 22 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.693142361111111, "calib/avg_num_step_conf": 3.8671875, "calib/ece": 0.43999999999999995, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.6953125, "calib/gap": 0.12210317460317455, "calib/mean_conf": 0.87515625, "calib/mu_c": 0.9438392857142858, "calib/mu_w": 0.8217361111111112, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4388281249999999, "calib/std_conf": 0.2005519118231923, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.865979381443299, "calib/step_q_c_n": 388.0, "calib/step_q_gap": 0.10102921533034215, "calib/step_q_w": 0.7649501661129569, "calib/step_q_w_n": 602.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0368, "grad_norm": 0.0, "learning_rate": 9.88888888888889e-07, "loss": 0.0, "num_tokens": 5961574.0, "reward": 0.9375, "reward_std": 0.09271685779094696, "rewards/accuracy_reward_step": 0.4375, "rewards/format_reward_step": 1.0, "step": 23 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7096158658658658, "calib/avg_num_step_conf": 3.7578125, "calib/ece": 0.4128125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.59765625, "calib/gap": 0.1812012012012013, "calib/mean_conf": 0.8346874999999999, "calib/mu_c": 0.9394444444444445, "calib/mu_w": 0.7582432432432432, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.4128125, "calib/std_conf": 0.24072623682048033, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8311286089238846, "calib/step_q_c_n": 381.0, "calib/step_q_gap": 0.10574134558481396, "calib/step_q_w": 0.7253872633390707, "calib/step_q_w_n": 581.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0384, "grad_norm": 0.0, "learning_rate": 9.833333333333332e-07, "loss": 0.0, "num_tokens": 6219486.0, "reward": 0.91796875, "reward_std": 0.15253356099128723, "rewards/accuracy_reward_step": 0.421875, "rewards/format_reward_step": 0.9921875, "step": 24 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6846522781774581, "calib/avg_num_step_conf": 4.0703125, "calib/ece": 0.385234375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.546875, "calib/gap": 0.11813072618828024, "calib/mean_conf": 0.8285937499999999, "calib/mu_c": 0.8927350427350429, "calib/mu_w": 0.7746043165467627, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3783984375, "calib/std_conf": 0.2325579039958382, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8196636771300448, "calib/step_q_c_n": 446.0, "calib/step_q_gap": 0.09520058988172275, "calib/step_q_w": 0.7244630872483221, "calib/step_q_w_n": 596.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 9.777777777777778e-07, "loss": 0.0, "num_tokens": 6480774.0, "reward": 0.95703125, "reward_std": 0.14453580975532532, "rewards/accuracy_reward_step": 0.45703125, "rewards/format_reward_step": 1.0, "step": 25 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6871333964049195, "calib/avg_num_step_conf": 3.8984375, "calib/ece": 0.4467187499999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.57421875, "calib/gap": 0.13802081362346263, "calib/mean_conf": 0.8568749999999999, "calib/mu_c": 0.9382857142857143, "calib/mu_w": 0.8002649006622516, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.4467187499999999, "calib/std_conf": 0.21402851007517668, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8427653631284916, "calib/step_q_c_n": 358.0, "calib/step_q_gap": 0.10604661312849162, "calib/step_q_w": 0.73671875, "calib/step_q_w_n": 640.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0416, "grad_norm": 0.0, "learning_rate": 9.722222222222222e-07, "loss": 0.0, "num_tokens": 6740750.0, "reward": 0.90625, "reward_std": 0.1425735503435135, "rewards/accuracy_reward_step": 0.41015625, "rewards/format_reward_step": 0.9921875, "step": 26 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7142270861833105, "calib/avg_num_step_conf": 3.91796875, "calib/ece": 0.45214843750000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.3984375, "calib/gap": 0.1802612859097128, "calib/mean_conf": 0.7880859375, "calib/mu_c": 0.9077906976744187, "calib/mu_w": 0.7275294117647059, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.45214843750000006, "calib/std_conf": 0.26491863513114, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7893265993265993, "calib/step_q_c_n": 297.0, "calib/step_q_gap": 0.10204614606880891, "calib/step_q_w": 0.6872804532577904, "calib/step_q_w_n": 706.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0432, "grad_norm": 0.0, "learning_rate": 9.666666666666666e-07, "loss": 0.0, "num_tokens": 7002198.0, "reward": 0.83203125, "reward_std": 0.1539076715707779, "rewards/accuracy_reward_step": 0.3359375, "rewards/format_reward_step": 0.9921875, "step": 27 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6436518124579743, "calib/avg_num_step_conf": 3.91796875, "calib/ece": 0.400703125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.58984375, "calib/gap": 0.09549238951036132, "calib/mean_conf": 0.8784375, "calib/mu_c": 0.9280487804878049, "calib/mu_w": 0.8325563909774436, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3993359375, "calib/std_conf": 0.18154474956260783, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.809004329004329, "calib/step_q_c_n": 462.0, "calib/step_q_gap": 0.04501172271967102, "calib/step_q_w": 0.7639926062846579, "calib/step_q_w_n": 541.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0448, "grad_norm": 0.0, "learning_rate": 9.61111111111111e-07, "loss": 0.0, "num_tokens": 7258086.0, "reward": 0.974609375, "reward_std": 0.14191211760044098, "rewards/accuracy_reward_step": 0.48046875, "rewards/format_reward_step": 0.98828125, "step": 28 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5710377358490567, "calib/avg_num_step_conf": 4.1171875, "calib/ece": 0.47949218749999994, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.60546875, "calib/gap": 0.025973584905660374, "calib/mean_conf": 0.8935546875, "calib/mu_c": 0.9087735849056605, "calib/mu_w": 0.8828000000000001, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.47949218749999994, "calib/std_conf": 0.1359380780788714, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8161576354679804, "calib/step_q_c_n": 406.0, "calib/step_q_gap": 0.013565042875387845, "calib/step_q_w": 0.8025925925925925, "calib/step_q_w_n": 648.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0464, "grad_norm": 0.0, "learning_rate": 9.555555555555556e-07, "loss": 0.0, "num_tokens": 7518990.0, "reward": 0.9140625, "reward_std": 0.20437544584274292, "rewards/accuracy_reward_step": 0.4140625, "rewards/format_reward_step": 1.0, "step": 29 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7290471785383904, "calib/avg_num_step_conf": 3.73828125, "calib/ece": 0.3382812500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.47265625, "calib/gap": 0.23330496453900718, "calib/mean_conf": 0.7875, "calib/mu_c": 0.916, "calib/mu_w": 0.6826950354609929, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3382812500000001, "calib/std_conf": 0.28351725035701086, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7971393643031784, "calib/step_q_c_n": 409.0, "calib/step_q_gap": 0.14967586065354344, "calib/step_q_w": 0.647463503649635, "calib/step_q_w_n": 548.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.048, "grad_norm": 0.0, "learning_rate": 9.499999999999999e-07, "loss": 0.0, "num_tokens": 7779406.0, "reward": 0.9453125, "reward_std": 0.15570057928562164, "rewards/accuracy_reward_step": 0.44921875, "rewards/format_reward_step": 0.9921875, "step": 30 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6527393469839512, "calib/avg_num_step_conf": 4.13671875, "calib/ece": 0.4433984375000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.609375, "calib/gap": 0.07098874746356765, "calib/mean_conf": 0.9004296875, "calib/mu_c": 0.9389743589743591, "calib/mu_w": 0.8679856115107915, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4433984375000001, "calib/std_conf": 0.12497582163623627, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8435280898876405, "calib/step_q_c_n": 445.0, "calib/step_q_gap": 0.0760362332101161, "calib/step_q_w": 0.7674918566775244, "calib/step_q_w_n": 614.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0496, "grad_norm": 0.0, "learning_rate": 9.444444444444444e-07, "loss": 0.0, "num_tokens": 8037134.0, "reward": 0.95703125, "reward_std": 0.23197564482688904, "rewards/accuracy_reward_step": 0.45703125, "rewards/format_reward_step": 1.0, "step": 31 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6696042150159944, "calib/avg_num_step_conf": 3.9765625, "calib/ece": 0.4343750000000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.6015625, "calib/gap": 0.14466286144389395, "calib/mean_conf": 0.85234375, "calib/mu_c": 0.9365420560747665, "calib/mu_w": 0.7918791946308725, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.4343750000000002, "calib/std_conf": 0.2301612355196624, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.8250469483568076, "calib/step_q_c_n": 426.0, "calib/step_q_gap": 0.11391519160005081, "calib/step_q_w": 0.7111317567567568, "calib/step_q_w_n": 592.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0512, "grad_norm": 0.0, "learning_rate": 9.388888888888888e-07, "loss": 0.0, "num_tokens": 8298086.0, "reward": 0.912109375, "reward_std": 0.1129549890756607, "rewards/accuracy_reward_step": 0.41796875, "rewards/format_reward_step": 0.98828125, "step": 32 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7410264792415822, "calib/avg_num_step_conf": 4.1171875, "calib/ece": 0.4749609375000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.5, "calib/gap": 0.1331435109512914, "calib/mean_conf": 0.8460546875, "calib/mu_c": 0.9297894736842106, "calib/mu_w": 0.7966459627329192, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4749609375000001, "calib/std_conf": 0.18993127081730735, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8218933333333335, "calib/step_q_c_n": 375.0, "calib/step_q_gap": 0.09179024054982832, "calib/step_q_w": 0.7301030927835052, "calib/step_q_w_n": 679.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0528, "grad_norm": 0.0, "learning_rate": 9.333333333333333e-07, "loss": 0.0, "num_tokens": 8557862.0, "reward": 0.869140625, "reward_std": 0.240284264087677, "rewards/accuracy_reward_step": 0.37109375, "rewards/format_reward_step": 0.99609375, "step": 33 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7619836458480191, "calib/avg_num_step_conf": 3.8671875, "calib/ece": 0.48741176470588243, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.48627450980392156, "calib/gap": 0.19402016072183847, "calib/mean_conf": 0.8089803921568628, "calib/mu_c": 0.9406097560975611, "calib/mu_w": 0.7465895953757227, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.48741176470588243, "calib/std_conf": 0.23772891879453528, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8473958333333333, "calib/step_q_c_n": 288.0, "calib/step_q_gap": 0.17088586182336185, "calib/step_q_w": 0.6765099715099715, "calib/step_q_w_n": 702.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 236.0, "completions/max_terminated_length": 236.0, "completions/mean_length": 0.921875, "completions/mean_terminated_length": 236.0, "completions/min_length": 0.0, "completions/min_terminated_length": 236.0, "epoch": 0.0544, "grad_norm": 2.0495622158050537, "learning_rate": 9.277777777777777e-07, "loss": 0.0386, "num_tokens": 8818570.0, "reward": 0.814453125, "reward_std": 0.1730746477842331, "rewards/accuracy_reward_step": 0.3203125, "rewards/format_reward_step": 0.98828125, "step": 34 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7191611842105263, "calib/avg_num_step_conf": 3.95703125, "calib/ece": 0.27968750000000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.59765625, "calib/gap": 0.14004048582995943, "calib/mean_conf": 0.8734375, "calib/mu_c": 0.930328947368421, "calib/mu_w": 0.7902884615384616, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.27968750000000003, "calib/std_conf": 0.18017325646096868, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8290814558058925, "calib/step_q_c_n": 577.0, "calib/step_q_gap": 0.10903558424625948, "calib/step_q_w": 0.7200458715596331, "calib/step_q_w_n": 436.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.056, "grad_norm": 0.0, "learning_rate": 9.222222222222222e-07, "loss": 0.0, "num_tokens": 9080714.0, "reward": 1.091796875, "reward_std": 0.11494496464729309, "rewards/accuracy_reward_step": 0.59375, "rewards/format_reward_step": 0.99609375, "step": 35 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7023721275018532, "calib/avg_num_step_conf": 3.796875, "calib/ece": 0.3784375000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.54296875, "calib/gap": 0.18079565109957985, "calib/mean_conf": 0.82375, "calib/mu_c": 0.9240350877192981, "calib/mu_w": 0.7432394366197183, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3784375000000001, "calib/std_conf": 0.24279621084357966, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8311666666666666, "calib/step_q_c_n": 420.0, "calib/step_q_gap": 0.09098550724637677, "calib/step_q_w": 0.7401811594202898, "calib/step_q_w_n": 552.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0576, "grad_norm": 0.0, "learning_rate": 9.166666666666665e-07, "loss": 0.0, "num_tokens": 9332594.0, "reward": 0.94140625, "reward_std": 0.2540907561779022, "rewards/accuracy_reward_step": 0.4453125, "rewards/format_reward_step": 0.9921875, "step": 36 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7154203323558163, "calib/avg_num_step_conf": 3.90234375, "calib/ece": 0.31472656250000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.52734375, "calib/gap": 0.18695747800586504, "calib/mean_conf": 0.8303515625, "calib/mu_c": 0.9209090909090909, "calib/mu_w": 0.7339516129032259, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.31472656250000003, "calib/std_conf": 0.22341864745094261, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8170146137787057, "calib/step_q_c_n": 479.0, "calib/step_q_gap": 0.11362999839409027, "calib/step_q_w": 0.7033846153846154, "calib/step_q_w_n": 520.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0592, "grad_norm": 0.0, "learning_rate": 9.11111111111111e-07, "loss": 0.0, "num_tokens": 9593346.0, "reward": 1.015625, "reward_std": 0.14032596349716187, "rewards/accuracy_reward_step": 0.515625, "rewards/format_reward_step": 1.0, "step": 37 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7065397350993379, "calib/avg_num_step_conf": 3.98046875, "calib/ece": 0.469529411764706, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.5490196078431373, "calib/gap": 0.09894167091186956, "calib/mean_conf": 0.8773725490196078, "calib/mu_c": 0.9359615384615385, "calib/mu_w": 0.837019867549669, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.469529411764706, "calib/std_conf": 0.15290827197571927, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8486821705426356, "calib/step_q_c_n": 387.0, "calib/step_q_gap": 0.08409356294769899, "calib/step_q_w": 0.7645886075949366, "calib/step_q_w_n": 632.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0608, "grad_norm": 0.0, "learning_rate": 9.055555555555556e-07, "loss": 0.0, "num_tokens": 9854890.0, "reward": 0.904296875, "reward_std": 0.1345357596874237, "rewards/accuracy_reward_step": 0.40625, "rewards/format_reward_step": 0.99609375, "step": 38 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6879616477272728, "calib/avg_num_step_conf": 4.26953125, "calib/ece": 0.5490625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.52734375, "calib/gap": 0.10099999999999987, "calib/mean_conf": 0.8615624999999999, "calib/mu_c": 0.9309999999999998, "calib/mu_w": 0.83, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5490625, "calib/std_conf": 0.1703532061446159, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8321254355400697, "calib/step_q_c_n": 287.0, "calib/step_q_gap": 0.0957110434805164, "calib/step_q_w": 0.7364143920595533, "calib/step_q_w_n": 806.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0624, "grad_norm": 0.0, "learning_rate": 9e-07, "loss": 0.0, "num_tokens": 10115306.0, "reward": 0.8125, "reward_std": 0.11705182492733002, "rewards/accuracy_reward_step": 0.3125, "rewards/format_reward_step": 1.0, "step": 39 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6907894736842105, "calib/avg_num_step_conf": 3.921875, "calib/ece": 0.3821093750000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.48046875, "calib/gap": 0.1520360761057571, "calib/mean_conf": 0.827421875, "calib/mu_c": 0.9117543859649122, "calib/mu_w": 0.7597183098591551, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3821093750000001, "calib/std_conf": 0.20778123296266285, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8189731051344744, "calib/step_q_c_n": 409.0, "calib/step_q_gap": 0.09693949168909621, "calib/step_q_w": 0.7220336134453782, "calib/step_q_w_n": 595.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.064, "grad_norm": 0.0, "learning_rate": 8.944444444444445e-07, "loss": 0.0, "num_tokens": 10377042.0, "reward": 0.94140625, "reward_std": 0.15137451887130737, "rewards/accuracy_reward_step": 0.4453125, "rewards/format_reward_step": 0.9921875, "step": 40 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7688755020080322, "calib/avg_num_step_conf": 3.65234375, "calib/ece": 0.45261718750000013, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.50390625, "calib/gap": 0.20637349397590365, "calib/mean_conf": 0.8041796874999999, "calib/mu_c": 0.9380000000000001, "calib/mu_w": 0.7316265060240964, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.45261718750000013, "calib/std_conf": 0.2582439778337577, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.8210031347962383, "calib/step_q_c_n": 319.0, "calib/step_q_gap": 0.1073667711598747, "calib/step_q_w": 0.7136363636363636, "calib/step_q_w_n": 616.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0656, "grad_norm": 0.0, "learning_rate": 8.888888888888888e-07, "loss": 0.0, "num_tokens": 10639186.0, "reward": 0.837890625, "reward_std": 0.09316996484994888, "rewards/accuracy_reward_step": 0.3515625, "rewards/format_reward_step": 0.97265625, "step": 41 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6606522817460319, "calib/avg_num_step_conf": 3.76171875, "calib/ece": 0.2989843750000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.59375, "calib/gap": 0.1451785714285716, "calib/mean_conf": 0.861484375, "calib/mu_c": 0.9250000000000002, "calib/mu_w": 0.7798214285714286, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2989843750000001, "calib/std_conf": 0.20859683693876896, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.832390438247012, "calib/step_q_c_n": 502.0, "calib/step_q_gap": 0.109744017422717, "calib/step_q_w": 0.722646420824295, "calib/step_q_w_n": 461.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0672, "grad_norm": 0.0, "learning_rate": 8.833333333333333e-07, "loss": 0.0, "num_tokens": 10896498.0, "reward": 1.060546875, "reward_std": 0.12770098447799683, "rewards/accuracy_reward_step": 0.5625, "rewards/format_reward_step": 0.99609375, "step": 42 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6731284733573063, "calib/avg_num_step_conf": 3.61328125, "calib/ece": 0.4968359375000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.50390625, "calib/gap": 0.0898247793396535, "calib/mean_conf": 0.8679296875, "calib/mu_c": 0.924421052631579, "calib/mu_w": 0.8345962732919255, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4968359375000001, "calib/std_conf": 0.1801800611364985, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8369817073170731, "calib/step_q_c_n": 328.0, "calib/step_q_gap": 0.06959477264370628, "calib/step_q_w": 0.7673869346733668, "calib/step_q_w_n": 597.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0688, "grad_norm": 0.0, "learning_rate": 8.777777777777777e-07, "loss": 0.0, "num_tokens": 11158642.0, "reward": 0.869140625, "reward_std": 0.20872020721435547, "rewards/accuracy_reward_step": 0.37109375, "rewards/format_reward_step": 0.99609375, "step": 43 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6859880315762669, "calib/avg_num_step_conf": 3.96875, "calib/ece": 0.464140625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.60546875, "calib/gap": 0.129842118665648, "calib/mean_conf": 0.862578125, "calib/mu_c": 0.9406862745098039, "calib/mu_w": 0.8108441558441559, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.464140625, "calib/std_conf": 0.2055489832655087, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8461064425770308, "calib/step_q_c_n": 357.0, "calib/step_q_gap": 0.11071949265290337, "calib/step_q_w": 0.7353869499241275, "calib/step_q_w_n": 659.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0704, "grad_norm": 0.0, "learning_rate": 8.722222222222222e-07, "loss": 0.0, "num_tokens": 11417162.0, "reward": 0.89453125, "reward_std": 0.189998596906662, "rewards/accuracy_reward_step": 0.3984375, "rewards/format_reward_step": 0.9921875, "step": 44 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5858134920634922, "calib/avg_num_step_conf": 4.01171875, "calib/ece": 0.43546874999999996, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.53515625, "calib/gap": 0.045575396825396686, "calib/mean_conf": 0.872578125, "calib/mu_c": 0.8982142857142856, "calib/mu_w": 0.852638888888889, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.43527343749999997, "calib/std_conf": 0.1498345237970354, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7948291571753987, "calib/step_q_c_n": 439.0, "calib/step_q_gap": 0.004369973501929358, "calib/step_q_w": 0.7904591836734693, "calib/step_q_w_n": 588.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.072, "grad_norm": 0.0, "learning_rate": 8.666666666666667e-07, "loss": 0.0, "num_tokens": 11671066.0, "reward": 0.93359375, "reward_std": 0.18399940431118011, "rewards/accuracy_reward_step": 0.4375, "rewards/format_reward_step": 0.9921875, "step": 45 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6808314234784822, "calib/avg_num_step_conf": 3.90234375, "calib/ece": 0.4376953125000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.515625, "calib/gap": 0.14544563279857414, "calib/mean_conf": 0.8361328124999998, "calib/mu_c": 0.9236274509803923, "calib/mu_w": 0.7781818181818182, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.4376953125000001, "calib/std_conf": 0.2194014539282724, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8291452991452992, "calib/step_q_c_n": 351.0, "calib/step_q_gap": 0.09433048433048441, "calib/step_q_w": 0.7348148148148148, "calib/step_q_w_n": 648.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0736, "grad_norm": 0.0, "learning_rate": 8.611111111111111e-07, "loss": 0.0, "num_tokens": 11925954.0, "reward": 0.89453125, "reward_std": 0.13965940475463867, "rewards/accuracy_reward_step": 0.3984375, "rewards/format_reward_step": 0.9921875, "step": 46 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.72648835202761, "calib/avg_num_step_conf": 3.4140625, "calib/ece": 0.3258039215686274, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.5843137254901961, "calib/gap": 0.1812251941328733, "calib/mean_conf": 0.8469803921568628, "calib/mu_c": 0.9336842105263159, "calib/mu_w": 0.7524590163934426, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.3256078431372549, "calib/std_conf": 0.2385591956131872, "calib/step_conf_rate": 0.96875, "calib/step_q_c": 0.8489910313901344, "calib/step_q_c_n": 446.0, "calib/step_q_gap": 0.12798635849293805, "calib/step_q_w": 0.7210046728971964, "calib/step_q_w_n": 428.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 157.0, "completions/max_terminated_length": 157.0, "completions/mean_length": 0.61328125, "completions/mean_terminated_length": 157.0, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.0752, "grad_norm": 2.0297513008117676, "learning_rate": 8.555555555555555e-07, "loss": 0.0386, "num_tokens": 12186583.0, "reward": 1.00390625, "reward_std": 0.14966705441474915, "rewards/accuracy_reward_step": 0.51953125, "rewards/format_reward_step": 0.96875, "step": 47 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6644511015415341, "calib/avg_num_step_conf": 3.7734375, "calib/ece": 0.38878906250000017, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.45703125, "calib/gap": 0.16714909817137868, "calib/mean_conf": 0.8145703125, "calib/mu_c": 0.9105504587155964, "calib/mu_w": 0.7434013605442177, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.38878906250000017, "calib/std_conf": 0.2448257729869393, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.7859999999999999, "calib/step_q_c_n": 365.0, "calib/step_q_gap": 0.10676539101497495, "calib/step_q_w": 0.679234608985025, "calib/step_q_w_n": 601.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0768, "grad_norm": 0.0, "learning_rate": 8.499999999999999e-07, "loss": 0.0, "num_tokens": 12439255.0, "reward": 0.91796875, "reward_std": 0.12916389107704163, "rewards/accuracy_reward_step": 0.42578125, "rewards/format_reward_step": 0.984375, "step": 48 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7913225725725725, "calib/avg_num_step_conf": 3.71484375, "calib/ece": 0.3591372549019609, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.45098039215686275, "calib/gap": 0.23496433933933936, "calib/mean_conf": 0.7944313725490195, "calib/mu_c": 0.9271171171171172, "calib/mu_w": 0.6921527777777778, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.3591372549019609, "calib/std_conf": 0.264440901504121, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.824221635883905, "calib/step_q_c_n": 379.0, "calib/step_q_gap": 0.13247338413565335, "calib/step_q_w": 0.6917482517482516, "calib/step_q_w_n": 572.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 53.0, "completions/max_terminated_length": 53.0, "completions/mean_length": 0.20703125, "completions/mean_terminated_length": 53.0, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.0784, "grad_norm": 4.472797393798828, "learning_rate": 8.444444444444444e-07, "loss": 0.0189, "num_tokens": 12697988.0, "reward": 0.921875, "reward_std": 0.1474648416042328, "rewards/accuracy_reward_step": 0.43359375, "rewards/format_reward_step": 0.9765625, "step": 49 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.576751708984375, "calib/avg_num_step_conf": 3.75390625, "calib/ece": 0.37703124999999993, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.58203125, "calib/gap": 0.07781249999999995, "calib/mean_conf": 0.8727343749999998, "calib/mu_c": 0.9116406249999999, "calib/mu_w": 0.8338281249999999, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3748828125, "calib/std_conf": 0.1664365252682216, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8098547717842323, "calib/step_q_c_n": 482.0, "calib/step_q_gap": 0.043529093287363874, "calib/step_q_w": 0.7663256784968684, "calib/step_q_w_n": 479.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 8.388888888888888e-07, "loss": 0.0, "num_tokens": 12959236.0, "reward": 1.0, "reward_std": 0.13269482553005219, "rewards/accuracy_reward_step": 0.5, "rewards/format_reward_step": 1.0, "step": 50 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6815934065934066, "calib/avg_num_step_conf": 3.86328125, "calib/ece": 0.36871093750000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.55078125, "calib/gap": 0.10982051282051297, "calib/mean_conf": 0.8608984375, "calib/mu_c": 0.9166666666666667, "calib/mu_w": 0.8068461538461538, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.36871093750000006, "calib/std_conf": 0.18411415191412794, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8226382978723404, "calib/step_q_c_n": 470.0, "calib/step_q_gap": 0.07254195875866032, "calib/step_q_w": 0.7500963391136801, "calib/step_q_w_n": 519.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0816, "grad_norm": 0.0, "learning_rate": 8.333333333333333e-07, "loss": 0.0, "num_tokens": 13216020.0, "reward": 0.98828125, "reward_std": 0.17532755434513092, "rewards/accuracy_reward_step": 0.4921875, "rewards/format_reward_step": 0.9921875, "step": 51 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6790260099612617, "calib/avg_num_step_conf": 3.4296875, "calib/ece": 0.4078906250000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.5703125, "calib/gap": 0.12646744143147004, "calib/mean_conf": 0.8649218750000001, "calib/mu_c": 0.9335897435897434, "calib/mu_w": 0.8071223021582734, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4078906250000001, "calib/std_conf": 0.17717618391444254, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8444444444444443, "calib/step_q_c_n": 342.0, "calib/step_q_gap": 0.10994817578772786, "calib/step_q_w": 0.7344962686567165, "calib/step_q_w_n": 536.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0832, "grad_norm": 0.0, "learning_rate": 8.277777777777777e-07, "loss": 0.0, "num_tokens": 13477620.0, "reward": 0.955078125, "reward_std": 0.08969886600971222, "rewards/accuracy_reward_step": 0.45703125, "rewards/format_reward_step": 0.99609375, "step": 52 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.8072590738423029, "calib/avg_num_step_conf": 3.796875, "calib/ece": 0.48042968750000015, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.40234375, "calib/gap": 0.2586889862327909, "calib/mean_conf": 0.7460546875, "calib/mu_c": 0.9360294117647059, "calib/mu_w": 0.677340425531915, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.48042968750000015, "calib/std_conf": 0.3078321074616443, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.828135593220339, "calib/step_q_c_n": 236.0, "calib/step_q_gap": 0.14579863669859994, "calib/step_q_w": 0.6823369565217391, "calib/step_q_w_n": 736.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0848, "grad_norm": 0.0, "learning_rate": 8.222222222222221e-07, "loss": 0.0, "num_tokens": 13739124.0, "reward": 0.763671875, "reward_std": 0.1348041594028473, "rewards/accuracy_reward_step": 0.265625, "rewards/format_reward_step": 0.99609375, "step": 53 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6702647531856983, "calib/avg_num_step_conf": 3.453125, "calib/ece": 0.4116862745098041, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.5529411764705883, "calib/gap": 0.10485030310528254, "calib/mean_conf": 0.8744313725490196, "calib/mu_c": 0.9307627118644066, "calib/mu_w": 0.8259124087591241, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.4116862745098041, "calib/std_conf": 0.17598008520373032, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.8468266666666668, "calib/step_q_c_n": 375.0, "calib/step_q_gap": 0.09000937786509522, "calib/step_q_w": 0.7568172888015716, "calib/step_q_w_n": 509.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 18.0, "completions/max_terminated_length": 18.0, "completions/mean_length": 0.0703125, "completions/mean_terminated_length": 18.0, "completions/min_length": 0.0, "completions/min_terminated_length": 18.0, "epoch": 0.0864, "grad_norm": 10.00183391571045, "learning_rate": 8.166666666666666e-07, "loss": 0.0253, "num_tokens": 13999046.0, "reward": 0.955078125, "reward_std": 0.09943175315856934, "rewards/accuracy_reward_step": 0.4609375, "rewards/format_reward_step": 0.98828125, "step": 54 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.701171875, "calib/avg_num_step_conf": 3.3984375, "calib/ece": 0.32453125000000016, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.5078125, "calib/gap": 0.1929687499999999, "calib/mean_conf": 0.8245312499999999, "calib/mu_c": 0.9210156249999999, "calib/mu_w": 0.728046875, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.32453125000000016, "calib/std_conf": 0.24327633264137616, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.835586854460094, "calib/step_q_c_n": 426.0, "calib/step_q_gap": 0.11894271031594983, "calib/step_q_w": 0.7166441441441441, "calib/step_q_w_n": 444.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.088, "grad_norm": 0.0, "learning_rate": 8.11111111111111e-07, "loss": 0.0, "num_tokens": 14260494.0, "reward": 0.99609375, "reward_std": 0.16728200018405914, "rewards/accuracy_reward_step": 0.5, "rewards/format_reward_step": 0.9921875, "step": 55 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7270757020757022, "calib/avg_num_step_conf": 3.45703125, "calib/ece": 0.3310937500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.515625, "calib/gap": 0.1783589743589744, "calib/mean_conf": 0.83890625, "calib/mu_c": 0.9266923076923078, "calib/mu_w": 0.7483333333333334, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3310937500000001, "calib/std_conf": 0.2232387118331798, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8484615384615385, "calib/step_q_c_n": 403.0, "calib/step_q_gap": 0.12806734759016913, "calib/step_q_w": 0.7203941908713694, "calib/step_q_w_n": 482.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0896, "grad_norm": 0.0, "learning_rate": 8.055555555555556e-07, "loss": 0.0, "num_tokens": 14522638.0, "reward": 1.0078125, "reward_std": 0.15558436512947083, "rewards/accuracy_reward_step": 0.5078125, "rewards/format_reward_step": 1.0, "step": 56 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7003732287449392, "calib/avg_num_step_conf": 3.80078125, "calib/ece": 0.41835937500000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.4453125, "calib/gap": 0.12768218623481775, "calib/mean_conf": 0.822265625, "calib/mu_c": 0.8980769230769231, "calib/mu_w": 0.7703947368421054, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.41718750000000004, "calib/std_conf": 0.2046766307944299, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8142191780821919, "calib/step_q_c_n": 365.0, "calib/step_q_gap": 0.08412049387166565, "calib/step_q_w": 0.7300986842105263, "calib/step_q_w_n": 608.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0912, "grad_norm": 0.0, "learning_rate": 8e-07, "loss": 0.0, "num_tokens": 14777966.0, "reward": 0.90625, "reward_std": 0.18083682656288147, "rewards/accuracy_reward_step": 0.40625, "rewards/format_reward_step": 1.0, "step": 57 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5885396825396825, "calib/avg_num_step_conf": 3.75, "calib/ece": 0.43050980392156885, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.5019607843137255, "calib/gap": 0.1133523809523811, "calib/mean_conf": 0.8422745098039216, "calib/mu_c": 0.9089523809523811, "calib/mu_w": 0.7956, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.43050980392156885, "calib/std_conf": 0.20675343641133745, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.819568733153639, "calib/step_q_c_n": 371.0, "calib/step_q_gap": 0.08191168731323151, "calib/step_q_w": 0.7376570458404075, "calib/step_q_w_n": 589.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0928, "grad_norm": 0.0, "learning_rate": 7.944444444444444e-07, "loss": 0.0, "num_tokens": 15037142.0, "reward": 0.90625, "reward_std": 0.17768144607543945, "rewards/accuracy_reward_step": 0.41015625, "rewards/format_reward_step": 0.9921875, "step": 58 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6769931227712686, "calib/avg_num_step_conf": 3.5390625, "calib/ece": 0.3987450980392158, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.36470588235294116, "calib/gap": 0.16114875191034128, "calib/mean_conf": 0.8018823529411765, "calib/mu_c": 0.8973076923076923, "calib/mu_w": 0.736158940397351, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.39639215686274526, "calib/std_conf": 0.2409108245281972, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.8240173410404624, "calib/step_q_c_n": 346.0, "calib/step_q_gap": 0.10133876961189114, "calib/step_q_w": 0.7226785714285713, "calib/step_q_w_n": 560.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 118.0, "completions/max_terminated_length": 118.0, "completions/mean_length": 0.4609375, "completions/mean_terminated_length": 118.0, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.0944, "grad_norm": 3.116136074066162, "learning_rate": 7.888888888888889e-07, "loss": 0.0189, "num_tokens": 15299268.0, "reward": 0.8984375, "reward_std": 0.16622620820999146, "rewards/accuracy_reward_step": 0.40625, "rewards/format_reward_step": 0.984375, "step": 59 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7112884160756502, "calib/avg_num_step_conf": 3.8828125, "calib/ece": 0.4412890625000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.421875, "calib/gap": 0.16732466509062272, "calib/mean_conf": 0.8084765624999999, "calib/mu_c": 0.9143617021276598, "calib/mu_w": 0.747037037037037, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4412890625000001, "calib/std_conf": 0.24047565056193024, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8276685393258426, "calib/step_q_c_n": 356.0, "calib/step_q_gap": 0.13594440139480812, "calib/step_q_w": 0.6917241379310345, "calib/step_q_w_n": 638.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.096, "grad_norm": 0.0, "learning_rate": 7.833333333333333e-07, "loss": 0.0, "num_tokens": 15561412.0, "reward": 0.865234375, "reward_std": 0.15387766063213348, "rewards/accuracy_reward_step": 0.3671875, "rewards/format_reward_step": 0.99609375, "step": 60 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.749526717557252, "calib/avg_num_step_conf": 3.421875, "calib/ece": 0.303359375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.47265625, "calib/gap": 0.21300396946564892, "calib/mean_conf": 0.8150781249999999, "calib/mu_c": 0.9190839694656489, "calib/mu_w": 0.7060799999999999, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.303359375, "calib/std_conf": 0.2498999677800787, "calib/step_conf_rate": 0.96875, "calib/step_q_c": 0.8293488372093023, "calib/step_q_c_n": 430.0, "calib/step_q_gap": 0.12147888205235169, "calib/step_q_w": 0.7078699551569506, "calib/step_q_w_n": 446.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0976, "grad_norm": 0.0, "learning_rate": 7.777777777777778e-07, "loss": 0.0, "num_tokens": 15823036.0, "reward": 0.99609375, "reward_std": 0.16821058094501495, "rewards/accuracy_reward_step": 0.51171875, "rewards/format_reward_step": 0.96875, "step": 61 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.687896728515625, "calib/avg_num_step_conf": 3.578125, "calib/ece": 0.3723437499999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.5703125, "calib/gap": 0.11531249999999993, "calib/mean_conf": 0.864921875, "calib/mu_c": 0.9225781249999998, "calib/mu_w": 0.8072656249999999, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3686328124999999, "calib/std_conf": 0.20250962976235073, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8241387559808613, "calib/step_q_c_n": 418.0, "calib/step_q_gap": 0.03986164754712651, "calib/step_q_w": 0.7842771084337348, "calib/step_q_w_n": 498.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.0992, "grad_norm": 0.0, "learning_rate": 7.722222222222222e-07, "loss": 0.0, "num_tokens": 16084228.0, "reward": 0.998046875, "reward_std": 0.1526850312948227, "rewards/accuracy_reward_step": 0.5, "rewards/format_reward_step": 0.99609375, "step": 62 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6866692601958115, "calib/avg_num_step_conf": 3.515625, "calib/ece": 0.4001953125000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.3359375, "calib/gap": 0.18286909161641685, "calib/mean_conf": 0.7791015625, "calib/mu_c": 0.8926804123711339, "calib/mu_w": 0.7098113207547171, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.4001953125000001, "calib/std_conf": 0.2565858573364062, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7998165137614678, "calib/step_q_c_n": 327.0, "calib/step_q_gap": 0.0947903357509966, "calib/step_q_w": 0.7050261780104712, "calib/step_q_w_n": 573.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1008, "grad_norm": 0.0, "learning_rate": 7.666666666666667e-07, "loss": 0.0, "num_tokens": 16338804.0, "reward": 0.873046875, "reward_std": 0.25302910804748535, "rewards/accuracy_reward_step": 0.37890625, "rewards/format_reward_step": 0.98828125, "step": 63 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7974129546780783, "calib/avg_num_step_conf": 3.62890625, "calib/ece": 0.3880468750000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.37109375, "calib/gap": 0.23496206963625754, "calib/mean_conf": 0.7669531249999999, "calib/mu_c": 0.9128865979381443, "calib/mu_w": 0.6779245283018868, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3880468750000001, "calib/std_conf": 0.279721006634708, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8299152542372882, "calib/step_q_c_n": 354.0, "calib/step_q_gap": 0.16796742815033172, "calib/step_q_w": 0.6619478260869565, "calib/step_q_w_n": 575.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1024, "grad_norm": 0.0, "learning_rate": 7.61111111111111e-07, "loss": 0.0, "num_tokens": 16600900.0, "reward": 0.875, "reward_std": 0.13151776790618896, "rewards/accuracy_reward_step": 0.37890625, "rewards/format_reward_step": 0.9921875, "step": 64 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7814169800118742, "calib/avg_num_step_conf": 3.47265625, "calib/ece": 0.4176953125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.375, "calib/gap": 0.2235787321063395, "calib/mean_conf": 0.7809765624999999, "calib/mu_c": 0.9233333333333333, "calib/mu_w": 0.6997546012269938, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.4176953125, "calib/std_conf": 0.25815644375200786, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8218688524590163, "calib/step_q_c_n": 305.0, "calib/step_q_gap": 0.1455674825960025, "calib/step_q_w": 0.6763013698630138, "calib/step_q_w_n": 584.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.104, "grad_norm": 0.0, "learning_rate": 7.555555555555555e-07, "loss": 0.0, "num_tokens": 16857884.0, "reward": 0.859375, "reward_std": 0.1349327117204666, "rewards/accuracy_reward_step": 0.36328125, "rewards/format_reward_step": 0.9921875, "step": 65 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6733002740441081, "calib/avg_num_step_conf": 3.58203125, "calib/ece": 0.39203921568627464, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.403921568627451, "calib/gap": 0.2015600939579798, "calib/mean_conf": 0.7724313725490196, "calib/mu_c": 0.8973195876288659, "calib/mu_w": 0.6957594936708861, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.39203921568627464, "calib/std_conf": 0.2766925989430885, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.8080466472303206, "calib/step_q_c_n": 343.0, "calib/step_q_gap": 0.11146128137666211, "calib/step_q_w": 0.6965853658536585, "calib/step_q_w_n": 574.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 367.0, "completions/max_terminated_length": 367.0, "completions/mean_length": 1.43359375, "completions/mean_terminated_length": 367.0, "completions/min_length": 0.0, "completions/min_terminated_length": 367.0, "epoch": 0.1056, "grad_norm": 2.5103132724761963, "learning_rate": 7.5e-07, "loss": 0.0386, "num_tokens": 17118907.0, "reward": 0.87109375, "reward_std": 0.1428884118795395, "rewards/accuracy_reward_step": 0.37890625, "rewards/format_reward_step": 0.984375, "step": 66 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6894213381555154, "calib/avg_num_step_conf": 3.375, "calib/ece": 0.42974609375000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.421875, "calib/gap": 0.16449302505812435, "calib/mean_conf": 0.81255859375, "calib/mu_c": 0.9140816326530613, "calib/mu_w": 0.749588607594937, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.42974609375000006, "calib/std_conf": 0.2582380890365952, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.837439446366782, "calib/step_q_c_n": 289.0, "calib/step_q_gap": 0.14036118549721677, "calib/step_q_w": 0.6970782608695653, "calib/step_q_w_n": 575.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1072, "grad_norm": 0.0, "learning_rate": 7.444444444444444e-07, "loss": 0.0, "num_tokens": 17379875.0, "reward": 0.869140625, "reward_std": 0.1348200887441635, "rewards/accuracy_reward_step": 0.3828125, "rewards/format_reward_step": 0.97265625, "step": 67 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7861512111573281, "calib/avg_num_step_conf": 3.640625, "calib/ece": 0.3138671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.2890625, "calib/gap": 0.18239417665769486, "calib/mean_conf": 0.7904296875000001, "calib/mu_c": 0.8859016393442621, "calib/mu_w": 0.7035074626865673, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3138671875, "calib/std_conf": 0.2149768586235559, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7975514874141876, "calib/step_q_c_n": 437.0, "calib/step_q_gap": 0.11181411367681382, "calib/step_q_w": 0.6857373737373738, "calib/step_q_w_n": 495.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1088, "grad_norm": 0.0, "learning_rate": 7.388888888888889e-07, "loss": 0.0, "num_tokens": 17640571.0, "reward": 0.97265625, "reward_std": 0.24158510565757751, "rewards/accuracy_reward_step": 0.4765625, "rewards/format_reward_step": 0.9921875, "step": 68 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7416682357371492, "calib/avg_num_step_conf": 3.40625, "calib/ece": 0.3035433070866143, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.4448818897637795, "calib/gap": 0.13022406326492197, "calib/mean_conf": 0.8586614173228345, "calib/mu_c": 0.9165957446808511, "calib/mu_w": 0.7863716814159292, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.3035433070866143, "calib/std_conf": 0.16534079434657542, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.830427927927928, "calib/step_q_c_n": 444.0, "calib/step_q_gap": 0.076105498021386, "calib/step_q_w": 0.754322429906542, "calib/step_q_w_n": 428.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9921875, "completions/max_length": 211.0, "completions/max_terminated_length": 211.0, "completions/mean_length": 1.640625, "completions/mean_terminated_length": 210.0, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.1104, "grad_norm": 2.5319273471832275, "learning_rate": 7.333333333333332e-07, "loss": 0.0773, "num_tokens": 17902023.0, "reward": 1.041015625, "reward_std": 0.14334642887115479, "rewards/accuracy_reward_step": 0.55078125, "rewards/format_reward_step": 0.98046875, "step": 69 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6981770833333334, "calib/avg_num_step_conf": 3.31640625, "calib/ece": 0.4160937500000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.375, "calib/gap": 0.16408333333333325, "calib/mean_conf": 0.79109375, "calib/mu_c": 0.8936458333333333, "calib/mu_w": 0.7295625, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.4160937500000004, "calib/std_conf": 0.23735928191443767, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8111678832116788, "calib/step_q_c_n": 274.0, "calib/step_q_gap": 0.11728962234211349, "calib/step_q_w": 0.6938782608695653, "calib/step_q_w_n": 575.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.112, "grad_norm": 0.0, "learning_rate": 7.277777777777777e-07, "loss": 0.0, "num_tokens": 18163127.0, "reward": 0.87109375, "reward_std": 0.14940109848976135, "rewards/accuracy_reward_step": 0.375, "rewards/format_reward_step": 0.9921875, "step": 70 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7026192241166296, "calib/avg_num_step_conf": 3.328125, "calib/ece": 0.2819921875000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.41015625, "calib/gap": 0.16275142080553506, "calib/mean_conf": 0.8366796875, "calib/mu_c": 0.9091549295774649, "calib/mu_w": 0.7464035087719298, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2819921875000001, "calib/std_conf": 0.20704088642078003, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.814378947368421, "calib/step_q_c_n": 475.0, "calib/step_q_gap": 0.08575825771324841, "calib/step_q_w": 0.7286206896551726, "calib/step_q_w_n": 377.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1136, "grad_norm": 0.0, "learning_rate": 7.222222222222221e-07, "loss": 0.0, "num_tokens": 18419703.0, "reward": 1.052734375, "reward_std": 0.20845818519592285, "rewards/accuracy_reward_step": 0.5546875, "rewards/format_reward_step": 0.99609375, "step": 71 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7274323966065748, "calib/avg_num_step_conf": 3.3359375, "calib/ece": 0.4427734375000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.33984375, "calib/gap": 0.18158801696712623, "calib/mean_conf": 0.8021484375000001, "calib/mu_c": 0.9184782608695654, "calib/mu_w": 0.7368902439024392, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4427734375000001, "calib/std_conf": 0.23878859602440944, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8430693069306932, "calib/step_q_c_n": 303.0, "calib/step_q_gap": 0.12232520529729929, "calib/step_q_w": 0.7207441016333939, "calib/step_q_w_n": 551.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1152, "grad_norm": 0.0, "learning_rate": 7.166666666666667e-07, "loss": 0.0, "num_tokens": 18676799.0, "reward": 0.857421875, "reward_std": 0.09679568558931351, "rewards/accuracy_reward_step": 0.359375, "rewards/format_reward_step": 0.99609375, "step": 72 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6822100313479624, "calib/avg_num_step_conf": 3.515625, "calib/ece": 0.53125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.30859375, "calib/gap": 0.18049460118425653, "calib/mean_conf": 0.7578125, "calib/mu_c": 0.8974137931034484, "calib/mu_w": 0.7169191919191918, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.53125, "calib/std_conf": 0.26937264355117796, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.8082198952879581, "calib/step_q_c_n": 191.0, "calib/step_q_gap": 0.11064584733309213, "calib/step_q_w": 0.697574047954866, "calib/step_q_w_n": 709.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1168, "grad_norm": 0.0, "learning_rate": 7.111111111111111e-07, "loss": 0.0, "num_tokens": 18935623.0, "reward": 0.71875, "reward_std": 0.10149794071912766, "rewards/accuracy_reward_step": 0.2265625, "rewards/format_reward_step": 0.984375, "step": 73 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.76161891206139, "calib/avg_num_step_conf": 3.40234375, "calib/ece": 0.32378906250000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.30078125, "calib/gap": 0.21693112197536957, "calib/mean_conf": 0.7651953125, "calib/mu_c": 0.8863716814159291, "calib/mu_w": 0.6694405594405596, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.32378906250000006, "calib/std_conf": 0.25465502273276946, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.8073655913978496, "calib/step_q_c_n": 372.0, "calib/step_q_gap": 0.1391491585321183, "calib/step_q_w": 0.6682164328657313, "calib/step_q_w_n": 499.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1184, "grad_norm": 0.0, "learning_rate": 7.055555555555556e-07, "loss": 0.0, "num_tokens": 19191303.0, "reward": 0.931640625, "reward_std": 0.17043370008468628, "rewards/accuracy_reward_step": 0.44140625, "rewards/format_reward_step": 0.98046875, "step": 74 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7952186805040772, "calib/avg_num_step_conf": 3.37109375, "calib/ece": 0.29574218750000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.30078125, "calib/gap": 0.2813306152705708, "calib/mean_conf": 0.7410546874999999, "calib/mu_c": 0.8971052631578947, "calib/mu_w": 0.615774647887324, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.29574218750000003, "calib/std_conf": 0.28964697824986424, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8019251336898396, "calib/step_q_c_n": 374.0, "calib/step_q_gap": 0.15053454064280491, "calib/step_q_w": 0.6513905930470347, "calib/step_q_w_n": 489.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 7e-07, "loss": 0.0, "num_tokens": 19450407.0, "reward": 0.943359375, "reward_std": 0.1340028941631317, "rewards/accuracy_reward_step": 0.4453125, "rewards/format_reward_step": 0.99609375, "step": 75 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7223510971786834, "calib/avg_num_step_conf": 3.32421875, "calib/ece": 0.37588235294117656, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.38823529411764707, "calib/gap": 0.17221630094043883, "calib/mean_conf": 0.8072549019607844, "calib/mu_c": 0.9051818181818182, "calib/mu_w": 0.7329655172413794, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.37588235294117656, "calib/std_conf": 0.23130387814431091, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8194827586206896, "calib/step_q_c_n": 348.0, "calib/step_q_gap": 0.10007918009186267, "calib/step_q_w": 0.719403578528827, "calib/step_q_w_n": 503.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 142.0, "completions/max_terminated_length": 142.0, "completions/mean_length": 0.5546875, "completions/mean_terminated_length": 142.0, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.1216, "grad_norm": 2.447387456893921, "learning_rate": 6.944444444444444e-07, "loss": 0.0386, "num_tokens": 19710389.0, "reward": 0.9296875, "reward_std": 0.15110857784748077, "rewards/accuracy_reward_step": 0.43359375, "rewards/format_reward_step": 0.9921875, "step": 76 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6762873882207833, "calib/avg_num_step_conf": 3.45703125, "calib/ece": 0.3690234375000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.33984375, "calib/gap": 0.14401911810052415, "calib/mean_conf": 0.8182421874999999, "calib/mu_c": 0.8975652173913042, "calib/mu_w": 0.7535460992907801, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3690234375000001, "calib/std_conf": 0.20544833771587162, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8123118279569894, "calib/step_q_c_n": 372.0, "calib/step_q_gap": 0.07283814374646314, "calib/step_q_w": 0.7394736842105263, "calib/step_q_w_n": 513.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1232, "grad_norm": 0.0, "learning_rate": 6.888888888888889e-07, "loss": 0.0, "num_tokens": 19969821.0, "reward": 0.94921875, "reward_std": 0.06009919196367264, "rewards/accuracy_reward_step": 0.44921875, "rewards/format_reward_step": 1.0, "step": 77 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6754556419190565, "calib/avg_num_step_conf": 3.30078125, "calib/ece": 0.4238823529411765, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.33725490196078434, "calib/gap": 0.16594813722862511, "calib/mean_conf": 0.7807450980392155, "calib/mu_c": 0.8874725274725276, "calib/mu_w": 0.7215243902439025, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.4238823529411765, "calib/std_conf": 0.2421145283309367, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8049163879598663, "calib/step_q_c_n": 299.0, "calib/step_q_gap": 0.10936693741041581, "calib/step_q_w": 0.6955494505494505, "calib/step_q_w_n": 546.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 193.0, "completions/max_terminated_length": 193.0, "completions/mean_length": 0.75390625, "completions/mean_terminated_length": 193.0, "completions/min_length": 0.0, "completions/min_terminated_length": 193.0, "epoch": 0.1248, "grad_norm": 4.708121299743652, "learning_rate": 6.833333333333333e-07, "loss": 0.0386, "num_tokens": 20226030.0, "reward": 0.849609375, "reward_std": 0.14808812737464905, "rewards/accuracy_reward_step": 0.35546875, "rewards/format_reward_step": 0.98828125, "step": 78 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7350085665334095, "calib/avg_num_step_conf": 2.75390625, "calib/ece": 0.401640625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.33984375, "calib/gap": 0.1898940288089349, "calib/mean_conf": 0.803984375, "calib/mu_c": 0.9174757281553401, "calib/mu_w": 0.7275816993464052, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.401640625, "calib/std_conf": 0.24545028723930915, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.8561290322580645, "calib/step_q_c_n": 248.0, "calib/step_q_gap": 0.11291240206112796, "calib/step_q_w": 0.7432166301969365, "calib/step_q_w_n": 457.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1264, "grad_norm": 0.0, "learning_rate": 6.777777777777778e-07, "loss": 0.0, "num_tokens": 20480094.0, "reward": 0.892578125, "reward_std": 0.0921671986579895, "rewards/accuracy_reward_step": 0.40234375, "rewards/format_reward_step": 0.98046875, "step": 79 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7242817735315884, "calib/avg_num_step_conf": 3.1953125, "calib/ece": 0.45742187500000014, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.296875, "calib/gap": 0.15274305321940385, "calib/mean_conf": 0.805078125, "calib/mu_c": 0.9047191011235954, "calib/mu_w": 0.7519760479041916, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.45742187500000014, "calib/std_conf": 0.2142045462787482, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.8306181818181819, "calib/step_q_c_n": 275.0, "calib/step_q_gap": 0.08233088900050234, "calib/step_q_w": 0.7482872928176796, "calib/step_q_w_n": 543.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.128, "grad_norm": 0.0, "learning_rate": 6.722222222222222e-07, "loss": 0.0, "num_tokens": 20741550.0, "reward": 0.83984375, "reward_std": 0.1633341908454895, "rewards/accuracy_reward_step": 0.34765625, "rewards/format_reward_step": 0.984375, "step": 80 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7733983983983984, "calib/avg_num_step_conf": 2.89453125, "calib/ece": 0.3342578125000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.21875, "calib/gap": 0.213455955955956, "calib/mean_conf": 0.7561328125, "calib/mu_c": 0.8795370370370372, "calib/mu_w": 0.6660810810810812, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3342578125000001, "calib/std_conf": 0.25879129716016314, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.803051948051948, "calib/step_q_c_n": 308.0, "calib/step_q_gap": 0.12961083950691332, "calib/step_q_w": 0.6734411085450347, "calib/step_q_w_n": 433.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1296, "grad_norm": 0.0, "learning_rate": 6.666666666666666e-07, "loss": 0.0, "num_tokens": 20996598.0, "reward": 0.916015625, "reward_std": 0.18609514832496643, "rewards/accuracy_reward_step": 0.421875, "rewards/format_reward_step": 0.98828125, "step": 81 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.8024912168636218, "calib/avg_num_step_conf": 2.734375, "calib/ece": 0.29730468750000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.21484375, "calib/gap": 0.34462408176301507, "calib/mean_conf": 0.6918359375, "calib/mu_c": 0.9004950495049505, "calib/mu_w": 0.5558709677419355, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.29730468750000005, "calib/std_conf": 0.3326998759520299, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8439405204460966, "calib/step_q_c_n": 269.0, "calib/step_q_gap": 0.21570386151338206, "calib/step_q_w": 0.6282366589327145, "calib/step_q_w_n": 431.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1312, "grad_norm": 0.0, "learning_rate": 6.611111111111111e-07, "loss": 0.0, "num_tokens": 21256974.0, "reward": 0.884765625, "reward_std": 0.15012259781360626, "rewards/accuracy_reward_step": 0.39453125, "rewards/format_reward_step": 0.98046875, "step": 82 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7429947188567878, "calib/avg_num_step_conf": 2.609375, "calib/ece": 0.3607421875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.2421875, "calib/gap": 0.18623361292326834, "calib/mean_conf": 0.7943359375000001, "calib/mu_c": 0.8998198198198201, "calib/mu_w": 0.7135862068965517, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.3607421875, "calib/std_conf": 0.24654239548401424, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.8404744525547445, "calib/step_q_c_n": 274.0, "calib/step_q_gap": 0.1236978028085517, "calib/step_q_w": 0.7167766497461928, "calib/step_q_w_n": 394.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1328, "grad_norm": 0.0, "learning_rate": 6.555555555555555e-07, "loss": 0.0, "num_tokens": 21514150.0, "reward": 0.919921875, "reward_std": 0.17466981709003448, "rewards/accuracy_reward_step": 0.43359375, "rewards/format_reward_step": 0.97265625, "step": 83 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.8261391625615763, "calib/avg_num_step_conf": 2.6640625, "calib/ece": 0.3087890625000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.203125, "calib/gap": 0.2556527093596058, "calib/mean_conf": 0.7619140625, "calib/mu_c": 0.9017241379310346, "calib/mu_w": 0.6460714285714287, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3087890625000001, "calib/std_conf": 0.25246257542405387, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.8455813953488374, "calib/step_q_c_n": 258.0, "calib/step_q_gap": 0.17553422553751674, "calib/step_q_w": 0.6700471698113206, "calib/step_q_w_n": 424.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1344, "grad_norm": 0.0, "learning_rate": 6.5e-07, "loss": 0.0, "num_tokens": 21774518.0, "reward": 0.94140625, "reward_std": 0.18030638992786407, "rewards/accuracy_reward_step": 0.453125, "rewards/format_reward_step": 0.9765625, "step": 84 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7715619849844351, "calib/avg_num_step_conf": 2.48828125, "calib/ece": 0.27863281250000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.23046875, "calib/gap": 0.23036501251297092, "calib/mean_conf": 0.7825390624999999, "calib/mu_c": 0.8968217054263566, "calib/mu_w": 0.6664566929133857, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.27863281250000005, "calib/std_conf": 0.2438529398769289, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8435869565217391, "calib/step_q_c_n": 276.0, "calib/step_q_gap": 0.1602905576297724, "calib/step_q_w": 0.6832963988919667, "calib/step_q_w_n": 361.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.136, "grad_norm": 0.0, "learning_rate": 6.444444444444444e-07, "loss": 0.0, "num_tokens": 22032766.0, "reward": 1.001953125, "reward_std": 0.13992658257484436, "rewards/accuracy_reward_step": 0.50390625, "rewards/format_reward_step": 0.99609375, "step": 85 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7565901083126713, "calib/avg_num_step_conf": 2.83203125, "calib/ece": 0.41156862745098033, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.23921568627450981, "calib/gap": 0.1795249902127104, "calib/mean_conf": 0.7919607843137254, "calib/mu_c": 0.9031958762886598, "calib/mu_w": 0.7236708860759494, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.41156862745098033, "calib/std_conf": 0.22489055820112214, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.8238671875, "calib/step_q_c_n": 256.0, "calib/step_q_gap": 0.10211878664712148, "calib/step_q_w": 0.7217484008528785, "calib/step_q_w_n": 469.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 131.0, "completions/max_terminated_length": 131.0, "completions/mean_length": 0.51171875, "completions/mean_terminated_length": 131.0, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.1376, "grad_norm": 4.982574939727783, "learning_rate": 6.388888888888888e-07, "loss": 0.0211, "num_tokens": 22292889.0, "reward": 0.869140625, "reward_std": 0.16770878434181213, "rewards/accuracy_reward_step": 0.37890625, "rewards/format_reward_step": 0.98046875, "step": 86 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.75, "calib/avg_num_step_conf": 2.6015625, "calib/ece": 0.22847656250000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.22265625, "calib/gap": 0.19762512512512487, "calib/mean_conf": 0.7987890625, "calib/mu_c": 0.8821621621621621, "calib/mu_w": 0.6845370370370373, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.22457031250000006, "calib/std_conf": 0.22614055199227556, "calib/step_conf_rate": 0.953125, "calib/step_q_c": 0.8204427083333333, "calib/step_q_c_n": 384.0, "calib/step_q_gap": 0.06884696365248233, "calib/step_q_w": 0.751595744680851, "calib/step_q_w_n": 282.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1392, "grad_norm": 0.0, "learning_rate": 6.333333333333332e-07, "loss": 0.0, "num_tokens": 22550345.0, "reward": 1.052734375, "reward_std": 0.21600019931793213, "rewards/accuracy_reward_step": 0.578125, "rewards/format_reward_step": 0.94921875, "step": 87 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6679403541472506, "calib/avg_num_step_conf": 2.40625, "calib/ece": 0.35699218750000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.234375, "calib/gap": 0.15627151289220276, "calib/mean_conf": 0.7905859375, "calib/mu_c": 0.8790990990990993, "calib/mu_w": 0.7228275862068966, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.35699218750000006, "calib/std_conf": 0.2487815001205799, "calib/step_conf_rate": 0.9609375, "calib/step_q_c": 0.8083908045977012, "calib/step_q_c_n": 261.0, "calib/step_q_gap": 0.06374291727375747, "calib/step_q_w": 0.7446478873239437, "calib/step_q_w_n": 355.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1408, "grad_norm": 0.0, "learning_rate": 6.277777777777777e-07, "loss": 0.0, "num_tokens": 22809337.0, "reward": 0.9140625, "reward_std": 0.15110857784748077, "rewards/accuracy_reward_step": 0.43359375, "rewards/format_reward_step": 0.9609375, "step": 88 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7653526220614829, "calib/avg_num_step_conf": 2.34375, "calib/ece": 0.38968503937007887, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.20078740157480315, "calib/gap": 0.27345678119349026, "calib/mean_conf": 0.7007086614173228, "calib/mu_c": 0.889113924050633, "calib/mu_w": 0.6156571428571428, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.93359375, "calib/pce": 0.38968503937007887, "calib/std_conf": 0.3153904744000723, "calib/step_conf_rate": 0.93359375, "calib/step_q_c": 0.8161739130434781, "calib/step_q_c_n": 230.0, "calib/step_q_gap": 0.1399576968272619, "calib/step_q_w": 0.6762162162162162, "calib/step_q_w_n": 370.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9921875, "completions/max_length": 173.0, "completions/max_terminated_length": 173.0, "completions/mean_length": 0.921875, "completions/mean_terminated_length": 118.0, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.1424, "grad_norm": 18.566625595092773, "learning_rate": 6.222222222222223e-07, "loss": 0.0773, "num_tokens": 23070365.0, "reward": 0.771484375, "reward_std": 0.18201754987239838, "rewards/accuracy_reward_step": 0.30859375, "rewards/format_reward_step": 0.92578125, "step": 89 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7787698412698413, "calib/avg_num_step_conf": 2.2265625, "calib/ece": 0.3140625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.1875, "calib/gap": 0.25182539682539684, "calib/mean_conf": 0.7515625, "calib/mu_c": 0.8932142857142856, "calib/mu_w": 0.6413888888888888, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.92578125, "calib/pce": 0.3140625, "calib/std_conf": 0.2732628013355458, "calib/step_conf_rate": 0.92578125, "calib/step_q_c": 0.8295035460992908, "calib/step_q_c_n": 282.0, "calib/step_q_gap": 0.12762854609929086, "calib/step_q_w": 0.7018749999999999, "calib/step_q_w_n": 288.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.144, "grad_norm": 0.0, "learning_rate": 6.166666666666667e-07, "loss": 0.0, "num_tokens": 23332245.0, "reward": 0.8984375, "reward_std": 0.11948448419570923, "rewards/accuracy_reward_step": 0.4375, "rewards/format_reward_step": 0.921875, "step": 90 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.728298611111111, "calib/avg_num_step_conf": 2.15234375, "calib/ece": 0.32609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.1875, "calib/gap": 0.22345238095238096, "calib/mean_conf": 0.7635937500000001, "calib/mu_c": 0.8892857142857142, "calib/mu_w": 0.6658333333333333, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.921875, "calib/pce": 0.32609375, "calib/std_conf": 0.26812204070336604, "calib/step_conf_rate": 0.921875, "calib/step_q_c": 0.8298837209302327, "calib/step_q_c_n": 258.0, "calib/step_q_gap": 0.12933764584490848, "calib/step_q_w": 0.7005460750853242, "calib/step_q_w_n": 293.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1456, "grad_norm": 0.0, "learning_rate": 6.111111111111112e-07, "loss": 0.0, "num_tokens": 23586941.0, "reward": 0.89453125, "reward_std": 0.2004837840795517, "rewards/accuracy_reward_step": 0.4375, "rewards/format_reward_step": 0.9140625, "step": 91 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7191771642991155, "calib/avg_num_step_conf": 2.171875, "calib/ece": 0.3930196078431373, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.12941176470588237, "calib/gap": 0.20351313320825537, "calib/mean_conf": 0.7498823529411764, "calib/mu_c": 0.880769230769231, "calib/mu_w": 0.6772560975609756, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9375, "calib/pce": 0.3930196078431373, "calib/std_conf": 0.27616772731467004, "calib/step_conf_rate": 0.9375, "calib/step_q_c": 0.8341255605381168, "calib/step_q_c_n": 223.0, "calib/step_q_gap": 0.11947090588346232, "calib/step_q_w": 0.7146546546546545, "calib/step_q_w_n": 333.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 58.0, "completions/max_terminated_length": 58.0, "completions/mean_length": 0.2265625, "completions/mean_terminated_length": 58.0, "completions/min_length": 0.0, "completions/min_terminated_length": 58.0, "epoch": 0.1472, "grad_norm": 2.000593662261963, "learning_rate": 6.055555555555555e-07, "loss": 0.0084, "num_tokens": 23846735.0, "reward": 0.822265625, "reward_std": 0.23607444763183594, "rewards/accuracy_reward_step": 0.35546875, "rewards/format_reward_step": 0.93359375, "step": 92 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6951848400124262, "calib/avg_num_step_conf": 2.13671875, "calib/ece": 0.3127734375000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.1484375, "calib/gap": 0.2170034172103139, "calib/mean_conf": 0.7463671875, "calib/mu_c": 0.8692792792792794, "calib/mu_w": 0.6522758620689655, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.3127734375000001, "calib/std_conf": 0.2700659748993565, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.8295121951219512, "calib/step_q_c_n": 246.0, "calib/step_q_gap": 0.15549226156713392, "calib/step_q_w": 0.6740199335548173, "calib/step_q_w_n": 301.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1488, "grad_norm": 0.0, "learning_rate": 6e-07, "loss": 0.0, "num_tokens": 24108375.0, "reward": 0.9140625, "reward_std": 0.17483949661254883, "rewards/accuracy_reward_step": 0.43359375, "rewards/format_reward_step": 0.9609375, "step": 93 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.8387096774193549, "calib/avg_num_step_conf": 1.83203125, "calib/ece": 0.29234375000000007, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.1484375, "calib/gap": 0.34038965186841263, "calib/mean_conf": 0.6868749999999999, "calib/mu_c": 0.892970297029703, "calib/mu_w": 0.5525806451612904, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9140625, "calib/pce": 0.29234375000000007, "calib/std_conf": 0.32713302435706487, "calib/step_conf_rate": 0.9140625, "calib/step_q_c": 0.8342574257425742, "calib/step_q_c_n": 202.0, "calib/step_q_gap": 0.21414506619201235, "calib/step_q_w": 0.6201123595505619, "calib/step_q_w_n": 267.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1504, "grad_norm": 0.0, "learning_rate": 5.944444444444444e-07, "loss": 0.0, "num_tokens": 24363951.0, "reward": 0.8515625, "reward_std": 0.18971920013427734, "rewards/accuracy_reward_step": 0.39453125, "rewards/format_reward_step": 0.9140625, "step": 94 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7844712182061581, "calib/avg_num_step_conf": 2.06640625, "calib/ece": 0.40230468750000015, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.1484375, "calib/gap": 0.2171365461847391, "calib/mean_conf": 0.7538671875, "calib/mu_c": 0.8946666666666668, "calib/mu_w": 0.6775301204819277, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.93359375, "calib/pce": 0.40230468750000015, "calib/std_conf": 0.2606982172663247, "calib/step_conf_rate": 0.93359375, "calib/step_q_c": 0.8169999999999998, "calib/step_q_c_n": 200.0, "calib/step_q_gap": 0.1401306990881458, "calib/step_q_w": 0.676869300911854, "calib/step_q_w_n": 329.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.152, "grad_norm": 0.0, "learning_rate": 5.888888888888889e-07, "loss": 0.0, "num_tokens": 24624599.0, "reward": 0.81640625, "reward_std": 0.16954180598258972, "rewards/accuracy_reward_step": 0.3515625, "rewards/format_reward_step": 0.9296875, "step": 95 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7233352056418898, "calib/avg_num_step_conf": 1.9375, "calib/ece": 0.3401171875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.140625, "calib/gap": 0.19886725332334776, "calib/mean_conf": 0.7658984375, "calib/mu_c": 0.8800917431192661, "calib/mu_w": 0.6812244897959183, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.3401171875, "calib/std_conf": 0.25488809860418865, "calib/step_conf_rate": 0.9609375, "calib/step_q_c": 0.8263636363636364, "calib/step_q_c_n": 242.0, "calib/step_q_gap": 0.128253400143164, "calib/step_q_w": 0.6981102362204724, "calib/step_q_w_n": 254.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1536, "grad_norm": 0.0, "learning_rate": 5.833333333333334e-07, "loss": 0.0, "num_tokens": 24885647.0, "reward": 0.90625, "reward_std": 0.15150675177574158, "rewards/accuracy_reward_step": 0.42578125, "rewards/format_reward_step": 0.9609375, "step": 96 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.8210475886298307, "calib/avg_num_step_conf": 2.21484375, "calib/ece": 0.3226171874999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.08203125, "calib/gap": 0.2630808048546791, "calib/mean_conf": 0.7171484375, "calib/mu_c": 0.8764356435643564, "calib/mu_w": 0.6133548387096773, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.3226171874999999, "calib/std_conf": 0.26849274285594493, "calib/step_conf_rate": 0.953125, "calib/step_q_c": 0.8418433179723502, "calib/step_q_c_n": 217.0, "calib/step_q_gap": 0.18964331797235023, "calib/step_q_w": 0.6522, "calib/step_q_w_n": 350.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1552, "grad_norm": 0.0, "learning_rate": 5.777777777777777e-07, "loss": 0.0, "num_tokens": 25146071.0, "reward": 0.87109375, "reward_std": 0.09396559000015259, "rewards/accuracy_reward_step": 0.39453125, "rewards/format_reward_step": 0.953125, "step": 97 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.8168831168831169, "calib/avg_num_step_conf": 1.81640625, "calib/ece": 0.3071093750000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.88671875, "calib/frac_conf_gt_0.9": 0.109375, "calib/gap": 0.3438121878121879, "calib/mean_conf": 0.662578125, "calib/mu_c": 0.8841758241758243, "calib/mu_w": 0.5403636363636364, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.88671875, "calib/pce": 0.3071093750000001, "calib/std_conf": 0.3359600192306882, "calib/step_conf_rate": 0.88671875, "calib/step_q_c": 0.832010582010582, "calib/step_q_c_n": 189.0, "calib/step_q_gap": 0.18371348056130665, "calib/step_q_w": 0.6482971014492753, "calib/step_q_w_n": 276.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1568, "grad_norm": 0.0, "learning_rate": 5.722222222222222e-07, "loss": 0.0, "num_tokens": 25405423.0, "reward": 0.798828125, "reward_std": 0.13947027921676636, "rewards/accuracy_reward_step": 0.35546875, "rewards/format_reward_step": 0.88671875, "step": 98 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.8558369005296379, "calib/avg_num_step_conf": 2.21875, "calib/ece": 0.39683593750000007, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.1484375, "calib/gap": 0.2849952840455633, "calib/mean_conf": 0.6944921875, "calib/mu_c": 0.8937662337662337, "calib/mu_w": 0.6087709497206704, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.39527343750000005, "calib/std_conf": 0.29874564578662033, "calib/step_conf_rate": 0.95703125, "calib/step_q_c": 0.8462359550561798, "calib/step_q_c_n": 178.0, "calib/step_q_gap": 0.20731287813310284, "calib/step_q_w": 0.6389230769230769, "calib/step_q_w_n": 390.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1584, "grad_norm": 0.0, "learning_rate": 5.666666666666666e-07, "loss": 0.0, "num_tokens": 25667567.0, "reward": 0.779296875, "reward_std": 0.1340028941631317, "rewards/accuracy_reward_step": 0.30078125, "rewards/format_reward_step": 0.95703125, "step": 99 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7632051282051282, "calib/avg_num_step_conf": 2.046875, "calib/ece": 0.3394140625000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.140625, "calib/gap": 0.2556076923076924, "calib/mean_conf": 0.7300390625, "calib/mu_c": 0.8858000000000001, "calib/mu_w": 0.6301923076923077, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.921875, "calib/pce": 0.3394140625000001, "calib/std_conf": 0.29843667937959817, "calib/step_conf_rate": 0.921875, "calib/step_q_c": 0.8273076923076924, "calib/step_q_c_n": 208.0, "calib/step_q_gap": 0.15468111002921148, "calib/step_q_w": 0.6726265822784809, "calib/step_q_w_n": 316.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 5.611111111111111e-07, "loss": 0.0, "num_tokens": 25929711.0, "reward": 0.8515625, "reward_std": 0.1345345377922058, "rewards/accuracy_reward_step": 0.390625, "rewards/format_reward_step": 0.921875, "step": 100 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7967128630127447, "calib/avg_num_step_conf": 1.9453125, "calib/ece": 0.33968750000000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 0.1171875, "calib/gap": 0.3065756668291665, "calib/mean_conf": 0.6639062499999999, "calib/mu_c": 0.8710843373493977, "calib/mu_w": 0.5645086705202312, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.90625, "calib/pce": 0.33968750000000003, "calib/std_conf": 0.32286978406307626, "calib/step_conf_rate": 0.90625, "calib/step_q_c": 0.8103314917127072, "calib/step_q_c_n": 181.0, "calib/step_q_gap": 0.19449552956759697, "calib/step_q_w": 0.6158359621451103, "calib/step_q_w_n": 317.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1616, "grad_norm": 0.0, "learning_rate": 5.555555555555555e-07, "loss": 0.0, "num_tokens": 26191759.0, "reward": 0.775390625, "reward_std": 0.1538757085800171, "rewards/accuracy_reward_step": 0.32421875, "rewards/format_reward_step": 0.90234375, "step": 101 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.8637359198998747, "calib/avg_num_step_conf": 1.7109375, "calib/ece": 0.4073437500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.8984375, "calib/frac_conf_gt_0.9": 0.0859375, "calib/gap": 0.29232790988735924, "calib/mean_conf": 0.6729687499999999, "calib/mu_c": 0.8876470588235295, "calib/mu_w": 0.5953191489361702, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.8984375, "calib/pce": 0.4073437500000001, "calib/std_conf": 0.29595676250668357, "calib/step_conf_rate": 0.8984375, "calib/step_q_c": 0.8413432835820894, "calib/step_q_c_n": 134.0, "calib/step_q_gap": 0.19173802042419463, "calib/step_q_w": 0.6496052631578948, "calib/step_q_w_n": 304.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1632, "grad_norm": 0.0, "learning_rate": 5.5e-07, "loss": 0.0, "num_tokens": 26453903.0, "reward": 0.71484375, "reward_std": 0.1617899388074875, "rewards/accuracy_reward_step": 0.265625, "rewards/format_reward_step": 0.8984375, "step": 102 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.8071111111111111, "calib/avg_num_step_conf": 2.03515625, "calib/ece": 0.30921568627450985, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.09803921568627451, "calib/gap": 0.2580285714285715, "calib/mean_conf": 0.7209803921568628, "calib/mu_c": 0.8727619047619048, "calib/mu_w": 0.6147333333333334, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.30921568627450985, "calib/std_conf": 0.2757311485527146, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.8194270833333334, "calib/step_q_c_n": 192.0, "calib/step_q_gap": 0.16100763044579547, "calib/step_q_w": 0.6584194528875379, "calib/step_q_w_n": 329.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 90.0, "completions/max_terminated_length": 90.0, "completions/mean_length": 0.3515625, "completions/mean_terminated_length": 90.0, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.1648, "grad_norm": 13.912463188171387, "learning_rate": 5.444444444444443e-07, "loss": 0.0387, "num_tokens": 26715417.0, "reward": 0.892578125, "reward_std": 0.1751222312450409, "rewards/accuracy_reward_step": 0.41015625, "rewards/format_reward_step": 0.96484375, "step": 103 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7132736306201313, "calib/avg_num_step_conf": 1.88671875, "calib/ece": 0.30765624999999996, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.08203125, "calib/gap": 0.16475986014843913, "calib/mean_conf": 0.7725, "calib/mu_c": 0.8606722689075632, "calib/mu_w": 0.6959124087591241, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.30765624999999996, "calib/std_conf": 0.2190640602654849, "calib/step_conf_rate": 0.95703125, "calib/step_q_c": 0.8040092165898618, "calib/step_q_c_n": 217.0, "calib/step_q_gap": 0.08070094591317001, "calib/step_q_w": 0.7233082706766918, "calib/step_q_w_n": 266.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1664, "grad_norm": 0.0, "learning_rate": 5.388888888888888e-07, "loss": 0.0, "num_tokens": 26970097.0, "reward": 0.939453125, "reward_std": 0.2298126220703125, "rewards/accuracy_reward_step": 0.46484375, "rewards/format_reward_step": 0.94921875, "step": 104 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7389437526835552, "calib/avg_num_step_conf": 2.0859375, "calib/ece": 0.3115625000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.12890625, "calib/gap": 0.19483285284916885, "calib/mean_conf": 0.77640625, "calib/mu_c": 0.880672268907563, "calib/mu_w": 0.6858394160583942, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.3115625000000001, "calib/std_conf": 0.23548564969215746, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.8402409638554217, "calib/step_q_c_n": 249.0, "calib/step_q_gap": 0.12150412175015857, "calib/step_q_w": 0.7187368421052631, "calib/step_q_w_n": 285.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.168, "grad_norm": 0.0, "learning_rate": 5.333333333333333e-07, "loss": 0.0, "num_tokens": 27231649.0, "reward": 0.953125, "reward_std": 0.19588688015937805, "rewards/accuracy_reward_step": 0.46484375, "rewards/format_reward_step": 0.9765625, "step": 105 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7503485454038479, "calib/avg_num_step_conf": 1.8125, "calib/ece": 0.4481640625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.06640625, "calib/gap": 0.26279672832047585, "calib/mean_conf": 0.6551953125, "calib/mu_c": 0.8635849056603773, "calib/mu_w": 0.6007881773399014, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9140625, "calib/pce": 0.4481640625, "calib/std_conf": 0.3329671896043623, "calib/step_conf_rate": 0.9140625, "calib/step_q_c": 0.8060465116279069, "calib/step_q_c_n": 129.0, "calib/step_q_gap": 0.12658382506074273, "calib/step_q_w": 0.6794626865671641, "calib/step_q_w_n": 335.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1696, "grad_norm": 0.0, "learning_rate": 5.277777777777777e-07, "loss": 0.0, "num_tokens": 27493401.0, "reward": 0.662109375, "reward_std": 0.21436436474323273, "rewards/accuracy_reward_step": 0.20703125, "rewards/format_reward_step": 0.91015625, "step": 106 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6810475625041229, "calib/avg_num_step_conf": 1.58984375, "calib/ece": 0.41011718750000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 0.140625, "calib/gap": 0.17468500560723021, "calib/mean_conf": 0.7733984375, "calib/mu_c": 0.8846236559139786, "calib/mu_w": 0.7099386503067484, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.90234375, "calib/pce": 0.41011718750000004, "calib/std_conf": 0.2439225619895761, "calib/step_conf_rate": 0.90234375, "calib/step_q_c": 0.859607843137255, "calib/step_q_c_n": 153.0, "calib/step_q_gap": 0.13110390612938116, "calib/step_q_w": 0.7285039370078739, "calib/step_q_w_n": 254.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1712, "grad_norm": 0.0, "learning_rate": 5.222222222222223e-07, "loss": 0.0, "num_tokens": 27754953.0, "reward": 0.814453125, "reward_std": 0.1844952404499054, "rewards/accuracy_reward_step": 0.36328125, "rewards/format_reward_step": 0.90234375, "step": 107 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.8188978829389788, "calib/avg_num_step_conf": 1.51171875, "calib/ece": 0.2666406250000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.8203125, "calib/frac_conf_gt_0.9": 0.1015625, "calib/gap": 0.2998978829389789, "calib/mean_conf": 0.6963281250000001, "calib/mu_c": 0.8673636363636364, "calib/mu_w": 0.5674657534246575, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.8203125, "calib/pce": 0.2666406250000001, "calib/std_conf": 0.2968121541210608, "calib/step_conf_rate": 0.8203125, "calib/step_q_c": 0.8295327102803738, "calib/step_q_c_n": 214.0, "calib/step_q_gap": 0.14311652530927543, "calib/step_q_w": 0.6864161849710984, "calib/step_q_w_n": 173.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1728, "grad_norm": 0.0, "learning_rate": 5.166666666666667e-07, "loss": 0.0, "num_tokens": 28015593.0, "reward": 0.83984375, "reward_std": 0.15671934187412262, "rewards/accuracy_reward_step": 0.4296875, "rewards/format_reward_step": 0.8203125, "step": 108 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.727376707259638, "calib/avg_num_step_conf": 1.8828125, "calib/ece": 0.41750000000000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.1171875, "calib/gap": 0.17363654713045829, "calib/mean_conf": 0.76515625, "calib/mu_c": 0.8784269662921349, "calib/mu_w": 0.7047904191616766, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.921875, "calib/pce": 0.41750000000000004, "calib/std_conf": 0.2495370224754986, "calib/step_conf_rate": 0.921875, "calib/step_q_c": 0.8147619047619048, "calib/step_q_c_n": 189.0, "calib/step_q_gap": 0.08438647814074451, "calib/step_q_w": 0.7303754266211603, "calib/step_q_w_n": 293.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1744, "grad_norm": 0.0, "learning_rate": 5.111111111111111e-07, "loss": 0.0, "num_tokens": 28271961.0, "reward": 0.80859375, "reward_std": 0.18457874655723572, "rewards/accuracy_reward_step": 0.34765625, "rewards/format_reward_step": 0.921875, "step": 109 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.787047371031746, "calib/avg_num_step_conf": 2.2421875, "calib/ece": 0.30824218749999993, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.1328125, "calib/gap": 0.22788690476190476, "calib/mean_conf": 0.7457421875, "calib/mu_c": 0.8739285714285714, "calib/mu_w": 0.6460416666666666, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.93359375, "calib/pce": 0.30824218749999993, "calib/std_conf": 0.24994733026722818, "calib/step_conf_rate": 0.93359375, "calib/step_q_c": 0.8352301255230126, "calib/step_q_c_n": 239.0, "calib/step_q_gap": 0.13048385686629627, "calib/step_q_w": 0.7047462686567163, "calib/step_q_w_n": 335.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.176, "grad_norm": 0.0, "learning_rate": 5.055555555555555e-07, "loss": 0.0, "num_tokens": 28534105.0, "reward": 0.900390625, "reward_std": 0.16185137629508972, "rewards/accuracy_reward_step": 0.4375, "rewards/format_reward_step": 0.92578125, "step": 110 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.8895753622279371, "calib/avg_num_step_conf": 1.4921875, "calib/ece": 0.2669531250000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.8671875, "calib/frac_conf_gt_0.9": 0.078125, "calib/gap": 0.3484952643793515, "calib/mean_conf": 0.6849218749999999, "calib/mu_c": 0.8877570093457945, "calib/mu_w": 0.539261744966443, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.8671875, "calib/pce": 0.2669531250000002, "calib/std_conf": 0.3133712756403885, "calib/step_conf_rate": 0.8671875, "calib/step_q_c": 0.8516292134831461, "calib/step_q_c_n": 178.0, "calib/step_q_gap": 0.20246254681647957, "calib/step_q_w": 0.6491666666666666, "calib/step_q_w_n": 204.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1776, "grad_norm": 0.0, "learning_rate": 5e-07, "loss": 0.0, "num_tokens": 28793977.0, "reward": 0.8515625, "reward_std": 0.18710467219352722, "rewards/accuracy_reward_step": 0.41796875, "rewards/format_reward_step": 0.8671875, "step": 111 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7499269005847954, "calib/avg_num_step_conf": 1.6796875, "calib/ece": 0.4177343750000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.08984375, "calib/gap": 0.22549122807017552, "calib/mean_conf": 0.714609375, "calib/mu_c": 0.8731578947368421, "calib/mu_w": 0.6476666666666666, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9140625, "calib/pce": 0.4177343750000001, "calib/std_conf": 0.2815176569277838, "calib/step_conf_rate": 0.9140625, "calib/step_q_c": 0.8282608695652175, "calib/step_q_c_n": 138.0, "calib/step_q_gap": 0.12326086956521742, "calib/step_q_w": 0.7050000000000001, "calib/step_q_w_n": 292.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1792, "grad_norm": 0.0, "learning_rate": 4.944444444444445e-07, "loss": 0.0, "num_tokens": 29056121.0, "reward": 0.75390625, "reward_std": 0.17721685767173767, "rewards/accuracy_reward_step": 0.296875, "rewards/format_reward_step": 0.9140625, "step": 112 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6963464486458866, "calib/avg_num_step_conf": 1.80078125, "calib/ece": 0.39121568627450987, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.13333333333333333, "calib/gap": 0.15047713336739899, "calib/mean_conf": 0.7872941176470588, "calib/mu_c": 0.8769902912621359, "calib/mu_w": 0.7265131578947369, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.3872941176470589, "calib/std_conf": 0.20832885900616288, "calib/step_conf_rate": 0.94140625, "calib/step_q_c": 0.8564848484848484, "calib/step_q_c_n": 165.0, "calib/step_q_gap": 0.11553890253890253, "calib/step_q_w": 0.7409459459459459, "calib/step_q_w_n": 296.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 109.0, "completions/max_terminated_length": 109.0, "completions/mean_length": 0.42578125, "completions/mean_terminated_length": 109.0, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.1808, "grad_norm": 13.413500785827637, "learning_rate": 4.888888888888889e-07, "loss": 0.0386, "num_tokens": 29312030.0, "reward": 0.87109375, "reward_std": 0.26899805665016174, "rewards/accuracy_reward_step": 0.40234375, "rewards/format_reward_step": 0.9375, "step": 113 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.8162942392146733, "calib/avg_num_step_conf": 1.37109375, "calib/ece": 0.2621875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.77734375, "calib/frac_conf_gt_0.9": 0.0859375, "calib/gap": 0.3598553345388787, "calib/mean_conf": 0.6371875, "calib/mu_c": 0.8592857142857141, "calib/mu_w": 0.4994303797468354, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.78125, "calib/pce": 0.25828125, "calib/std_conf": 0.35472466765612737, "calib/step_conf_rate": 0.78125, "calib/step_q_c": 0.834406779661017, "calib/step_q_c_n": 177.0, "calib/step_q_gap": 0.2173378141437755, "calib/step_q_w": 0.6170689655172414, "calib/step_q_w_n": 174.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1824, "grad_norm": 0.0, "learning_rate": 4.833333333333333e-07, "loss": 0.0, "num_tokens": 29572862.0, "reward": 0.771484375, "reward_std": 0.11413875222206116, "rewards/accuracy_reward_step": 0.3828125, "rewards/format_reward_step": 0.77734375, "step": 114 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.81134033203125, "calib/avg_num_step_conf": 1.8671875, "calib/ece": 0.24710937500000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.87109375, "calib/frac_conf_gt_0.9": 0.15234375, "calib/gap": 0.270625, "calib/mean_conf": 0.747109375, "calib/mu_c": 0.882421875, "calib/mu_w": 0.611796875, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.87109375, "calib/pce": 0.24710937500000002, "calib/std_conf": 0.26624164407002404, "calib/step_conf_rate": 0.87109375, "calib/step_q_c": 0.8349027237354085, "calib/step_q_c_n": 257.0, "calib/step_q_gap": 0.13060408120147193, "calib/step_q_w": 0.7042986425339366, "calib/step_q_w_n": 221.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.184, "grad_norm": 0.0, "learning_rate": 4.777777777777778e-07, "loss": 0.0, "num_tokens": 29833878.0, "reward": 0.935546875, "reward_std": 0.16707748174667358, "rewards/accuracy_reward_step": 0.5, "rewards/format_reward_step": 0.87109375, "step": 115 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7463149971774447, "calib/avg_num_step_conf": 1.82421875, "calib/ece": 0.3782421874999998, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.13671875, "calib/gap": 0.14845574860440292, "calib/mean_conf": 0.7962109374999999, "calib/mu_c": 0.8826168224299064, "calib/mu_w": 0.7341610738255034, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9296875, "calib/pce": 0.3782421874999998, "calib/std_conf": 0.18975955807381903, "calib/step_conf_rate": 0.9296875, "calib/step_q_c": 0.8487557603686635, "calib/step_q_c_n": 217.0, "calib/step_q_gap": 0.1251557603686636, "calib/step_q_w": 0.7235999999999999, "calib/step_q_w_n": 250.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1856, "grad_norm": 0.0, "learning_rate": 4.722222222222222e-07, "loss": 0.0, "num_tokens": 30093774.0, "reward": 0.8828125, "reward_std": 0.1823364645242691, "rewards/accuracy_reward_step": 0.41796875, "rewards/format_reward_step": 0.9296875, "step": 116 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7509335782063055, "calib/avg_num_step_conf": 1.71875, "calib/ece": 0.2668359375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 0.140625, "calib/gap": 0.2454441383532292, "calib/mean_conf": 0.7394921875, "calib/mu_c": 0.8689256198347107, "calib/mu_w": 0.6234814814814815, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.90234375, "calib/pce": 0.2668359375, "calib/std_conf": 0.2719032599132729, "calib/step_conf_rate": 0.90234375, "calib/step_q_c": 0.8196265560165974, "calib/step_q_c_n": 241.0, "calib/step_q_gap": 0.11384766154423565, "calib/step_q_w": 0.7057788944723618, "calib/step_q_w_n": 199.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1872, "grad_norm": 0.0, "learning_rate": 4.6666666666666666e-07, "loss": 0.0, "num_tokens": 30350870.0, "reward": 0.923828125, "reward_std": 0.241466224193573, "rewards/accuracy_reward_step": 0.47265625, "rewards/format_reward_step": 0.90234375, "step": 117 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7673202614379085, "calib/avg_num_step_conf": 2.0234375, "calib/ece": 0.408671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.16015625, "calib/gap": 0.2034303405572756, "calib/mean_conf": 0.7407031249999999, "calib/mu_c": 0.8765882352941176, "calib/mu_w": 0.673157894736842, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.91796875, "calib/pce": 0.408671875, "calib/std_conf": 0.260792612750504, "calib/step_conf_rate": 0.91796875, "calib/step_q_c": 0.843072625698324, "calib/step_q_c_n": 179.0, "calib/step_q_gap": 0.1142230681762001, "calib/step_q_w": 0.7288495575221239, "calib/step_q_w_n": 339.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1888, "grad_norm": 0.0, "learning_rate": 4.611111111111111e-07, "loss": 0.0, "num_tokens": 30609454.0, "reward": 0.7890625, "reward_std": 0.22457295656204224, "rewards/accuracy_reward_step": 0.33203125, "rewards/format_reward_step": 0.9140625, "step": 118 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7685700261780105, "calib/avg_num_step_conf": 1.8203125, "calib/ece": 0.4750588235294119, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.1568627450980392, "calib/gap": 0.22265543193717285, "calib/mean_conf": 0.7260392156862745, "calib/mu_c": 0.8928125, "calib/mu_w": 0.6701570680628272, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9296875, "calib/pce": 0.4750588235294119, "calib/std_conf": 0.2798185659220613, "calib/step_conf_rate": 0.9296875, "calib/step_q_c": 0.8353543307086614, "calib/step_q_c_n": 127.0, "calib/step_q_gap": 0.12830418321603598, "calib/step_q_w": 0.7070501474926254, "calib/step_q_w_n": 339.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 115.0, "completions/max_terminated_length": 115.0, "completions/mean_length": 0.44921875, "completions/mean_terminated_length": 115.0, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.1904, "grad_norm": 12.073020935058594, "learning_rate": 4.555555555555555e-07, "loss": 0.0386, "num_tokens": 30864129.0, "reward": 0.71484375, "reward_std": 0.1937408149242401, "rewards/accuracy_reward_step": 0.25, "rewards/format_reward_step": 0.9296875, "step": 119 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7531453946040503, "calib/avg_num_step_conf": 1.7421875, "calib/ece": 0.4002734375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.125, "calib/gap": 0.20211061024019383, "calib/mean_conf": 0.7479296875, "calib/mu_c": 0.8797752808988766, "calib/mu_w": 0.6776646706586827, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.4002734375, "calib/std_conf": 0.26558604666125124, "calib/step_conf_rate": 0.94140625, "calib/step_q_c": 0.8450292397660819, "calib/step_q_c_n": 171.0, "calib/step_q_gap": 0.159829239766082, "calib/step_q_w": 0.6851999999999999, "calib/step_q_w_n": 275.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.192, "grad_norm": 0.0, "learning_rate": 4.5e-07, "loss": 0.0, "num_tokens": 31120377.0, "reward": 0.818359375, "reward_std": 0.15873654186725616, "rewards/accuracy_reward_step": 0.34765625, "rewards/format_reward_step": 0.94140625, "step": 120 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7926492262343405, "calib/avg_num_step_conf": 1.859375, "calib/ece": 0.20500000000000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.1640625, "calib/gap": 0.27243429132891184, "calib/mean_conf": 0.7440625, "calib/mu_c": 0.8696376811594202, "calib/mu_w": 0.5972033898305084, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.92578125, "calib/pce": 0.20500000000000004, "calib/std_conf": 0.26311813381777777, "calib/step_conf_rate": 0.92578125, "calib/step_q_c": 0.8423367697594502, "calib/step_q_c_n": 291.0, "calib/step_q_gap": 0.1619043373270177, "calib/step_q_w": 0.6804324324324325, "calib/step_q_w_n": 185.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1936, "grad_norm": 0.0, "learning_rate": 4.444444444444444e-07, "loss": 0.0, "num_tokens": 31376505.0, "reward": 1.0, "reward_std": 0.18774083256721497, "rewards/accuracy_reward_step": 0.5390625, "rewards/format_reward_step": 0.921875, "step": 121 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.7796604437229437, "calib/avg_num_step_conf": 1.65234375, "calib/ece": 0.33843750000000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.8515625, "calib/frac_conf_gt_0.9": 0.0703125, "calib/gap": 0.26956709956709957, "calib/mean_conf": 0.6821875, "calib/mu_c": 0.859090909090909, "calib/mu_w": 0.5895238095238095, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.86328125, "calib/pce": 0.33843750000000006, "calib/std_conf": 0.3078794343468722, "calib/step_conf_rate": 0.86328125, "calib/step_q_c": 0.8230000000000001, "calib/step_q_c_n": 150.0, "calib/step_q_gap": 0.19248717948717953, "calib/step_q_w": 0.6305128205128205, "calib/step_q_w_n": 273.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1952, "grad_norm": 0.0, "learning_rate": 4.3888888888888884e-07, "loss": 0.0, "num_tokens": 31637169.0, "reward": 0.76953125, "reward_std": 0.1618141233921051, "rewards/accuracy_reward_step": 0.34375, "rewards/format_reward_step": 0.8515625, "step": 122 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7666422152943334, "calib/avg_num_step_conf": 1.61328125, "calib/ece": 0.2636718750000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.890625, "calib/frac_conf_gt_0.9": 0.10546875, "calib/gap": 0.2334384742343666, "calib/mean_conf": 0.739453125, "calib/mu_c": 0.8607317073170732, "calib/mu_w": 0.6272932330827066, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.89453125, "calib/pce": 0.26132812500000013, "calib/std_conf": 0.2541693228494233, "calib/step_conf_rate": 0.89453125, "calib/step_q_c": 0.8172018348623852, "calib/step_q_c_n": 218.0, "calib/step_q_gap": 0.1495095271700776, "calib/step_q_w": 0.6676923076923076, "calib/step_q_w_n": 195.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.1968, "grad_norm": 0.0, "learning_rate": 4.3333333333333335e-07, "loss": 0.0, "num_tokens": 31893617.0, "reward": 0.92578125, "reward_std": 0.20838135480880737, "rewards/accuracy_reward_step": 0.48046875, "rewards/format_reward_step": 0.890625, "step": 123 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7086222992798079, "calib/avg_num_step_conf": 1.98828125, "calib/ece": 0.409372549019608, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.8984375, "calib/frac_conf_gt_0.9": 0.17254901960784313, "calib/gap": 0.17711189650573456, "calib/mean_conf": 0.770156862745098, "calib/mu_c": 0.8833695652173911, "calib/mu_w": 0.7062576687116565, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.90234375, "calib/pce": 0.409372549019608, "calib/std_conf": 0.24438312676815713, "calib/step_conf_rate": 0.90234375, "calib/step_q_c": 0.841400966183575, "calib/step_q_c_n": 207.0, "calib/step_q_gap": 0.12030825095178699, "calib/step_q_w": 0.721092715231788, "calib/step_q_w_n": 302.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 174.0, "completions/max_terminated_length": 174.0, "completions/mean_length": 0.6796875, "completions/mean_terminated_length": 174.0, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.1984, "grad_norm": 6.5422492027282715, "learning_rate": 4.2777777777777775e-07, "loss": 0.0211, "num_tokens": 32152799.0, "reward": 0.80859375, "reward_std": 0.24629858136177063, "rewards/accuracy_reward_step": 0.359375, "rewards/format_reward_step": 0.8984375, "step": 124 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.8502252252252251, "calib/avg_num_step_conf": 1.93359375, "calib/ece": 0.30007812500000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.078125, "calib/gap": 0.25560060060060086, "calib/mean_conf": 0.721953125, "calib/mu_c": 0.8697222222222225, "calib/mu_w": 0.6141216216216216, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.94921875, "calib/pce": 0.30007812500000003, "calib/std_conf": 0.24888205851915962, "calib/step_conf_rate": 0.94921875, "calib/step_q_c": 0.8381142857142858, "calib/step_q_c_n": 175.0, "calib/step_q_gap": 0.1968330357142859, "calib/step_q_w": 0.6412812499999999, "calib/step_q_w_n": 320.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 4.222222222222222e-07, "loss": 0.0, "num_tokens": 32413503.0, "reward": 0.892578125, "reward_std": 0.148696631193161, "rewards/accuracy_reward_step": 0.421875, "rewards/format_reward_step": 0.94140625, "step": 125 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7811994525320394, "calib/avg_num_step_conf": 1.6484375, "calib/ece": 0.30580392156862746, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.90625, "calib/frac_conf_gt_0.9": 0.11764705882352941, "calib/gap": 0.24642777155655093, "calib/mean_conf": 0.7528627450980392, "calib/mu_c": 0.8891228070175439, "calib/mu_w": 0.6426950354609929, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.91015625, "calib/pce": 0.30580392156862746, "calib/std_conf": 0.2725235028258378, "calib/step_conf_rate": 0.91015625, "calib/step_q_c": 0.8609844559585493, "calib/step_q_c_n": 193.0, "calib/step_q_gap": 0.13076611534719573, "calib/step_q_w": 0.7302183406113536, "calib/step_q_w_n": 229.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 120.0, "completions/max_terminated_length": 120.0, "completions/mean_length": 0.46875, "completions/mean_terminated_length": 120.0, "completions/min_length": 0.0, "completions/min_terminated_length": 120.0, "epoch": 0.2016, "grad_norm": 11.591796875, "learning_rate": 4.1666666666666667e-07, "loss": 0.0386, "num_tokens": 32674999.0, "reward": 0.8984375, "reward_std": 0.1374414563179016, "rewards/accuracy_reward_step": 0.4453125, "rewards/format_reward_step": 0.90625, "step": 126 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.76854035639413, "calib/avg_num_step_conf": 1.76171875, "calib/ece": 0.32592156862745103, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.85546875, "calib/frac_conf_gt_0.9": 0.15294117647058825, "calib/gap": 0.28083333333333327, "calib/mean_conf": 0.702392156862745, "calib/mu_c": 0.8775, "calib/mu_w": 0.5966666666666667, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.86328125, "calib/pce": 0.32592156862745103, "calib/std_conf": 0.3155029646814732, "calib/step_conf_rate": 0.86328125, "calib/step_q_c": 0.8281818181818181, "calib/step_q_c_n": 198.0, "calib/step_q_gap": 0.1588537549407113, "calib/step_q_w": 0.6693280632411068, "calib/step_q_w_n": 253.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 206.0, "completions/max_terminated_length": 206.0, "completions/mean_length": 0.8046875, "completions/mean_terminated_length": 206.0, "completions/min_length": 0.0, "completions/min_terminated_length": 206.0, "epoch": 0.2032, "grad_norm": 0.0, "learning_rate": 4.1111111111111107e-07, "loss": 0.0, "num_tokens": 32937005.0, "reward": 0.806640625, "reward_std": 0.15518707036972046, "rewards/accuracy_reward_step": 0.37890625, "rewards/format_reward_step": 0.85546875, "step": 127 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.641909120627656, "calib/avg_num_step_conf": 1.5390625, "calib/ece": 0.4167187500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.11328125, "calib/gap": 0.12800523046747292, "calib/mean_conf": 0.7878124999999999, "calib/mu_c": 0.868315789473684, "calib/mu_w": 0.7403105590062111, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.92578125, "calib/pce": 0.4167187500000001, "calib/std_conf": 0.22554426360195906, "calib/step_conf_rate": 0.92578125, "calib/step_q_c": 0.8166473988439304, "calib/step_q_c_n": 173.0, "calib/step_q_gap": 0.045108937305468944, "calib/step_q_w": 0.7715384615384615, "calib/step_q_w_n": 221.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2048, "grad_norm": 0.0, "learning_rate": 4.055555555555555e-07, "loss": 0.0, "num_tokens": 33197445.0, "reward": 0.833984375, "reward_std": 0.15270306169986725, "rewards/accuracy_reward_step": 0.37109375, "rewards/format_reward_step": 0.92578125, "step": 128 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.8298757763975155, "calib/avg_num_step_conf": 1.61328125, "calib/ece": 0.27121568627450976, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.86328125, "calib/frac_conf_gt_0.9": 0.11764705882352941, "calib/gap": 0.27840062111801234, "calib/mean_conf": 0.7221960784313726, "calib/mu_c": 0.8750434782608695, "calib/mu_w": 0.5966428571428571, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.8671875, "calib/pce": 0.27121568627450976, "calib/std_conf": 0.2796494317721809, "calib/step_conf_rate": 0.8671875, "calib/step_q_c": 0.8413861386138615, "calib/step_q_c_n": 202.0, "calib/step_q_gap": 0.17053305804514118, "calib/step_q_w": 0.6708530805687203, "calib/step_q_w_n": 211.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2064, "grad_norm": 0.0, "learning_rate": 4e-07, "loss": 0.0, "num_tokens": 33454925.0, "reward": 0.880859375, "reward_std": 0.2340913712978363, "rewards/accuracy_reward_step": 0.44921875, "rewards/format_reward_step": 0.86328125, "step": 129 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.8435849056603772, "calib/avg_num_step_conf": 1.80859375, "calib/ece": 0.3078906250000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.89453125, "calib/frac_conf_gt_0.9": 0.0859375, "calib/gap": 0.2810037735849057, "calib/mean_conf": 0.721953125, "calib/mu_c": 0.8866037735849057, "calib/mu_w": 0.6056, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.89453125, "calib/pce": 0.3078906250000001, "calib/std_conf": 0.26936719418432226, "calib/step_conf_rate": 0.89453125, "calib/step_q_c": 0.8460000000000001, "calib/step_q_c_n": 190.0, "calib/step_q_gap": 0.18424175824175837, "calib/step_q_w": 0.6617582417582417, "calib/step_q_w_n": 273.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.208, "grad_norm": 0.0, "learning_rate": 3.9444444444444444e-07, "loss": 0.0, "num_tokens": 33710109.0, "reward": 0.861328125, "reward_std": 0.12092234194278717, "rewards/accuracy_reward_step": 0.4140625, "rewards/format_reward_step": 0.89453125, "step": 130 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7044871794871794, "calib/avg_num_step_conf": 1.734375, "calib/ece": 0.33089843750000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.13671875, "calib/gap": 0.21905384615384627, "calib/mean_conf": 0.7094140625, "calib/mu_c": 0.8429000000000002, "calib/mu_w": 0.6238461538461539, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.92578125, "calib/pce": 0.32484375, "calib/std_conf": 0.29053568593590373, "calib/step_conf_rate": 0.92578125, "calib/step_q_c": 0.8193820224719103, "calib/step_q_c_n": 178.0, "calib/step_q_gap": 0.12550984202078252, "calib/step_q_w": 0.6938721804511278, "calib/step_q_w_n": 266.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2096, "grad_norm": 0.0, "learning_rate": 3.888888888888889e-07, "loss": 0.0, "num_tokens": 33972253.0, "reward": 0.8515625, "reward_std": 0.23300394415855408, "rewards/accuracy_reward_step": 0.390625, "rewards/format_reward_step": 0.921875, "step": 131 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7979328723710746, "calib/avg_num_step_conf": 1.46875, "calib/ece": 0.40511718750000014, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.8359375, "calib/frac_conf_gt_0.9": 0.125, "calib/gap": 0.2582699510227602, "calib/mean_conf": 0.7082421875, "calib/mu_c": 0.887820512820513, "calib/mu_w": 0.6295505617977528, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.8359375, "calib/pce": 0.4043359375000001, "calib/std_conf": 0.2905242364592236, "calib/step_conf_rate": 0.8359375, "calib/step_q_c": 0.8483333333333334, "calib/step_q_c_n": 150.0, "calib/step_q_gap": 0.14718289085545733, "calib/step_q_w": 0.7011504424778761, "calib/step_q_w_n": 226.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2112, "grad_norm": 0.0, "learning_rate": 3.8333333333333335e-07, "loss": 0.0, "num_tokens": 34232701.0, "reward": 0.72265625, "reward_std": 0.17576810717582703, "rewards/accuracy_reward_step": 0.3046875, "rewards/format_reward_step": 0.8359375, "step": 132 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7485119047619048, "calib/avg_num_step_conf": 1.61328125, "calib/ece": 0.40699218750000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.8671875, "calib/frac_conf_gt_0.9": 0.109375, "calib/gap": 0.1980980066445185, "calib/mean_conf": 0.7351171875, "calib/mu_c": 0.8682142857142858, "calib/mu_w": 0.6701162790697673, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.87109375, "calib/pce": 0.40699218750000005, "calib/std_conf": 0.2587771459520524, "calib/step_conf_rate": 0.87109375, "calib/step_q_c": 0.8102068965517241, "calib/step_q_c_n": 145.0, "calib/step_q_gap": 0.07371435923829128, "calib/step_q_w": 0.7364925373134328, "calib/step_q_w_n": 268.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2128, "grad_norm": 0.0, "learning_rate": 3.7777777777777775e-07, "loss": 0.0, "num_tokens": 34489189.0, "reward": 0.76171875, "reward_std": 0.1624026745557785, "rewards/accuracy_reward_step": 0.328125, "rewards/format_reward_step": 0.8671875, "step": 133 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6898434898434899, "calib/avg_num_step_conf": 1.76953125, "calib/ece": 0.41687500000000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.140625, "calib/gap": 0.15460006660006664, "calib/mean_conf": 0.7645312499999999, "calib/mu_c": 0.8641758241758242, "calib/mu_w": 0.7095757575757575, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.41296875000000005, "calib/std_conf": 0.24761632170242232, "calib/step_conf_rate": 0.9609375, "calib/step_q_c": 0.8281756756756757, "calib/step_q_c_n": 148.0, "calib/step_q_gap": 0.0925035445281348, "calib/step_q_w": 0.7356721311475409, "calib/step_q_w_n": 305.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2144, "grad_norm": 0.0, "learning_rate": 3.722222222222222e-07, "loss": 0.0, "num_tokens": 34750813.0, "reward": 0.833984375, "reward_std": 0.16388335824012756, "rewards/accuracy_reward_step": 0.35546875, "rewards/format_reward_step": 0.95703125, "step": 134 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.8336483336483337, "calib/avg_num_step_conf": 1.77734375, "calib/ece": 0.26476377952755914, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.87890625, "calib/frac_conf_gt_0.9": 0.09448818897637795, "calib/gap": 0.2992912492912495, "calib/mean_conf": 0.7017716535433071, "calib/mu_c": 0.8702702702702705, "calib/mu_w": 0.570979020979021, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.88671875, "calib/pce": 0.26476377952755914, "calib/std_conf": 0.2823460681302369, "calib/step_conf_rate": 0.88671875, "calib/step_q_c": 0.8374324324324326, "calib/step_q_c_n": 222.0, "calib/step_q_gap": 0.19880582299037253, "calib/step_q_w": 0.6386266094420601, "calib/step_q_w_n": 233.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9921875, "completions/max_length": 391.0, "completions/max_terminated_length": 391.0, "completions/mean_length": 1.80859375, "completions/mean_terminated_length": 231.5, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.216, "grad_norm": 6.3882737159729, "learning_rate": 3.666666666666666e-07, "loss": 0.0316, "num_tokens": 35013356.0, "reward": 0.873046875, "reward_std": 0.16735684871673584, "rewards/accuracy_reward_step": 0.43359375, "rewards/format_reward_step": 0.87890625, "step": 135 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.8624868972746331, "calib/avg_num_step_conf": 1.5625, "calib/ece": 0.3145490196078432, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.90625, "calib/frac_conf_gt_0.9": 0.08235294117647059, "calib/gap": 0.31093356918239023, "calib/mean_conf": 0.6910196078431373, "calib/mu_c": 0.8848958333333335, "calib/mu_w": 0.5739622641509433, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.91015625, "calib/pce": 0.3145490196078432, "calib/std_conf": 0.29591556096862437, "calib/step_conf_rate": 0.91015625, "calib/step_q_c": 0.8440853658536586, "calib/step_q_c_n": 164.0, "calib/step_q_gap": 0.20489045059942146, "calib/step_q_w": 0.6391949152542371, "calib/step_q_w_n": 236.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 150.0, "completions/max_terminated_length": 150.0, "completions/mean_length": 0.5859375, "completions/mean_terminated_length": 150.0, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.2176, "grad_norm": 12.954962730407715, "learning_rate": 3.6111111111111107e-07, "loss": 0.0386, "num_tokens": 35275538.0, "reward": 0.83203125, "reward_std": 0.16031301021575928, "rewards/accuracy_reward_step": 0.37890625, "rewards/format_reward_step": 0.90625, "step": 136 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.8614053216223199, "calib/avg_num_step_conf": 1.04296875, "calib/ece": 0.23367187500000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.7890625, "calib/frac_conf_gt_0.9": 0.046875, "calib/gap": 0.4158847842934643, "calib/mean_conf": 0.616484375, "calib/mu_c": 0.8731632653061225, "calib/mu_w": 0.4572784810126582, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.79296875, "calib/pce": 0.23367187500000003, "calib/std_conf": 0.3621260783634056, "calib/step_conf_rate": 0.79296875, "calib/step_q_c": 0.8496350364963504, "calib/step_q_c_n": 137.0, "calib/step_q_gap": 0.3046350364963505, "calib/step_q_w": 0.5449999999999999, "calib/step_q_w_n": 130.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2192, "grad_norm": 0.0, "learning_rate": 3.5555555555555553e-07, "loss": 0.0, "num_tokens": 35537098.0, "reward": 0.77734375, "reward_std": 0.15790295600891113, "rewards/accuracy_reward_step": 0.3828125, "rewards/format_reward_step": 0.7890625, "step": 137 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.8096899224806201, "calib/avg_num_step_conf": 1.25, "calib/ece": 0.31936254980079687, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.77734375, "calib/frac_conf_gt_0.9": 0.11553784860557768, "calib/gap": 0.321377026074701, "calib/mean_conf": 0.66199203187251, "calib/mu_c": 0.8732558139534888, "calib/mu_w": 0.5518787878787879, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.77734375, "calib/pce": 0.31936254980079687, "calib/std_conf": 0.3190698882094751, "calib/step_conf_rate": 0.77734375, "calib/step_q_c": 0.8474468085106381, "calib/step_q_c_n": 141.0, "calib/step_q_gap": 0.1861060263877331, "calib/step_q_w": 0.661340782122905, "calib/step_q_w_n": 179.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.98828125, "completions/max_length": 98.0, "completions/max_terminated_length": 98.0, "completions/mean_length": 1.03125, "completions/mean_terminated_length": 88.0, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.2208, "grad_norm": 14.619424819946289, "learning_rate": 3.5e-07, "loss": 0.0438, "num_tokens": 35798546.0, "reward": 0.724609375, "reward_std": 0.19636039435863495, "rewards/accuracy_reward_step": 0.3359375, "rewards/format_reward_step": 0.77734375, "step": 138 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7978445830969937, "calib/avg_num_step_conf": 1.6484375, "calib/ece": 0.48445312499999993, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.84765625, "calib/frac_conf_gt_0.9": 0.0859375, "calib/gap": 0.27447078842881467, "calib/mean_conf": 0.6446093749999999, "calib/mu_c": 0.8751219512195123, "calib/mu_w": 0.6006511627906976, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.8515625, "calib/pce": 0.48445312499999993, "calib/std_conf": 0.32561096981230436, "calib/step_conf_rate": 0.8515625, "calib/step_q_c": 0.8340624999999999, "calib/step_q_c_n": 64.0, "calib/step_q_gap": 0.17202339385474852, "calib/step_q_w": 0.6620391061452514, "calib/step_q_w_n": 358.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2224, "grad_norm": 0.0, "learning_rate": 3.4444444444444444e-07, "loss": 0.0, "num_tokens": 36059962.0, "reward": 0.583984375, "reward_std": 0.08932922780513763, "rewards/accuracy_reward_step": 0.16015625, "rewards/format_reward_step": 0.84765625, "step": 139 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.8151722837022133, "calib/avg_num_step_conf": 1.5703125, "calib/ece": 0.2853149606299213, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.0984251968503937, "calib/gap": 0.26493838028169014, "calib/mean_conf": 0.7262598425196851, "calib/mu_c": 0.874375, "calib/mu_w": 0.6094366197183099, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.2853149606299213, "calib/std_conf": 0.28053763660644737, "calib/step_conf_rate": 0.94140625, "calib/step_q_c": 0.8384831460674157, "calib/step_q_c_n": 178.0, "calib/step_q_gap": 0.16058136035312998, "calib/step_q_w": 0.6779017857142857, "calib/step_q_w_n": 224.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9921875, "completions/max_length": 216.0, "completions/max_terminated_length": 216.0, "completions/mean_length": 1.07421875, "completions/mean_terminated_length": 137.5, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.224, "grad_norm": 19.582271575927734, "learning_rate": 3.388888888888889e-07, "loss": 0.064, "num_tokens": 36321421.0, "reward": 0.908203125, "reward_std": 0.1559145301580429, "rewards/accuracy_reward_step": 0.4375, "rewards/format_reward_step": 0.94140625, "step": 140 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.837111644354635, "calib/avg_num_step_conf": 1.171875, "calib/ece": 0.3227843137254903, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.84375, "calib/frac_conf_gt_0.9": 0.09803921568627451, "calib/gap": 0.24756125284162656, "calib/mean_conf": 0.7423921568627451, "calib/mu_c": 0.88607476635514, "calib/mu_w": 0.6385135135135135, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.84375, "calib/pce": 0.3227843137254903, "calib/std_conf": 0.26380355721868176, "calib/step_conf_rate": 0.84375, "calib/step_q_c": 0.8762585034013606, "calib/step_q_c_n": 147.0, "calib/step_q_gap": 0.20011471255168733, "calib/step_q_w": 0.6761437908496732, "calib/step_q_w_n": 153.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 329.0, "completions/max_terminated_length": 329.0, "completions/mean_length": 1.28515625, "completions/mean_terminated_length": 329.0, "completions/min_length": 0.0, "completions/min_terminated_length": 329.0, "epoch": 0.2256, "grad_norm": 3.8931891918182373, "learning_rate": 3.333333333333333e-07, "loss": 0.0387, "num_tokens": 36583502.0, "reward": 0.83984375, "reward_std": 0.1878194510936737, "rewards/accuracy_reward_step": 0.41796875, "rewards/format_reward_step": 0.84375, "step": 141 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.8214285714285714, "calib/avg_num_step_conf": 1.37890625, "calib/ece": 0.33768627450980393, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.828125, "calib/frac_conf_gt_0.9": 0.09411764705882353, "calib/gap": 0.29801108374384244, "calib/mean_conf": 0.6710196078431373, "calib/mu_c": 0.8673563218390805, "calib/mu_w": 0.5693452380952381, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.828125, "calib/pce": 0.33376470588235296, "calib/std_conf": 0.31803646912573386, "calib/step_conf_rate": 0.828125, "calib/step_q_c": 0.8450993377483443, "calib/step_q_c_n": 151.0, "calib/step_q_gap": 0.17544587240180975, "calib/step_q_w": 0.6696534653465346, "calib/step_q_w_n": 202.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 71.0, "completions/max_terminated_length": 71.0, "completions/mean_length": 0.27734375, "completions/mean_terminated_length": 71.0, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.2272, "grad_norm": 4.603307247161865, "learning_rate": 3.2777777777777776e-07, "loss": 0.0211, "num_tokens": 36844285.0, "reward": 0.75390625, "reward_std": 0.2743779122829437, "rewards/accuracy_reward_step": 0.33984375, "rewards/format_reward_step": 0.828125, "step": 142 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5820154352226721, "calib/avg_num_step_conf": 1.3046875, "calib/ece": 0.3389453124999998, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.83203125, "calib/frac_conf_gt_0.9": 0.03515625, "calib/gap": 0.14803137651821863, "calib/mean_conf": 0.7444140625, "calib/mu_c": 0.8323076923076923, "calib/mu_w": 0.6842763157894737, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.84765625, "calib/pce": 0.33855468749999984, "calib/std_conf": 0.25949102908818655, "calib/step_conf_rate": 0.84765625, "calib/step_q_c": 0.8037419354838711, "calib/step_q_c_n": 155.0, "calib/step_q_gap": 0.0545799243106867, "calib/step_q_w": 0.7491620111731844, "calib/step_q_w_n": 179.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2288, "grad_norm": 0.0, "learning_rate": 3.222222222222222e-07, "loss": 0.0, "num_tokens": 37104013.0, "reward": 0.822265625, "reward_std": 0.19560091197490692, "rewards/accuracy_reward_step": 0.40625, "rewards/format_reward_step": 0.83203125, "step": 143 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.762091978326946, "calib/avg_num_step_conf": 1.29296875, "calib/ece": 0.383313725490196, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.8359375, "calib/frac_conf_gt_0.9": 0.07058823529411765, "calib/gap": 0.19431710056825668, "calib/mean_conf": 0.7519411764705882, "calib/mu_c": 0.8746276595744679, "calib/mu_w": 0.6803105590062112, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.83984375, "calib/pce": 0.383313725490196, "calib/std_conf": 0.2457981326820618, "calib/step_conf_rate": 0.83984375, "calib/step_q_c": 0.8461481481481482, "calib/step_q_c_n": 135.0, "calib/step_q_gap": 0.09543386243386254, "calib/step_q_w": 0.7507142857142857, "calib/step_q_w_n": 196.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2304, "grad_norm": 0.0, "learning_rate": 3.166666666666666e-07, "loss": 0.0, "num_tokens": 37363837.0, "reward": 0.7890625, "reward_std": 0.23823988437652588, "rewards/accuracy_reward_step": 0.37109375, "rewards/format_reward_step": 0.8359375, "step": 144 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7471633274978928, "calib/avg_num_step_conf": 0.9765625, "calib/ece": 0.315078125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.796875, "calib/frac_conf_gt_0.9": 0.05078125, "calib/gap": 0.24671335019127283, "calib/mean_conf": 0.6939843750000001, "calib/mu_c": 0.8472164948453608, "calib/mu_w": 0.6005031446540879, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.796875, "calib/pce": 0.315078125, "calib/std_conf": 0.2903199739181915, "calib/step_conf_rate": 0.796875, "calib/step_q_c": 0.8206422018348626, "calib/step_q_c_n": 109.0, "calib/step_q_gap": 0.141635109636281, "calib/step_q_w": 0.6790070921985816, "calib/step_q_w_n": 141.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.232, "grad_norm": 0.0, "learning_rate": 3.111111111111111e-07, "loss": 0.0, "num_tokens": 37625069.0, "reward": 0.77734375, "reward_std": 0.22878046333789825, "rewards/accuracy_reward_step": 0.37890625, "rewards/format_reward_step": 0.796875, "step": 145 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.739271331487541, "calib/avg_num_step_conf": 1.25, "calib/ece": 0.33418972332015806, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.83984375, "calib/frac_conf_gt_0.9": 0.10276679841897234, "calib/gap": 0.16757676818525058, "calib/mean_conf": 0.7610671936758894, "calib/mu_c": 0.8518103448275864, "calib/mu_w": 0.6842335766423359, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.84765625, "calib/pce": 0.31837944664031614, "calib/std_conf": 0.2577004715448092, "calib/step_conf_rate": 0.84765625, "calib/step_q_c": 0.8490196078431373, "calib/step_q_c_n": 153.0, "calib/step_q_gap": 0.10327110484912538, "calib/step_q_w": 0.745748502994012, "calib/step_q_w_n": 167.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.98828125, "completions/max_length": 168.0, "completions/max_terminated_length": 168.0, "completions/mean_length": 1.40234375, "completions/mean_terminated_length": 119.66667175292969, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.2336, "grad_norm": 20.80350685119629, "learning_rate": 3.055555555555556e-07, "loss": 0.0497, "num_tokens": 37887572.0, "reward": 0.873046875, "reward_std": 0.19967760145664215, "rewards/accuracy_reward_step": 0.453125, "rewards/format_reward_step": 0.83984375, "step": 146 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.7865740740740742, "calib/avg_num_step_conf": 1.3671875, "calib/ece": 0.22133333333333344, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.08627450980392157, "calib/gap": 0.21573148148148147, "calib/mean_conf": 0.7499607843137254, "calib/mu_c": 0.8514814814814814, "calib/mu_w": 0.6357499999999999, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.921875, "calib/pce": 0.22094117647058836, "calib/std_conf": 0.23815384389946498, "calib/step_conf_rate": 0.921875, "calib/step_q_c": 0.8319897959183673, "calib/step_q_c_n": 196.0, "calib/step_q_gap": 0.1154313543599258, "calib/step_q_w": 0.7165584415584415, "calib/step_q_w_n": 154.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2352, "grad_norm": 0.0, "learning_rate": 3e-07, "loss": 0.0, "num_tokens": 38146868.0, "reward": 0.982421875, "reward_std": 0.26381731033325195, "rewards/accuracy_reward_step": 0.52734375, "rewards/format_reward_step": 0.91015625, "step": 147 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.771933062630737, "calib/avg_num_step_conf": 1.19921875, "calib/ece": 0.24882352941176455, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.83203125, "calib/frac_conf_gt_0.9": 0.10588235294117647, "calib/gap": 0.2499077150239939, "calib/mean_conf": 0.7429411764705882, "calib/mu_c": 0.8693650793650791, "calib/mu_w": 0.6194573643410852, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.83984375, "calib/pce": 0.24882352941176455, "calib/std_conf": 0.2541644837011783, "calib/step_conf_rate": 0.83984375, "calib/step_q_c": 0.8450802139037432, "calib/step_q_c_n": 187.0, "calib/step_q_gap": 0.11258021390374329, "calib/step_q_w": 0.7324999999999999, "calib/step_q_w_n": 120.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 125.0, "completions/max_terminated_length": 125.0, "completions/mean_length": 0.48828125, "completions/mean_terminated_length": 125.0, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.2368, "grad_norm": 9.787782669067383, "learning_rate": 2.9444444444444444e-07, "loss": 0.0386, "num_tokens": 38404913.0, "reward": 0.908203125, "reward_std": 0.23260335624217987, "rewards/accuracy_reward_step": 0.4921875, "rewards/format_reward_step": 0.83203125, "step": 148 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.8605001400952648, "calib/avg_num_step_conf": 0.9140625, "calib/ece": 0.33235294117647063, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.6484375, "calib/frac_conf_gt_0.9": 0.0392156862745098, "calib/gap": 0.3200721490613617, "calib/mean_conf": 0.657843137254902, "calib/mu_c": 0.873734939759036, "calib/mu_w": 0.5536627906976743, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.66015625, "calib/pce": 0.33235294117647063, "calib/std_conf": 0.3017693816605732, "calib/step_conf_rate": 0.66015625, "calib/step_q_c": 0.8461475409836066, "calib/step_q_c_n": 122.0, "calib/step_q_gap": 0.26802254098360656, "calib/step_q_w": 0.578125, "calib/step_q_w_n": 112.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 128.0, "completions/max_terminated_length": 128.0, "completions/mean_length": 0.5, "completions/mean_terminated_length": 128.0, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.2384, "grad_norm": 4.38532018661499, "learning_rate": 2.8888888888888885e-07, "loss": 0.0113, "num_tokens": 38666121.0, "reward": 0.6484375, "reward_std": 0.16728198528289795, "rewards/accuracy_reward_step": 0.32421875, "rewards/format_reward_step": 0.6484375, "step": 149 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7517543859649123, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.44574218750000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.734375, "calib/frac_conf_gt_0.9": 0.0703125, "calib/gap": 0.22140510366826172, "calib/mean_conf": 0.7035546875, "calib/mu_c": 0.867878787878788, "calib/mu_w": 0.6464736842105263, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.7421875, "calib/pce": 0.44574218750000005, "calib/std_conf": 0.27628298187144523, "calib/step_conf_rate": 0.7421875, "calib/step_q_c": 0.8296341463414633, "calib/step_q_c_n": 82.0, "calib/step_q_gap": 0.09837700348432055, "calib/step_q_w": 0.7312571428571427, "calib/step_q_w_n": 175.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 2.833333333333333e-07, "loss": 0.0, "num_tokens": 38924825.0, "reward": 0.625, "reward_std": 0.2419390082359314, "rewards/accuracy_reward_step": 0.2578125, "rewards/format_reward_step": 0.734375, "step": 150 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7865737203972498, "calib/avg_num_step_conf": 1.0859375, "calib/ece": 0.34148437500000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.81640625, "calib/frac_conf_gt_0.9": 0.10546875, "calib/gap": 0.24296154825566607, "calib/mean_conf": 0.7399218750000001, "calib/mu_c": 0.8860784313725492, "calib/mu_w": 0.6431168831168831, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.81640625, "calib/pce": 0.34148437500000006, "calib/std_conf": 0.27425893357643677, "calib/step_conf_rate": 0.81640625, "calib/step_q_c": 0.8683333333333334, "calib/step_q_c_n": 132.0, "calib/step_q_gap": 0.09675799086758008, "calib/step_q_w": 0.7715753424657533, "calib/step_q_w_n": 146.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2416, "grad_norm": 0.0, "learning_rate": 2.7777777777777776e-07, "loss": 0.0, "num_tokens": 39185953.0, "reward": 0.806640625, "reward_std": 0.14504341781139374, "rewards/accuracy_reward_step": 0.3984375, "rewards/format_reward_step": 0.81640625, "step": 151 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.8505436971405558, "calib/avg_num_step_conf": 1.09765625, "calib/ece": 0.43984375000000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.83203125, "calib/frac_conf_gt_0.9": 0.07421875, "calib/gap": 0.2655110753121225, "calib/mean_conf": 0.69375, "calib/mu_c": 0.8918461538461538, "calib/mu_w": 0.6263350785340314, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.84375, "calib/pce": 0.43984375000000003, "calib/std_conf": 0.2864614393771001, "calib/step_conf_rate": 0.84375, "calib/step_q_c": 0.8694, "calib/step_q_c_n": 100.0, "calib/step_q_gap": 0.1652563535911602, "calib/step_q_w": 0.7041436464088398, "calib/step_q_w_n": 181.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2432, "grad_norm": 0.0, "learning_rate": 2.7222222222222216e-07, "loss": 0.0, "num_tokens": 39444273.0, "reward": 0.669921875, "reward_std": 0.1663089096546173, "rewards/accuracy_reward_step": 0.25390625, "rewards/format_reward_step": 0.83203125, "step": 152 }, { "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.7240257516608453, "calib/avg_num_step_conf": 1.171875, "calib/ece": 0.37652000000000013, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.8046875, "calib/frac_conf_gt_0.9": 0.064, "calib/gap": 0.1877891925210604, "calib/mean_conf": 0.74852, "calib/mu_c": 0.8664516129032259, "calib/mu_w": 0.6786624203821655, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.83203125, "calib/pce": 0.37652000000000013, "calib/std_conf": 0.2518106622047605, "calib/step_conf_rate": 0.83203125, "calib/step_q_c": 0.842280701754386, "calib/step_q_c_n": 114.0, "calib/step_q_gap": 0.08249575551782684, "calib/step_q_w": 0.7597849462365591, "calib/step_q_w_n": 186.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.984375, "completions/max_length": 293.0, "completions/max_terminated_length": 293.0, "completions/mean_length": 2.734375, "completions/mean_terminated_length": 175.0, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.2448, "grad_norm": 6.291600704193115, "learning_rate": 2.6666666666666667e-07, "loss": 0.0478, "num_tokens": 39706157.0, "reward": 0.765625, "reward_std": 0.2518659234046936, "rewards/accuracy_reward_step": 0.36328125, "rewards/format_reward_step": 0.8046875, "step": 153 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.7246344564526382, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.30586956521739134, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.828125, "calib/frac_conf_gt_0.9": 0.07509881422924901, "calib/gap": 0.19740209790209773, "calib/mean_conf": 0.7406521739130434, "calib/mu_c": 0.8522272727272726, "calib/mu_w": 0.6548251748251749, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.84765625, "calib/pce": 0.30586956521739134, "calib/std_conf": 0.264670108566857, "calib/step_conf_rate": 0.84765625, "calib/step_q_c": 0.83856, "calib/step_q_c_n": 125.0, "calib/step_q_gap": 0.09600360902255645, "calib/step_q_w": 0.7425563909774435, "calib/step_q_w_n": 133.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.98828125, "completions/max_length": 162.0, "completions/max_terminated_length": 162.0, "completions/mean_length": 1.515625, "completions/mean_terminated_length": 129.33334350585938, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.2464, "grad_norm": 27.489587783813477, "learning_rate": 2.6111111111111113e-07, "loss": 0.1159, "num_tokens": 39968409.0, "reward": 0.84765625, "reward_std": 0.24236908555030823, "rewards/accuracy_reward_step": 0.43359375, "rewards/format_reward_step": 0.828125, "step": 154 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.7320774463631605, "calib/avg_num_step_conf": 1.13671875, "calib/ece": 0.34586614173228336, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.73828125, "calib/frac_conf_gt_0.9": 0.09055118110236221, "calib/gap": 0.21522370486656195, "calib/mean_conf": 0.7316929133858268, "calib/mu_c": 0.8638775510204081, "calib/mu_w": 0.6486538461538461, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.7578125, "calib/pce": 0.34586614173228336, "calib/std_conf": 0.25883120124355463, "calib/step_conf_rate": 0.7578125, "calib/step_q_c": 0.8274657534246576, "calib/step_q_c_n": 146.0, "calib/step_q_gap": 0.08346575342465756, "calib/step_q_w": 0.744, "calib/step_q_w_n": 145.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9921875, "completions/max_length": 179.0, "completions/max_terminated_length": 179.0, "completions/mean_length": 0.97265625, "completions/mean_terminated_length": 124.5, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.248, "grad_norm": 12.564913749694824, "learning_rate": 2.5555555555555553e-07, "loss": 0.0699, "num_tokens": 40230018.0, "reward": 0.755859375, "reward_std": 0.2155877947807312, "rewards/accuracy_reward_step": 0.38671875, "rewards/format_reward_step": 0.73828125, "step": 155 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.7833593306863301, "calib/avg_num_step_conf": 1.1640625, "calib/ece": 0.37767716535433055, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.80078125, "calib/frac_conf_gt_0.9": 0.08661417322834646, "calib/gap": 0.24506806579693707, "calib/mean_conf": 0.7005118110236219, "calib/mu_c": 0.8664634146341463, "calib/mu_w": 0.6213953488372093, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.81640625, "calib/pce": 0.37767716535433055, "calib/std_conf": 0.2834811264848123, "calib/step_conf_rate": 0.81640625, "calib/step_q_c": 0.8305970149253732, "calib/step_q_c_n": 134.0, "calib/step_q_gap": 0.16748725882781224, "calib/step_q_w": 0.663109756097561, "calib/step_q_w_n": 164.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 176.0, "completions/max_terminated_length": 176.0, "completions/mean_length": 0.6875, "completions/mean_terminated_length": 176.0, "completions/min_length": 0.0, "completions/min_terminated_length": 176.0, "epoch": 0.2496, "grad_norm": 6.4348907470703125, "learning_rate": 2.5e-07, "loss": 0.0181, "num_tokens": 40492338.0, "reward": 0.720703125, "reward_std": 0.21248027682304382, "rewards/accuracy_reward_step": 0.3203125, "rewards/format_reward_step": 0.80078125, "step": 156 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.8132274776290116, "calib/avg_num_step_conf": 1.14453125, "calib/ece": 0.2923437499999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.76953125, "calib/frac_conf_gt_0.9": 0.04296875, "calib/gap": 0.3179546524927672, "calib/mean_conf": 0.6399999999999999, "calib/mu_c": 0.8474157303370786, "calib/mu_w": 0.5294610778443114, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.7734375, "calib/pce": 0.2923437499999999, "calib/std_conf": 0.3270858545550388, "calib/step_conf_rate": 0.7734375, "calib/step_q_c": 0.8076, "calib/step_q_c_n": 125.0, "calib/step_q_gap": 0.12188571428571437, "calib/step_q_w": 0.6857142857142856, "calib/step_q_w_n": 168.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2512, "grad_norm": 0.0, "learning_rate": 2.4444444444444445e-07, "loss": 0.0, "num_tokens": 40752426.0, "reward": 0.732421875, "reward_std": 0.2622066140174866, "rewards/accuracy_reward_step": 0.34765625, "rewards/format_reward_step": 0.76953125, "step": 157 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.7431954371525522, "calib/avg_num_step_conf": 1.03125, "calib/ece": 0.3642063492063493, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.68359375, "calib/frac_conf_gt_0.9": 0.0992063492063492, "calib/gap": 0.2789733593242366, "calib/mean_conf": 0.6856349206349205, "calib/mu_c": 0.8749382716049383, "calib/mu_w": 0.5959649122807017, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.69140625, "calib/pce": 0.3642063492063493, "calib/std_conf": 0.33009257006527015, "calib/step_conf_rate": 0.69140625, "calib/step_q_c": 0.8597872340425533, "calib/step_q_c_n": 94.0, "calib/step_q_gap": 0.07219899874843572, "calib/step_q_w": 0.7875882352941176, "calib/step_q_w_n": 170.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.98828125, "completions/max_length": 194.0, "completions/max_terminated_length": 194.0, "completions/mean_length": 1.80078125, "completions/mean_terminated_length": 153.6666717529297, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.2528, "grad_norm": 14.57109260559082, "learning_rate": 2.388888888888889e-07, "loss": 0.0776, "num_tokens": 41014319.0, "reward": 0.658203125, "reward_std": 0.2848474979400635, "rewards/accuracy_reward_step": 0.31640625, "rewards/format_reward_step": 0.68359375, "step": 158 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.8447322970639033, "calib/avg_num_step_conf": 0.828125, "calib/ece": 0.4440711462450593, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.65625, "calib/frac_conf_gt_0.9": 0.07509881422924901, "calib/gap": 0.28110708117443894, "calib/mean_conf": 0.6812252964426877, "calib/mu_c": 0.8956666666666668, "calib/mu_w": 0.6145595854922279, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.6640625, "calib/pce": 0.4440711462450593, "calib/std_conf": 0.31747347564723677, "calib/step_conf_rate": 0.6640625, "calib/step_q_c": 0.8800000000000001, "calib/step_q_c_n": 80.0, "calib/step_q_gap": 0.1796212121212123, "calib/step_q_w": 0.7003787878787878, "calib/step_q_w_n": 132.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 192.0, "completions/max_terminated_length": 192.0, "completions/mean_length": 0.75, "completions/mean_terminated_length": 192.0, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.2544, "grad_norm": 5.986937522888184, "learning_rate": 2.3333333333333333e-07, "loss": 0.0253, "num_tokens": 41276223.0, "reward": 0.5625, "reward_std": 0.16526161134243011, "rewards/accuracy_reward_step": 0.234375, "rewards/format_reward_step": 0.65625, "step": 159 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.7982125124131082, "calib/avg_num_step_conf": 1.24609375, "calib/ece": 0.3841338582677163, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.78125, "calib/frac_conf_gt_0.9": 0.07086614173228346, "calib/gap": 0.20020324395895428, "calib/mean_conf": 0.7581496062992126, "calib/mu_c": 0.8834736842105265, "calib/mu_w": 0.6832704402515722, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.79296875, "calib/pce": 0.3841338582677163, "calib/std_conf": 0.2493979389422934, "calib/step_conf_rate": 0.79296875, "calib/step_q_c": 0.8559210526315789, "calib/step_q_c_n": 152.0, "calib/step_q_gap": 0.10981326820044124, "calib/step_q_w": 0.7461077844311377, "calib/step_q_w_n": 167.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.256, "grad_norm": 0.0, "learning_rate": 2.2777777777777776e-07, "loss": 0.0, "num_tokens": 41533975.0, "reward": 0.76171875, "reward_std": 0.22846969962120056, "rewards/accuracy_reward_step": 0.37109375, "rewards/format_reward_step": 0.78125, "step": 160 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.8618847627468317, "calib/avg_num_step_conf": 0.8359375, "calib/ece": 0.30623015873015874, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.6015625, "calib/frac_conf_gt_0.9": 0.11904761904761904, "calib/gap": 0.4072104332449164, "calib/mean_conf": 0.6157539682539682, "calib/mu_c": 0.8969230769230774, "calib/mu_w": 0.489712643678161, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.609375, "calib/pce": 0.30623015873015874, "calib/std_conf": 0.366754084525311, "calib/step_conf_rate": 0.609375, "calib/step_q_c": 0.8607142857142858, "calib/step_q_c_n": 112.0, "calib/step_q_gap": 0.15679271708683484, "calib/step_q_w": 0.7039215686274509, "calib/step_q_w_n": 102.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9921875, "completions/max_length": 162.0, "completions/max_terminated_length": 162.0, "completions/mean_length": 0.98046875, "completions/mean_terminated_length": 125.5, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.2576, "grad_norm": 6.436363697052002, "learning_rate": 2.222222222222222e-07, "loss": 0.0219, "num_tokens": 41791610.0, "reward": 0.609375, "reward_std": 0.22688651084899902, "rewards/accuracy_reward_step": 0.30859375, "rewards/format_reward_step": 0.6015625, "step": 161 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.8629441624365483, "calib/avg_num_step_conf": 0.9453125, "calib/ece": 0.4206274509803921, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.60546875, "calib/frac_conf_gt_0.9": 0.058823529411764705, "calib/gap": 0.3042201995448979, "calib/mean_conf": 0.6480784313725491, "calib/mu_c": 0.8831034482758623, "calib/mu_w": 0.5788832487309644, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.625, "calib/pce": 0.4206274509803921, "calib/std_conf": 0.30981182417322195, "calib/step_conf_rate": 0.625, "calib/step_q_c": 0.8247619047619048, "calib/step_q_c_n": 105.0, "calib/step_q_gap": 0.12987139381299984, "calib/step_q_w": 0.694890510948905, "calib/step_q_w_n": 137.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2592, "grad_norm": 0.0, "learning_rate": 2.1666666666666667e-07, "loss": 0.0, "num_tokens": 42046970.0, "reward": 0.529296875, "reward_std": 0.21713021397590637, "rewards/accuracy_reward_step": 0.2265625, "rewards/format_reward_step": 0.60546875, "step": 162 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.8212469895638213, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.29606425702811257, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.6953125, "calib/frac_conf_gt_0.9": 0.14056224899598393, "calib/gap": 0.31365935242172904, "calib/mean_conf": 0.7016867469879517, "calib/mu_c": 0.8881188118811885, "calib/mu_w": 0.5744594594594594, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.70703125, "calib/pce": 0.29606425702811257, "calib/std_conf": 0.3169298730678942, "calib/step_conf_rate": 0.70703125, "calib/step_q_c": 0.8518493150684932, "calib/step_q_c_n": 146.0, "calib/step_q_gap": 0.11065665451803453, "calib/step_q_w": 0.7411926605504586, "calib/step_q_w_n": 109.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9921875, "completions/max_length": 147.0, "completions/max_terminated_length": 147.0, "completions/mean_length": 0.8984375, "completions/mean_terminated_length": 115.0, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.2608, "grad_norm": 22.149232864379883, "learning_rate": 2.111111111111111e-07, "loss": 0.0386, "num_tokens": 42305384.0, "reward": 0.7421875, "reward_std": 0.25784483551979065, "rewards/accuracy_reward_step": 0.39453125, "rewards/format_reward_step": 0.6953125, "step": 163 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7302590414215994, "calib/avg_num_step_conf": 1.3203125, "calib/ece": 0.28664031620553365, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.828125, "calib/frac_conf_gt_0.9": 0.13438735177865613, "calib/gap": 0.21386497309473163, "calib/mean_conf": 0.7688537549407116, "calib/mu_c": 0.8795901639344264, "calib/mu_w": 0.6657251908396947, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.83984375, "calib/pce": 0.28664031620553365, "calib/std_conf": 0.2422503816035925, "calib/step_conf_rate": 0.83984375, "calib/step_q_c": 0.8427272727272728, "calib/step_q_c_n": 198.0, "calib/step_q_gap": 0.09772727272727266, "calib/step_q_w": 0.7450000000000001, "calib/step_q_w_n": 140.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9921875, "completions/max_length": 157.0, "completions/max_terminated_length": 157.0, "completions/mean_length": 0.90625, "completions/mean_terminated_length": 116.0, "completions/min_length": 0.0, "completions/min_terminated_length": 75.0, "epoch": 0.2624, "grad_norm": 24.581096649169922, "learning_rate": 2.0555555555555553e-07, "loss": 0.0773, "num_tokens": 42566392.0, "reward": 0.89453125, "reward_std": 0.17005395889282227, "rewards/accuracy_reward_step": 0.48046875, "rewards/format_reward_step": 0.828125, "step": 164 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.8419240669240668, "calib/avg_num_step_conf": 1.16796875, "calib/ece": 0.300711462450593, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.734375, "calib/frac_conf_gt_0.9": 0.17391304347826086, "calib/gap": 0.31613963963963976, "calib/mean_conf": 0.7157312252964426, "calib/mu_c": 0.9006666666666667, "calib/mu_w": 0.584527027027027, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.76953125, "calib/pce": 0.300711462450593, "calib/std_conf": 0.3075684937587791, "calib/step_conf_rate": 0.76953125, "calib/step_q_c": 0.879591836734694, "calib/step_q_c_n": 147.0, "calib/step_q_gap": 0.15617078410311502, "calib/step_q_w": 0.723421052631579, "calib/step_q_w_n": 152.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 42.0, "completions/max_terminated_length": 42.0, "completions/mean_length": 0.1640625, "completions/mean_terminated_length": 42.0, "completions/min_length": 0.0, "completions/min_terminated_length": 42.0, "epoch": 0.264, "grad_norm": 4.802876949310303, "learning_rate": 2e-07, "loss": 0.0113, "num_tokens": 42827162.0, "reward": 0.77734375, "reward_std": 0.23624101281166077, "rewards/accuracy_reward_step": 0.41015625, "rewards/format_reward_step": 0.734375, "step": 165 }, { "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.7948529411764707, "calib/avg_num_step_conf": 0.9375, "calib/ece": 0.3579032258064516, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.61328125, "calib/frac_conf_gt_0.9": 0.024193548387096774, "calib/gap": 0.2886732026143791, "calib/mean_conf": 0.6320967741935484, "calib/mu_c": 0.8416176470588236, "calib/mu_w": 0.5529444444444445, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.6484375, "calib/pce": 0.3579032258064516, "calib/std_conf": 0.3087899750627789, "calib/step_conf_rate": 0.6484375, "calib/step_q_c": 0.8106306306306305, "calib/step_q_c_n": 111.0, "calib/step_q_gap": 0.129622878692646, "calib/step_q_w": 0.6810077519379845, "calib/step_q_w_n": 129.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.984375, "completions/max_length": 229.0, "completions/max_terminated_length": 229.0, "completions/mean_length": 1.65625, "completions/mean_terminated_length": 106.0, "completions/min_length": 0.0, "completions/min_terminated_length": 44.0, "epoch": 0.2656, "grad_norm": 9.170284271240234, "learning_rate": 1.9444444444444445e-07, "loss": 0.0368, "num_tokens": 43085458.0, "reward": 0.572265625, "reward_std": 0.2555355727672577, "rewards/accuracy_reward_step": 0.265625, "rewards/format_reward_step": 0.61328125, "step": 166 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.782930402930403, "calib/avg_num_step_conf": 1.0546875, "calib/ece": 0.38193675889328055, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.63671875, "calib/frac_conf_gt_0.9": 0.09486166007905138, "calib/gap": 0.2654461538461539, "calib/mean_conf": 0.6902371541501976, "calib/mu_c": 0.8738461538461538, "calib/mu_w": 0.6083999999999999, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.64453125, "calib/pce": 0.38193675889328055, "calib/std_conf": 0.2974222970950437, "calib/step_conf_rate": 0.64453125, "calib/step_q_c": 0.8478313253012049, "calib/step_q_c_n": 83.0, "calib/step_q_gap": 0.11248373171831716, "calib/step_q_w": 0.7353475935828877, "calib/step_q_w_n": 187.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 190.0, "completions/max_terminated_length": 190.0, "completions/mean_length": 0.7421875, "completions/mean_terminated_length": 190.0, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.2672, "grad_norm": 11.115410804748535, "learning_rate": 1.8888888888888888e-07, "loss": 0.0253, "num_tokens": 43347136.0, "reward": 0.623046875, "reward_std": 0.19046950340270996, "rewards/accuracy_reward_step": 0.3046875, "rewards/format_reward_step": 0.63671875, "step": 167 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.825672454885938, "calib/avg_num_step_conf": 1.15625, "calib/ece": 0.37531496062992137, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.7734375, "calib/frac_conf_gt_0.9": 0.09055118110236221, "calib/gap": 0.2425311542390196, "calib/mean_conf": 0.7257086614173228, "calib/mu_c": 0.8832584269662923, "calib/mu_w": 0.6407272727272727, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.78515625, "calib/pce": 0.37531496062992137, "calib/std_conf": 0.26522032469937384, "calib/step_conf_rate": 0.78515625, "calib/step_q_c": 0.8656164383561644, "calib/step_q_c_n": 146.0, "calib/step_q_gap": 0.14681643835616442, "calib/step_q_w": 0.7188, "calib/step_q_w_n": 150.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2688, "grad_norm": 0.0, "learning_rate": 1.833333333333333e-07, "loss": 0.0, "num_tokens": 43608984.0, "reward": 0.734375, "reward_std": 0.1352011114358902, "rewards/accuracy_reward_step": 0.34765625, "rewards/format_reward_step": 0.7734375, "step": 168 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.8567620650953984, "calib/avg_num_step_conf": 0.83203125, "calib/ece": 0.40277777777777773, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.578125, "calib/frac_conf_gt_0.9": 0.05555555555555555, "calib/gap": 0.3256902356902356, "calib/mean_conf": 0.6170634920634921, "calib/mu_c": 0.8729629629629628, "calib/mu_w": 0.5472727272727272, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.59765625, "calib/pce": 0.40277777777777773, "calib/std_conf": 0.32718755230212576, "calib/step_conf_rate": 0.59765625, "calib/step_q_c": 0.86, "calib/step_q_c_n": 71.0, "calib/step_q_gap": 0.17563380281690133, "calib/step_q_w": 0.6843661971830987, "calib/step_q_w_n": 142.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9921875, "completions/max_length": 160.0, "completions/max_terminated_length": 160.0, "completions/mean_length": 1.1796875, "completions/mean_terminated_length": 151.0, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.2704, "grad_norm": 4.74724817276001, "learning_rate": 1.7777777777777776e-07, "loss": 0.023, "num_tokens": 43870190.0, "reward": 0.5, "reward_std": 0.19524022936820984, "rewards/accuracy_reward_step": 0.2109375, "rewards/format_reward_step": 0.578125, "step": 169 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.8526422764227642, "calib/avg_num_step_conf": 1.0703125, "calib/ece": 0.2961417322834644, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.76171875, "calib/frac_conf_gt_0.9": 0.06299212598425197, "calib/gap": 0.3425813008130082, "calib/mean_conf": 0.6504724409448819, "calib/mu_c": 0.8716666666666667, "calib/mu_w": 0.5290853658536585, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.78515625, "calib/pce": 0.2961417322834644, "calib/std_conf": 0.3323842721288249, "calib/step_conf_rate": 0.78515625, "calib/step_q_c": 0.8497478991596636, "calib/step_q_c_n": 119.0, "calib/step_q_gap": 0.22755435077256692, "calib/step_q_w": 0.6221935483870967, "calib/step_q_w_n": 155.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9921875, "completions/max_length": 287.0, "completions/max_terminated_length": 287.0, "completions/mean_length": 1.98828125, "completions/mean_terminated_length": 254.5, "completions/min_length": 0.0, "completions/min_terminated_length": 222.0, "epoch": 0.272, "grad_norm": 10.044112205505371, "learning_rate": 1.7222222222222222e-07, "loss": 0.0533, "num_tokens": 44122075.0, "reward": 0.732421875, "reward_std": 0.17411759495735168, "rewards/accuracy_reward_step": 0.3515625, "rewards/format_reward_step": 0.76171875, "step": 170 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.8251155115511551, "calib/avg_num_step_conf": 1.15234375, "calib/ece": 0.2896812749003984, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.796875, "calib/frac_conf_gt_0.9": 0.0796812749003984, "calib/gap": 0.288952475247525, "calib/mean_conf": 0.6920717131474102, "calib/mu_c": 0.864752475247525, "calib/mu_w": 0.5758, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.81640625, "calib/pce": 0.2896812749003984, "calib/std_conf": 0.3005421575798019, "calib/step_conf_rate": 0.81640625, "calib/step_q_c": 0.8299319727891157, "calib/step_q_c_n": 147.0, "calib/step_q_gap": 0.1838508917080347, "calib/step_q_w": 0.646081081081081, "calib/step_q_w_n": 148.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9921875, "completions/max_length": 201.0, "completions/max_terminated_length": 201.0, "completions/mean_length": 1.50390625, "completions/mean_terminated_length": 192.5, "completions/min_length": 0.0, "completions/min_terminated_length": 184.0, "epoch": 0.2736, "grad_norm": 13.769118309020996, "learning_rate": 1.6666666666666665e-07, "loss": 0.0434, "num_tokens": 44380220.0, "reward": 0.79296875, "reward_std": 0.22870349884033203, "rewards/accuracy_reward_step": 0.39453125, "rewards/format_reward_step": 0.796875, "step": 171 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.851388079984469, "calib/avg_num_step_conf": 0.89453125, "calib/ece": 0.4028458498023715, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.67578125, "calib/frac_conf_gt_0.9": 0.05533596837944664, "calib/gap": 0.34195593088720644, "calib/mean_conf": 0.6044268774703557, "calib/mu_c": 0.877450980392157, "calib/mu_w": 0.5354950495049505, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.72265625, "calib/pce": 0.4028458498023715, "calib/std_conf": 0.3585347977182058, "calib/step_conf_rate": 0.72265625, "calib/step_q_c": 0.8674603174603177, "calib/step_q_c_n": 63.0, "calib/step_q_gap": 0.2221591126410407, "calib/step_q_w": 0.645301204819277, "calib/step_q_w_n": 166.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 200.0, "completions/max_terminated_length": 200.0, "completions/mean_length": 0.78125, "completions/mean_terminated_length": 200.0, "completions/min_length": 0.0, "completions/min_terminated_length": 200.0, "epoch": 0.2752, "grad_norm": 3.7966060638427734, "learning_rate": 1.611111111111111e-07, "loss": 0.0113, "num_tokens": 44639148.0, "reward": 0.537109375, "reward_std": 0.15649619698524475, "rewards/accuracy_reward_step": 0.19921875, "rewards/format_reward_step": 0.67578125, "step": 172 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.7164351851851851, "calib/avg_num_step_conf": 0.97265625, "calib/ece": 0.43691699604743073, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.76171875, "calib/frac_conf_gt_0.9": 0.023715415019762844, "calib/gap": 0.21969163359788357, "calib/mean_conf": 0.6819762845849802, "calib/mu_c": 0.84609375, "calib/mu_w": 0.6264021164021164, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.80078125, "calib/pce": 0.4329644268774703, "calib/std_conf": 0.3034076026579139, "calib/step_conf_rate": 0.80078125, "calib/step_q_c": 0.835625, "calib/step_q_c_n": 80.0, "calib/step_q_gap": 0.13396819526627224, "calib/step_q_w": 0.7016568047337277, "calib/step_q_w_n": 169.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2768, "grad_norm": 0.0, "learning_rate": 1.5555555555555556e-07, "loss": 0.0, "num_tokens": 44899180.0, "reward": 0.630859375, "reward_std": 0.15970739722251892, "rewards/accuracy_reward_step": 0.25, "rewards/format_reward_step": 0.76171875, "step": 173 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.7298828719196836, "calib/avg_num_step_conf": 1.06640625, "calib/ece": 0.42377510040160626, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.74609375, "calib/frac_conf_gt_0.9": 0.07228915662650602, "calib/gap": 0.19216154548220254, "calib/mean_conf": 0.7209638554216867, "calib/mu_c": 0.8544736842105263, "calib/mu_w": 0.6623121387283237, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.7578125, "calib/pce": 0.4197590361445782, "calib/std_conf": 0.26651528339652664, "calib/step_conf_rate": 0.7578125, "calib/step_q_c": 0.8565384615384617, "calib/step_q_c_n": 104.0, "calib/step_q_gap": 0.12588757396449723, "calib/step_q_w": 0.7306508875739645, "calib/step_q_w_n": 169.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.98828125, "completions/max_length": 245.0, "completions/max_terminated_length": 245.0, "completions/mean_length": 2.234375, "completions/mean_terminated_length": 190.6666717529297, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.2784, "grad_norm": 12.086045265197754, "learning_rate": 1.5e-07, "loss": 0.0447, "num_tokens": 45161896.0, "reward": 0.669921875, "reward_std": 0.21960091590881348, "rewards/accuracy_reward_step": 0.296875, "rewards/format_reward_step": 0.74609375, "step": 174 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.7342500689274882, "calib/avg_num_step_conf": 1.0234375, "calib/ece": 0.31851405622489953, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.75, "calib/frac_conf_gt_0.9": 0.08835341365461848, "calib/gap": 0.26453680727874296, "calib/mean_conf": 0.692008032128514, "calib/mu_c": 0.8577419354838711, "calib/mu_w": 0.5932051282051282, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.78515625, "calib/pce": 0.31851405622489953, "calib/std_conf": 0.3114727033231076, "calib/step_conf_rate": 0.78515625, "calib/step_q_c": 0.8468253968253967, "calib/step_q_c_n": 126.0, "calib/step_q_gap": 0.11101657329598502, "calib/step_q_w": 0.7358088235294117, "calib/step_q_w_n": 136.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.98828125, "completions/max_length": 191.0, "completions/max_terminated_length": 191.0, "completions/mean_length": 2.09375, "completions/mean_terminated_length": 178.6666717529297, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.28, "grad_norm": 5.533905506134033, "learning_rate": 1.4444444444444442e-07, "loss": 0.0483, "num_tokens": 45420776.0, "reward": 0.73828125, "reward_std": 0.1640671193599701, "rewards/accuracy_reward_step": 0.36328125, "rewards/format_reward_step": 0.75, "step": 175 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.7624078275666477, "calib/avg_num_step_conf": 1.05859375, "calib/ece": 0.40164, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.83984375, "calib/frac_conf_gt_0.9": 0.092, "calib/gap": 0.19808139534883729, "calib/mean_conf": 0.74564, "calib/mu_c": 0.8755813953488372, "calib/mu_w": 0.6774999999999999, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.87109375, "calib/pce": 0.40164, "calib/std_conf": 0.2577250286642723, "calib/step_conf_rate": 0.87109375, "calib/step_q_c": 0.8550925925925926, "calib/step_q_c_n": 108.0, "calib/step_q_gap": 0.11809872756191786, "calib/step_q_w": 0.7369938650306748, "calib/step_q_w_n": 163.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.98828125, "completions/max_length": 156.0, "completions/max_terminated_length": 156.0, "completions/mean_length": 1.48046875, "completions/mean_terminated_length": 126.33333587646484, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.2816, "grad_norm": 19.42263412475586, "learning_rate": 1.3888888888888888e-07, "loss": 0.0619, "num_tokens": 45677851.0, "reward": 0.759765625, "reward_std": 0.22453749179840088, "rewards/accuracy_reward_step": 0.33984375, "rewards/format_reward_step": 0.83984375, "step": 176 }, { "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.6719135802469136, "calib/avg_num_step_conf": 1.08984375, "calib/ece": 0.4373092369477912, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.734375, "calib/frac_conf_gt_0.9": 0.05220883534136546, "calib/gap": 0.2141666666666664, "calib/mean_conf": 0.6782730923694779, "calib/mu_c": 0.8408333333333331, "calib/mu_w": 0.6266666666666667, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.77734375, "calib/pce": 0.4373092369477912, "calib/std_conf": 0.32396363912757653, "calib/step_conf_rate": 0.77734375, "calib/step_q_c": 0.8234177215189872, "calib/step_q_c_n": 79.0, "calib/step_q_gap": 0.09066772151898728, "calib/step_q_w": 0.7327499999999999, "calib/step_q_w_n": 200.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9921875, "completions/max_length": 215.0, "completions/max_terminated_length": 215.0, "completions/mean_length": 1.41015625, "completions/mean_terminated_length": 180.5, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.2832, "grad_norm": 10.389142036437988, "learning_rate": 1.3333333333333334e-07, "loss": 0.0391, "num_tokens": 45940356.0, "reward": 0.6015625, "reward_std": 0.14131318032741547, "rewards/accuracy_reward_step": 0.234375, "rewards/format_reward_step": 0.734375, "step": 177 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6888651121605667, "calib/avg_num_step_conf": 1.0546875, "calib/ece": 0.3931620553359682, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.76171875, "calib/frac_conf_gt_0.9": 0.09881422924901186, "calib/gap": 0.24795454545454532, "calib/mean_conf": 0.6975098814229248, "calib/mu_c": 0.8699999999999998, "calib/mu_w": 0.6220454545454545, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.77734375, "calib/pce": 0.3931620553359682, "calib/std_conf": 0.3263326658421405, "calib/step_conf_rate": 0.77734375, "calib/step_q_c": 0.8630769230769229, "calib/step_q_c_n": 91.0, "calib/step_q_gap": 0.11285345938977209, "calib/step_q_w": 0.7502234636871508, "calib/step_q_w_n": 179.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9921875, "completions/max_length": 264.0, "completions/max_terminated_length": 264.0, "completions/mean_length": 1.8515625, "completions/mean_terminated_length": 237.0, "completions/min_length": 0.0, "completions/min_terminated_length": 210.0, "epoch": 0.2848, "grad_norm": 7.318977355957031, "learning_rate": 1.2777777777777777e-07, "loss": 0.0422, "num_tokens": 46202382.0, "reward": 0.681640625, "reward_std": 0.1777898669242859, "rewards/accuracy_reward_step": 0.30078125, "rewards/format_reward_step": 0.76171875, "step": 178 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.802274185499394, "calib/avg_num_step_conf": 1.03125, "calib/ece": 0.4046428571428572, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.7890625, "calib/frac_conf_gt_0.9": 0.12698412698412698, "calib/gap": 0.23152562914379415, "calib/mean_conf": 0.7340079365079365, "calib/mu_c": 0.889277108433735, "calib/mu_w": 0.6577514792899408, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.81640625, "calib/pce": 0.4046428571428572, "calib/std_conf": 0.2902711648499745, "calib/step_conf_rate": 0.81640625, "calib/step_q_c": 0.8671296296296295, "calib/step_q_c_n": 108.0, "calib/step_q_gap": 0.15597578347578334, "calib/step_q_w": 0.7111538461538461, "calib/step_q_w_n": 156.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 162.0, "completions/max_terminated_length": 162.0, "completions/mean_length": 0.6328125, "completions/mean_terminated_length": 162.0, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.2864, "grad_norm": 3.512399673461914, "learning_rate": 1.2222222222222222e-07, "loss": 0.0113, "num_tokens": 46457040.0, "reward": 0.71875, "reward_std": 0.2521737813949585, "rewards/accuracy_reward_step": 0.32421875, "rewards/format_reward_step": 0.7890625, "step": 179 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.7690644932671864, "calib/avg_num_step_conf": 1.03515625, "calib/ece": 0.3682470119521912, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.765625, "calib/frac_conf_gt_0.9": 0.08366533864541832, "calib/gap": 0.25178525868178603, "calib/mean_conf": 0.7068924302788845, "calib/mu_c": 0.8734117647058824, "calib/mu_w": 0.6216265060240964, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.77734375, "calib/pce": 0.3682470119521912, "calib/std_conf": 0.28738951520310446, "calib/step_conf_rate": 0.77734375, "calib/step_q_c": 0.8467164179104478, "calib/step_q_c_n": 134.0, "calib/step_q_gap": 0.1822126011165548, "calib/step_q_w": 0.664503816793893, "calib/step_q_w_n": 131.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.288, "grad_norm": 0.0, "learning_rate": 1.1666666666666667e-07, "loss": 0.0, "num_tokens": 46714200.0, "reward": 0.71484375, "reward_std": 0.1728074997663498, "rewards/accuracy_reward_step": 0.33203125, "rewards/format_reward_step": 0.765625, "step": 180 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.773431031627753, "calib/avg_num_step_conf": 0.91796875, "calib/ece": 0.4519277108433736, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.75, "calib/frac_conf_gt_0.9": 0.0963855421686747, "calib/gap": 0.23396671634376554, "calib/mean_conf": 0.716987951807229, "calib/mu_c": 0.8889393939393939, "calib/mu_w": 0.6549726775956284, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.79296875, "calib/pce": 0.4519277108433736, "calib/std_conf": 0.3047089661820087, "calib/step_conf_rate": 0.79296875, "calib/step_q_c": 0.8807246376811594, "calib/step_q_c_n": 69.0, "calib/step_q_gap": 0.1521101798498341, "calib/step_q_w": 0.7286144578313253, "calib/step_q_w_n": 166.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2896, "grad_norm": 0.0, "learning_rate": 1.111111111111111e-07, "loss": 0.0, "num_tokens": 46969704.0, "reward": 0.6328125, "reward_std": 0.25595584511756897, "rewards/accuracy_reward_step": 0.2578125, "rewards/format_reward_step": 0.75, "step": 181 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.7419469277706825, "calib/avg_num_step_conf": 1.05859375, "calib/ece": 0.3729803921568627, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.76953125, "calib/frac_conf_gt_0.9": 0.050980392156862744, "calib/gap": 0.2541890166028098, "calib/mean_conf": 0.6906274509803921, "calib/mu_c": 0.8640740740740742, "calib/mu_w": 0.6098850574712644, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.7890625, "calib/pce": 0.3729803921568627, "calib/std_conf": 0.31367586372199213, "calib/step_conf_rate": 0.7890625, "calib/step_q_c": 0.8459223300970876, "calib/step_q_c_n": 103.0, "calib/step_q_gap": 0.07181518723994484, "calib/step_q_w": 0.7741071428571428, "calib/step_q_w_n": 168.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2912, "grad_norm": 0.0, "learning_rate": 1.0555555555555555e-07, "loss": 0.0, "num_tokens": 47230352.0, "reward": 0.701171875, "reward_std": 0.19887131452560425, "rewards/accuracy_reward_step": 0.31640625, "rewards/format_reward_step": 0.76953125, "step": 182 }, { "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.7689903846153846, "calib/avg_num_step_conf": 0.96875, "calib/ece": 0.35266932270916324, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.76953125, "calib/frac_conf_gt_0.9": 0.06772908366533864, "calib/gap": 0.23867582417582445, "calib/mean_conf": 0.71203187250996, "calib/mu_c": 0.8641758241758244, "calib/mu_w": 0.6255, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.82421875, "calib/pce": 0.3510756972111553, "calib/std_conf": 0.2954668380217944, "calib/step_conf_rate": 0.82421875, "calib/step_q_c": 0.8232692307692306, "calib/step_q_c_n": 104.0, "calib/step_q_gap": 0.14653311965811966, "calib/step_q_w": 0.676736111111111, "calib/step_q_w_n": 144.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.98828125, "completions/max_length": 171.0, "completions/max_terminated_length": 171.0, "completions/mean_length": 1.78125, "completions/mean_terminated_length": 152.0, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.2928, "grad_norm": 20.446836471557617, "learning_rate": 1e-07, "loss": 0.0808, "num_tokens": 47491240.0, "reward": 0.740234375, "reward_std": 0.2335708737373352, "rewards/accuracy_reward_step": 0.35546875, "rewards/format_reward_step": 0.76953125, "step": 183 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.7458476622521402, "calib/avg_num_step_conf": 0.91015625, "calib/ece": 0.35170634920634913, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.73828125, "calib/frac_conf_gt_0.9": 0.05952380952380952, "calib/gap": 0.26734689397819544, "calib/mean_conf": 0.6651984126984128, "calib/mu_c": 0.8487341772151896, "calib/mu_w": 0.5813872832369942, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.75390625, "calib/pce": 0.35170634920634913, "calib/std_conf": 0.32788578440751, "calib/step_conf_rate": 0.75390625, "calib/step_q_c": 0.8494505494505494, "calib/step_q_c_n": 91.0, "calib/step_q_gap": 0.1465632255068875, "calib/step_q_w": 0.7028873239436619, "calib/step_q_w_n": 142.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2944, "grad_norm": 0.0, "learning_rate": 9.444444444444444e-08, "loss": 0.0, "num_tokens": 47750568.0, "reward": 0.681640625, "reward_std": 0.18778353929519653, "rewards/accuracy_reward_step": 0.3125, "rewards/format_reward_step": 0.73828125, "step": 184 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.7827762515262515, "calib/avg_num_step_conf": 0.84765625, "calib/ece": 0.31149606299212595, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.65625, "calib/frac_conf_gt_0.9": 0.047244094488188976, "calib/gap": 0.35155067155067155, "calib/mean_conf": 0.5792125984251969, "calib/mu_c": 0.8311111111111111, "calib/mu_w": 0.4795604395604396, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.671875, "calib/pce": 0.3036220472440945, "calib/std_conf": 0.3776181179325033, "calib/step_conf_rate": 0.671875, "calib/step_q_c": 0.8268085106382982, "calib/step_q_c_n": 94.0, "calib/step_q_gap": 0.17778412039439584, "calib/step_q_w": 0.6490243902439023, "calib/step_q_w_n": 123.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.296, "grad_norm": 0.0, "learning_rate": 8.888888888888888e-08, "loss": 0.0, "num_tokens": 48011368.0, "reward": 0.609375, "reward_std": 0.22449977695941925, "rewards/accuracy_reward_step": 0.28125, "rewards/format_reward_step": 0.65625, "step": 185 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.7705296579593014, "calib/avg_num_step_conf": 1.109375, "calib/ece": 0.3654780876494024, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.8125, "calib/frac_conf_gt_0.9": 0.0398406374501992, "calib/gap": 0.24909149949487663, "calib/mean_conf": 0.6905776892430279, "calib/mu_c": 0.8582926829268293, "calib/mu_w": 0.6092011834319526, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.8671875, "calib/pce": 0.36468127490039837, "calib/std_conf": 0.30374810828561666, "calib/step_conf_rate": 0.8671875, "calib/step_q_c": 0.8133684210526315, "calib/step_q_c_n": 95.0, "calib/step_q_gap": 0.09839487607908659, "calib/step_q_w": 0.7149735449735449, "calib/step_q_w_n": 189.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.2976, "grad_norm": 0.0, "learning_rate": 8.333333333333333e-08, "loss": 0.0, "num_tokens": 48273152.0, "reward": 0.7265625, "reward_std": 0.25780707597732544, "rewards/accuracy_reward_step": 0.3203125, "rewards/format_reward_step": 0.8125, "step": 186 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.6652142338416848, "calib/avg_num_step_conf": 1.07421875, "calib/ece": 0.40402390438247, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.86328125, "calib/frac_conf_gt_0.9": 0.0398406374501992, "calib/gap": 0.17209005083514883, "calib/mean_conf": 0.7203585657370517, "calib/mu_c": 0.8369135802469135, "calib/mu_w": 0.6648235294117647, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.921875, "calib/pce": 0.4008366533864541, "calib/std_conf": 0.26822916206632613, "calib/step_conf_rate": 0.921875, "calib/step_q_c": 0.8310752688172042, "calib/step_q_c_n": 93.0, "calib/step_q_gap": 0.11783351057544589, "calib/step_q_w": 0.7132417582417583, "calib/step_q_w_n": 182.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 221.0, "completions/max_terminated_length": 221.0, "completions/mean_length": 0.86328125, "completions/mean_terminated_length": 221.0, "completions/min_length": 0.0, "completions/min_terminated_length": 221.0, "epoch": 0.2992, "grad_norm": 10.69324779510498, "learning_rate": 7.777777777777778e-08, "loss": 0.0386, "num_tokens": 48533581.0, "reward": 0.748046875, "reward_std": 0.17594116926193237, "rewards/accuracy_reward_step": 0.31640625, "rewards/format_reward_step": 0.86328125, "step": 187 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.761209288739594, "calib/avg_num_step_conf": 0.828125, "calib/ece": 0.3547389558232932, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.70703125, "calib/frac_conf_gt_0.9": 0.0321285140562249, "calib/gap": 0.26031035489995613, "calib/mean_conf": 0.6760240963855421, "calib/mu_c": 0.850609756097561, "calib/mu_w": 0.5902994011976048, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.75, "calib/pce": 0.350722891566265, "calib/std_conf": 0.31946594400530953, "calib/step_conf_rate": 0.75, "calib/step_q_c": 0.8476190476190476, "calib/step_q_c_n": 84.0, "calib/step_q_gap": 0.17574404761904772, "calib/step_q_w": 0.6718749999999999, "calib/step_q_w_n": 128.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 960.0, "completions/max_terminated_length": 960.0, "completions/mean_length": 3.75, "completions/mean_terminated_length": 960.0, "completions/min_length": 0.0, "completions/min_terminated_length": 960.0, "epoch": 0.3008, "grad_norm": 0.0, "learning_rate": 7.222222222222221e-08, "loss": 0.0, "num_tokens": 48790861.0, "reward": 0.673828125, "reward_std": 0.2038663625717163, "rewards/accuracy_reward_step": 0.3203125, "rewards/format_reward_step": 0.70703125, "step": 188 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.8780393918008598, "calib/avg_num_step_conf": 0.984375, "calib/ece": 0.2922619047619048, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.84375, "calib/frac_conf_gt_0.9": 0.10714285714285714, "calib/gap": 0.2927356130108425, "calib/mean_conf": 0.7248015873015872, "calib/mu_c": 0.8909174311926606, "calib/mu_w": 0.5981818181818181, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.87890625, "calib/pce": 0.2922619047619048, "calib/std_conf": 0.3002267005839291, "calib/step_conf_rate": 0.87890625, "calib/step_q_c": 0.8728828828828831, "calib/step_q_c_n": 111.0, "calib/step_q_gap": 0.23423040061337963, "calib/step_q_w": 0.6386524822695034, "calib/step_q_w_n": 141.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9921875, "completions/max_length": 235.0, "completions/max_terminated_length": 235.0, "completions/mean_length": 1.48828125, "completions/mean_terminated_length": 190.5, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.3024, "grad_norm": 8.809115409851074, "learning_rate": 6.666666666666667e-08, "loss": 0.0332, "num_tokens": 49049130.0, "reward": 0.84765625, "reward_std": 0.17455264925956726, "rewards/accuracy_reward_step": 0.42578125, "rewards/format_reward_step": 0.84375, "step": 189 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.8290183387270765, "calib/avg_num_step_conf": 0.95703125, "calib/ece": 0.4649402390438246, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.76171875, "calib/frac_conf_gt_0.9": 0.08366533864541832, "calib/gap": 0.28670118662351696, "calib/mean_conf": 0.6362549800796812, "calib/mu_c": 0.8715555555555559, "calib/mu_w": 0.5848543689320389, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.8046875, "calib/pce": 0.46095617529880467, "calib/std_conf": 0.3415507153563842, "calib/step_conf_rate": 0.8046875, "calib/step_q_c": 0.8249090909090913, "calib/step_q_c_n": 55.0, "calib/step_q_gap": 0.14096172248803884, "calib/step_q_w": 0.6839473684210524, "calib/step_q_w_n": 190.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.304, "grad_norm": 0.0, "learning_rate": 6.111111111111111e-08, "loss": 0.0, "num_tokens": 49299186.0, "reward": 0.560546875, "reward_std": 0.1769346296787262, "rewards/accuracy_reward_step": 0.1796875, "rewards/format_reward_step": 0.76171875, "step": 190 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.7287003120336454, "calib/avg_num_step_conf": 1.09375, "calib/ece": 0.40391304347826085, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 0.043478260869565216, "calib/gap": 0.1730375797042466, "calib/mean_conf": 0.763596837944664, "calib/mu_c": 0.8743956043956046, "calib/mu_w": 0.701358024691358, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9453125, "calib/pce": 0.40391304347826085, "calib/std_conf": 0.24516370974306465, "calib/step_conf_rate": 0.9453125, "calib/step_q_c": 0.8656701030927835, "calib/step_q_c_n": 97.0, "calib/step_q_gap": 0.12069742549715512, "calib/step_q_w": 0.7449726775956284, "calib/step_q_w_n": 183.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9921875, "completions/max_length": 106.0, "completions/max_terminated_length": 106.0, "completions/mean_length": 0.76953125, "completions/mean_terminated_length": 98.5, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.3056, "grad_norm": 19.046396255493164, "learning_rate": 5.555555555555555e-08, "loss": 0.0575, "num_tokens": 49560431.0, "reward": 0.806640625, "reward_std": 0.1605677306652069, "rewards/accuracy_reward_step": 0.35546875, "rewards/format_reward_step": 0.90234375, "step": 191 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.7376984126984127, "calib/avg_num_step_conf": 0.96875, "calib/ece": 0.4807539682539681, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.8125, "calib/frac_conf_gt_0.9": 0.03968253968253968, "calib/gap": 0.2616666666666664, "calib/mean_conf": 0.6474206349206351, "calib/mu_c": 0.8654761904761903, "calib/mu_w": 0.6038095238095239, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.8515625, "calib/pce": 0.4807539682539681, "calib/std_conf": 0.3317272223535333, "calib/step_conf_rate": 0.8515625, "calib/step_q_c": 0.8222222222222222, "calib/step_q_c_n": 45.0, "calib/step_q_gap": 0.11975916803503006, "calib/step_q_w": 0.7024630541871921, "calib/step_q_w_n": 203.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9921875, "completions/max_length": 768.0, "completions/max_terminated_length": 768.0, "completions/mean_length": 3.87890625, "completions/mean_terminated_length": 496.5, "completions/min_length": 0.0, "completions/min_terminated_length": 225.0, "epoch": 0.3072, "grad_norm": 6.2011237144470215, "learning_rate": 5e-08, "loss": 0.0518, "num_tokens": 49823568.0, "reward": 0.57421875, "reward_std": 0.19247063994407654, "rewards/accuracy_reward_step": 0.16796875, "rewards/format_reward_step": 0.8125, "step": 192 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.834324582991523, "calib/avg_num_step_conf": 1.14453125, "calib/ece": 0.30621513944223105, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.765625, "calib/frac_conf_gt_0.9": 0.099601593625498, "calib/gap": 0.3342049494120862, "calib/mean_conf": 0.6727490039840638, "calib/mu_c": 0.8844565217391304, "calib/mu_w": 0.5502515723270441, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.80859375, "calib/pce": 0.30621513944223105, "calib/std_conf": 0.3414032630145996, "calib/step_conf_rate": 0.80859375, "calib/step_q_c": 0.8537593984962407, "calib/step_q_c_n": 133.0, "calib/step_q_gap": 0.13188439849624078, "calib/step_q_w": 0.7218749999999999, "calib/step_q_w_n": 160.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 1.0, "completions/max_length": 0.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 0.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 0.0, "completions/min_terminated_length": 0.0, "epoch": 0.3088, "grad_norm": 0.0, "learning_rate": 4.444444444444444e-08, "loss": 0.0, "num_tokens": 50084880.0, "reward": 0.7421875, "reward_std": 0.15903086960315704, "rewards/accuracy_reward_step": 0.359375, "rewards/format_reward_step": 0.765625, "step": 193 }, { "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.7670283303194696, "calib/avg_num_step_conf": 0.984375, "calib/ece": 0.34651821862348187, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.78125, "calib/frac_conf_gt_0.9": 0.0728744939271255, "calib/gap": 0.3051740506329117, "calib/mean_conf": 0.6663562753036437, "calib/mu_c": 0.8739240506329117, "calib/mu_w": 0.56875, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.8359375, "calib/pce": 0.34651821862348187, "calib/std_conf": 0.33855803207737123, "calib/step_conf_rate": 0.8359375, "calib/step_q_c": 0.8615789473684213, "calib/step_q_c_n": 95.0, "calib/step_q_gap": 0.18036875628561888, "calib/step_q_w": 0.6812101910828025, "calib/step_q_w_n": 157.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.984375, "completions/max_length": 259.0, "completions/max_terminated_length": 259.0, "completions/mean_length": 2.8125, "completions/mean_terminated_length": 180.0, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.3104, "grad_norm": 17.856693267822266, "learning_rate": 3.888888888888889e-08, "loss": 0.0842, "num_tokens": 50345448.0, "reward": 0.69921875, "reward_std": 0.2514913082122803, "rewards/accuracy_reward_step": 0.30859375, "rewards/format_reward_step": 0.78125, "step": 194 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.7878414688759515, "calib/avg_num_step_conf": 0.921875, "calib/ece": 0.3490438247011953, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.734375, "calib/frac_conf_gt_0.9": 0.04780876494023904, "calib/gap": 0.3164591730108969, "calib/mean_conf": 0.655816733067729, "calib/mu_c": 0.875194805194805, "calib/mu_w": 0.5587356321839081, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.78515625, "calib/pce": 0.3490438247011953, "calib/std_conf": 0.3450313662611969, "calib/step_conf_rate": 0.78515625, "calib/step_q_c": 0.8465263157894733, "calib/step_q_c_n": 95.0, "calib/step_q_gap": 0.18659723777528892, "calib/step_q_w": 0.6599290780141844, "calib/step_q_w_n": 141.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.9921875, "completions/max_length": 239.0, "completions/max_terminated_length": 239.0, "completions/mean_length": 1.546875, "completions/mean_terminated_length": 198.0, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.312, "grad_norm": 9.03911018371582, "learning_rate": 3.3333333333333334e-08, "loss": 0.0399, "num_tokens": 50603364.0, "reward": 0.66796875, "reward_std": 0.24204961955547333, "rewards/accuracy_reward_step": 0.30078125, "rewards/format_reward_step": 0.734375, "step": 195 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6827997489014438, "calib/avg_num_step_conf": 1.015625, "calib/ece": 0.3133201581027668, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.8515625, "calib/frac_conf_gt_0.9": 0.08300395256916997, "calib/gap": 0.17855555555555536, "calib/mean_conf": 0.7797233201581029, "calib/mu_c": 0.8749999999999999, "calib/mu_w": 0.6964444444444445, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.87890625, "calib/pce": 0.3133201581027668, "calib/std_conf": 0.2399594879543884, "calib/step_conf_rate": 0.87890625, "calib/step_q_c": 0.8587591240875913, "calib/step_q_c_n": 137.0, "calib/step_q_gap": 0.07079164441279462, "calib/step_q_w": 0.7879674796747966, "calib/step_q_w_n": 123.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 112.0, "completions/max_terminated_length": 112.0, "completions/mean_length": 0.4375, "completions/mean_terminated_length": 112.0, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.3136, "grad_norm": 3.043510675430298, "learning_rate": 2.7777777777777774e-08, "loss": 0.0055, "num_tokens": 50865396.0, "reward": 0.88671875, "reward_std": 0.2121565341949463, "rewards/accuracy_reward_step": 0.4609375, "rewards/format_reward_step": 0.8515625, "step": 196 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.7620481927710844, "calib/avg_num_step_conf": 1.1328125, "calib/ece": 0.34219999999999995, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.86328125, "calib/frac_conf_gt_0.9": 0.028, "calib/gap": 0.2590935169248425, "calib/mean_conf": 0.6781999999999999, "calib/mu_c": 0.8502380952380955, "calib/mu_w": 0.591144578313253, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.90625, "calib/pce": 0.34219999999999995, "calib/std_conf": 0.30800707784075354, "calib/step_conf_rate": 0.90625, "calib/step_q_c": 0.8568539325842699, "calib/step_q_c_n": 89.0, "calib/step_q_gap": 0.19625691765889686, "calib/step_q_w": 0.660597014925373, "calib/step_q_w_n": 201.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.98828125, "completions/max_length": 298.0, "completions/max_terminated_length": 298.0, "completions/mean_length": 2.43359375, "completions/mean_terminated_length": 207.6666717529297, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.3152, "grad_norm": 31.83425521850586, "learning_rate": 2.222222222222222e-08, "loss": 0.1026, "num_tokens": 51125291.0, "reward": 0.763671875, "reward_std": 0.18628305196762085, "rewards/accuracy_reward_step": 0.33203125, "rewards/format_reward_step": 0.86328125, "step": 197 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.8009259259259259, "calib/avg_num_step_conf": 0.96875, "calib/ece": 0.39666666666666656, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.734375, "calib/frac_conf_gt_0.9": 0.08130081300813008, "calib/gap": 0.2887121212121212, "calib/mean_conf": 0.664959349593496, "calib/mu_c": 0.8762121212121212, "calib/mu_w": 0.5875, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.7578125, "calib/pce": 0.39666666666666656, "calib/std_conf": 0.34441271796895645, "calib/step_conf_rate": 0.7578125, "calib/step_q_c": 0.8543055555555555, "calib/step_q_c_n": 72.0, "calib/step_q_gap": 0.1301578282828283, "calib/step_q_w": 0.7241477272727272, "calib/step_q_w_n": 176.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 301.0, "completions/max_terminated_length": 301.0, "completions/mean_length": 1.17578125, "completions/mean_terminated_length": 301.0, "completions/min_length": 0.0, "completions/min_terminated_length": 301.0, "epoch": 0.3168, "grad_norm": 3.0404303073883057, "learning_rate": 1.6666666666666667e-08, "loss": 0.0146, "num_tokens": 51387736.0, "reward": 0.625, "reward_std": 0.24578902125358582, "rewards/accuracy_reward_step": 0.2578125, "rewards/format_reward_step": 0.734375, "step": 198 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.7875337022846601, "calib/avg_num_step_conf": 0.97265625, "calib/ece": 0.34646586345381525, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.8203125, "calib/frac_conf_gt_0.9": 0.05220883534136546, "calib/gap": 0.2692443593018303, "calib/mean_conf": 0.6958634538152609, "calib/mu_c": 0.8710344827586204, "calib/mu_w": 0.6017901234567901, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.85546875, "calib/pce": 0.34646586345381525, "calib/std_conf": 0.304499321471865, "calib/step_conf_rate": 0.85546875, "calib/step_q_c": 0.8557291666666668, "calib/step_q_c_n": 96.0, "calib/step_q_gap": 0.1694546568627452, "calib/step_q_w": 0.6862745098039216, "calib/step_q_w_n": 153.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.99609375, "completions/max_length": 255.0, "completions/max_terminated_length": 255.0, "completions/mean_length": 0.99609375, "completions/mean_terminated_length": 255.0, "completions/min_length": 0.0, "completions/min_terminated_length": 255.0, "epoch": 0.3184, "grad_norm": 6.873912811279297, "learning_rate": 1.111111111111111e-08, "loss": 0.0253, "num_tokens": 51650103.0, "reward": 0.75, "reward_std": 0.19911831617355347, "rewards/accuracy_reward_step": 0.33984375, "rewards/format_reward_step": 0.8203125, "step": 199 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.7357768813465017, "calib/avg_num_step_conf": 0.87109375, "calib/ece": 0.311566265060241, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.69140625, "calib/frac_conf_gt_0.9": 0.1606425702811245, "calib/gap": 0.30241062734733604, "calib/mean_conf": 0.6714056224899598, "calib/mu_c": 0.8632967032967032, "calib/mu_w": 0.5608860759493671, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.71484375, "calib/pce": 0.3087550200803213, "calib/std_conf": 0.3414251258986275, "calib/step_conf_rate": 0.71484375, "calib/step_q_c": 0.8533333333333335, "calib/step_q_c_n": 102.0, "calib/step_q_gap": 0.09696969696969726, "calib/step_q_w": 0.7563636363636362, "calib/step_q_w_n": 121.0, "clip_ratio/high_max": NaN, "clip_ratio/high_mean": NaN, "clip_ratio/low_mean": NaN, "clip_ratio/low_min": NaN, "clip_ratio/region_mean": NaN, "completions/clipped_ratio": 0.98828125, "completions/max_length": 431.0, "completions/max_terminated_length": 431.0, "completions/mean_length": 2.87109375, "completions/mean_terminated_length": 245.0, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.32, "grad_norm": 9.112618446350098, "learning_rate": 5.555555555555555e-09, "loss": 0.0405, "num_tokens": 51908390.0, "reward": 0.701171875, "reward_std": 0.19281864166259766, "rewards/accuracy_reward_step": 0.35546875, "rewards/format_reward_step": 0.69140625, "step": 200 }, { "epoch": 0.32, "step": 200, "total_flos": 0.0, "train_loss": 0.01296201538760215, "train_runtime": 3528.5457, "train_samples_per_second": 14.51, "train_steps_per_second": 0.057 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 51908390, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }