Files
PureRL-1.5B-v6i-A-step01-fi…/trainer_state.json
ModelHub XC 08256c64c4 初始化项目,由ModelHub XC社区提供模型
Model: zhaohq/PureRL-1.5B-v6i-A-step01-final01
Source: Original Platform
2026-06-01 12:24:23 +08:00

11038 lines
445 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.21333333333333335,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"aux_distill/final_loss": 0.6349183150700161,
"aux_distill/lambda": 0.1,
"aux_distill/lambda_final": 0.1,
"aux_distill/loss": 0.20483983627387456,
"aux_distill/mean_u": 0.31677682190706,
"aux_distill/n_active_final_tok": 3.7142857142857144,
"aux_distill/n_active_tok": 24.571428571428573,
"aux_distill/step_loss": 1.4134800136089325,
"calib/answer_extract_rate": 0.08203125,
"calib/auroc": 0.6944444444444445,
"calib/avg_num_step_conf": 0.3359375,
"calib/ece": 0.6230769230769231,
"calib/final_conf_rate": 0.05078125,
"calib/format_rate": 0.04296875,
"calib/frac_conf_gt_0.9": 0.7692307692307693,
"calib/gap": 0.03861111111111115,
"calib/mean_conf": 0.9307692307692309,
"calib/mu_c": 0.9575,
"calib/mu_w": 0.9188888888888889,
"calib/nonempty_final_conf_rate": 0.05078125,
"calib/nonempty_reasoning_rate": 0.09765625,
"calib/nonempty_step_conf_rate": 0.0703125,
"calib/pce": 0.6230769230769231,
"calib/std_conf": 0.07965903671384378,
"calib/step_conf_rate": 0.0703125,
"calib/step_q_c": 0.8921052631578947,
"calib/step_q_c_n": 19.0,
"calib/step_q_gap": 0.19807541241162607,
"calib/step_q_w": 0.6940298507462687,
"calib/step_q_w_n": 67.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08984375,
"completions/max_length": 2955.0,
"completions/max_terminated_length": 2955.0,
"completions/mean_length": 613.67578125,
"completions/mean_terminated_length": 674.2532348632812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0010666666666666667,
"grad_norm": 0.02741635963320732,
"learning_rate": 2.5000000000000004e-07,
"loss": 0.121,
"num_tokens": 264685.0,
"reward": 0.037574999034404755,
"reward_std": 0.07449960708618164,
"rewards/accuracy_reward_step": 0.015625,
"rewards/final_brier_reward_step": 0.01655624993145466,
"rewards/format_reward_step": 0.04296875,
"step": 1
},
{
"aux_distill/final_loss": 0.5865676277562192,
"aux_distill/lambda": 0.10000000000000003,
"aux_distill/lambda_final": 0.10000000000000003,
"aux_distill/loss": 0.1695779765907087,
"aux_distill/mean_u": 0.2935626227740425,
"aux_distill/n_active_final_tok": 4.631578947368421,
"aux_distill/n_active_tok": 28.63157894736842,
"aux_distill/step_loss": 1.1092121005058289,
"calib/answer_extract_rate": 0.13671875,
"calib/auroc": 0.5338345864661654,
"calib/avg_num_step_conf": 0.55078125,
"calib/ece": 0.6261538461538463,
"calib/final_conf_rate": 0.1015625,
"calib/format_rate": 0.08984375,
"calib/frac_conf_gt_0.9": 0.7692307692307693,
"calib/gap": 0.002406015037593856,
"calib/mean_conf": 0.8953846153846153,
"calib/mu_c": 0.897142857142857,
"calib/mu_w": 0.8947368421052632,
"calib/nonempty_final_conf_rate": 0.1015625,
"calib/nonempty_reasoning_rate": 0.14453125,
"calib/nonempty_step_conf_rate": 0.109375,
"calib/pce": 0.6261538461538463,
"calib/std_conf": 0.18653172073466937,
"calib/step_conf_rate": 0.109375,
"calib/step_q_c": 0.781,
"calib/step_q_c_n": 20.0,
"calib/step_q_gap": -0.042553719008264435,
"calib/step_q_w": 0.8235537190082645,
"calib/step_q_w_n": 121.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0546875,
"completions/max_length": 3001.0,
"completions/max_terminated_length": 3001.0,
"completions/mean_length": 646.4609375,
"completions/mean_terminated_length": 683.8594970703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0021333333333333334,
"grad_norm": 0.03957248851656914,
"learning_rate": 5.000000000000001e-07,
"loss": 0.1661,
"num_tokens": 533467.0,
"reward": 0.07537207007408142,
"reward_std": 0.14035090804100037,
"rewards/accuracy_reward_step": 0.03125,
"rewards/final_brier_reward_step": 0.02965039201080799,
"rewards/format_reward_step": 0.08984375,
"step": 2
},
{
"aux_distill/final_loss": 0.4299386143684387,
"aux_distill/lambda": 0.09999999999999999,
"aux_distill/lambda_final": 0.09999999999999999,
"aux_distill/loss": 0.19136657193303108,
"aux_distill/mean_u": 0.31480732275070794,
"aux_distill/n_active_final_tok": 3.0,
"aux_distill/n_active_tok": 23.666666666666668,
"aux_distill/step_loss": 1.483727087577184,
"calib/answer_extract_rate": 0.0625,
"calib/auroc": 0.7407407407407407,
"calib/avg_num_step_conf": 0.27734375,
"calib/ece": 0.5583333333333332,
"calib/final_conf_rate": 0.046875,
"calib/format_rate": 0.02734375,
"calib/frac_conf_gt_0.9": 0.6666666666666666,
"calib/gap": 0.21111111111111114,
"calib/mean_conf": 0.8083333333333335,
"calib/mu_c": 0.9666666666666667,
"calib/mu_w": 0.7555555555555555,
"calib/nonempty_final_conf_rate": 0.046875,
"calib/nonempty_reasoning_rate": 0.0859375,
"calib/nonempty_step_conf_rate": 0.0546875,
"calib/pce": 0.5583333333333332,
"calib/std_conf": 0.3195004781773504,
"calib/step_conf_rate": 0.0546875,
"calib/step_q_c": 0.6784615384615384,
"calib/step_q_c_n": 13.0,
"calib/step_q_gap": 0.04570291777188318,
"calib/step_q_w": 0.6327586206896553,
"calib/step_q_w_n": 58.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1015625,
"completions/max_length": 3037.0,
"completions/max_terminated_length": 3037.0,
"completions/mean_length": 667.97265625,
"completions/mean_terminated_length": 743.4826049804688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0032,
"grad_norm": 0.020845437422394753,
"learning_rate": 7.5e-07,
"loss": 0.0895,
"num_tokens": 809724.0,
"reward": 0.027754880487918854,
"reward_std": 0.07302109897136688,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/final_brier_reward_step": 0.016447264701128006,
"rewards/format_reward_step": 0.02734375,
"step": 3
},
{
"aux_distill/final_loss": 0.4729306432935927,
"aux_distill/lambda": 0.09999999999999999,
"aux_distill/lambda_final": 0.09999999999999999,
"aux_distill/loss": 0.16556998011138704,
"aux_distill/mean_u": 0.3788802493180593,
"aux_distill/n_active_final_tok": 3.111111111111111,
"aux_distill/n_active_tok": 30.666666666666668,
"aux_distill/step_loss": 1.1827691396077473,
"calib/answer_extract_rate": 0.0625,
"calib/auroc": 0.35000000000000003,
"calib/avg_num_step_conf": 0.26953125,
"calib/ece": 0.850909090909091,
"calib/final_conf_rate": 0.04296875,
"calib/format_rate": 0.0234375,
"calib/frac_conf_gt_0.9": 0.7272727272727273,
"calib/gap": 0.008999999999999897,
"calib/mean_conf": 0.9418181818181819,
"calib/mu_c": 0.95,
"calib/mu_w": 0.9410000000000001,
"calib/nonempty_final_conf_rate": 0.04296875,
"calib/nonempty_reasoning_rate": 0.07421875,
"calib/nonempty_step_conf_rate": 0.0390625,
"calib/pce": 0.850909090909091,
"calib/std_conf": 0.049141388188143925,
"calib/step_conf_rate": 0.0390625,
"calib/step_q_c": 0.98,
"calib/step_q_c_n": 1.0,
"calib/step_q_gap": 0.13941176470588246,
"calib/step_q_w": 0.8405882352941175,
"calib/step_q_w_n": 68.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.078125,
"completions/max_length": 3071.0,
"completions/max_terminated_length": 3071.0,
"completions/mean_length": 771.17578125,
"completions/mean_terminated_length": 836.5296630859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.004266666666666667,
"grad_norm": 0.01598433405160904,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0348,
"num_tokens": 1113313.0,
"reward": 0.016414452344179153,
"reward_std": 0.04068883880972862,
"rewards/accuracy_reward_step": 0.00390625,
"rewards/final_brier_reward_step": 0.005485156085342169,
"rewards/format_reward_step": 0.0234375,
"step": 4
},
{
"aux_distill/final_loss": 0.43701744079589844,
"aux_distill/lambda": 0.09999999999999999,
"aux_distill/lambda_final": 0.09999999999999999,
"aux_distill/loss": 0.14601918309926987,
"aux_distill/mean_u": 0.35960835996630164,
"aux_distill/n_active_final_tok": 4.0,
"aux_distill/n_active_tok": 28.571428571428573,
"aux_distill/step_loss": 1.0231743454933167,
"calib/answer_extract_rate": 0.046875,
"calib/auroc": 1.0,
"calib/avg_num_step_conf": 0.19921875,
"calib/ece": 0.6382857142857142,
"calib/final_conf_rate": 0.02734375,
"calib/format_rate": 0.02734375,
"calib/frac_conf_gt_0.9": 0.7142857142857143,
"calib/gap": 0.05599999999999994,
"calib/mean_conf": 0.924,
"calib/mu_c": 0.964,
"calib/mu_w": 0.908,
"calib/nonempty_final_conf_rate": 0.02734375,
"calib/nonempty_reasoning_rate": 0.05078125,
"calib/nonempty_step_conf_rate": 0.03515625,
"calib/pce": 0.6382857142857142,
"calib/std_conf": 0.03778132569707647,
"calib/step_conf_rate": 0.03515625,
"calib/step_q_c": 0.8965,
"calib/step_q_c_n": 12.0,
"calib/step_q_gap": 0.16291025641025636,
"calib/step_q_w": 0.7335897435897436,
"calib/step_q_w_n": 39.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09375,
"completions/max_length": 3063.0,
"completions/max_terminated_length": 3063.0,
"completions/mean_length": 738.2734375,
"completions/mean_terminated_length": 814.6465454101562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.005333333333333333,
"grad_norm": 0.013780995272099972,
"learning_rate": 1.25e-06,
"loss": 0.0589,
"num_tokens": 1408999.0,
"reward": 0.023182764649391174,
"reward_std": 0.05281626060605049,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/final_brier_reward_step": 0.011209281161427498,
"rewards/format_reward_step": 0.02734375,
"step": 5
},
{
"aux_distill/final_loss": 0.623933769762516,
"aux_distill/lambda": 0.10000000000000002,
"aux_distill/lambda_final": 0.10000000000000002,
"aux_distill/loss": 0.19028319464996457,
"aux_distill/mean_u": 0.2834644576625892,
"aux_distill/n_active_final_tok": 5.25,
"aux_distill/n_active_tok": 30.0,
"aux_distill/step_loss": 1.2788981422781944,
"calib/answer_extract_rate": 0.10546875,
"calib/auroc": 0.41875,
"calib/avg_num_step_conf": 0.46875,
"calib/ece": 0.6885714285714284,
"calib/final_conf_rate": 0.08203125,
"calib/format_rate": 0.078125,
"calib/frac_conf_gt_0.9": 0.8095238095238095,
"calib/gap": 0.024124999999999952,
"calib/mean_conf": 0.9076190476190474,
"calib/mu_c": 0.9259999999999999,
"calib/mu_w": 0.901875,
"calib/nonempty_final_conf_rate": 0.08203125,
"calib/nonempty_reasoning_rate": 0.1171875,
"calib/nonempty_step_conf_rate": 0.09375,
"calib/pce": 0.6790476190476189,
"calib/std_conf": 0.1934130729295343,
"calib/step_conf_rate": 0.09375,
"calib/step_q_c": 0.7655555555555554,
"calib/step_q_c_n": 27.0,
"calib/step_q_gap": -0.006508960573476719,
"calib/step_q_w": 0.7720645161290322,
"calib/step_q_w_n": 93.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.07421875,
"completions/max_length": 3050.0,
"completions/max_terminated_length": 3050.0,
"completions/mean_length": 575.43359375,
"completions/mean_terminated_length": 621.5653686523438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0064,
"grad_norm": 0.03270503506064415,
"learning_rate": 1.5e-06,
"loss": 0.0907,
"num_tokens": 1662262.0,
"reward": 0.06276483833789825,
"reward_std": 0.1288246214389801,
"rewards/accuracy_reward_step": 0.01953125,
"rewards/final_brier_reward_step": 0.027873437851667404,
"rewards/format_reward_step": 0.078125,
"step": 6
},
{
"aux_distill/final_loss": 0.480152342054579,
"aux_distill/lambda": 0.10000000000000003,
"aux_distill/lambda_final": 0.10000000000000003,
"aux_distill/loss": 0.17669792142179278,
"aux_distill/mean_u": 0.3248833079364306,
"aux_distill/n_active_final_tok": 4.444444444444445,
"aux_distill/n_active_tok": 29.77777777777778,
"aux_distill/step_loss": 1.2868268423610263,
"calib/answer_extract_rate": 0.125,
"calib/auroc": 0.5992063492063492,
"calib/avg_num_step_conf": 0.54296875,
"calib/ece": 0.6348,
"calib/final_conf_rate": 0.09765625,
"calib/format_rate": 0.0703125,
"calib/frac_conf_gt_0.9": 0.76,
"calib/gap": 0.05682539682539678,
"calib/mean_conf": 0.9148,
"calib/mu_c": 0.9557142857142856,
"calib/mu_w": 0.8988888888888888,
"calib/nonempty_final_conf_rate": 0.09765625,
"calib/nonempty_reasoning_rate": 0.140625,
"calib/nonempty_step_conf_rate": 0.109375,
"calib/pce": 0.6348,
"calib/std_conf": 0.10564544476691837,
"calib/step_conf_rate": 0.109375,
"calib/step_q_c": 0.8315384615384614,
"calib/step_q_c_n": 26.0,
"calib/step_q_gap": 0.04466346153846146,
"calib/step_q_w": 0.786875,
"calib/step_q_w_n": 112.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08984375,
"completions/max_length": 3070.0,
"completions/max_terminated_length": 3070.0,
"completions/mean_length": 700.63671875,
"completions/mean_terminated_length": 769.7982788085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.007466666666666667,
"grad_norm": 0.030571160838007927,
"learning_rate": 1.75e-06,
"loss": 0.1156,
"num_tokens": 1949049.0,
"reward": 0.06588749587535858,
"reward_std": 0.1118675172328949,
"rewards/accuracy_reward_step": 0.03125,
"rewards/final_brier_reward_step": 0.03021249920129776,
"rewards/format_reward_step": 0.0703125,
"step": 7
},
{
"aux_distill/final_loss": 0.5085558457808061,
"aux_distill/lambda": 0.09999999999999999,
"aux_distill/lambda_final": 0.09999999999999999,
"aux_distill/loss": 0.19147660447792572,
"aux_distill/mean_u": 0.3443807402134946,
"aux_distill/n_active_final_tok": 3.6363636363636362,
"aux_distill/n_active_tok": 24.727272727272727,
"aux_distill/step_loss": 1.4062101624228738,
"calib/answer_extract_rate": 0.0859375,
"calib/auroc": 0.36363636363636365,
"calib/avg_num_step_conf": 0.2734375,
"calib/ece": 0.696153846153846,
"calib/final_conf_rate": 0.05078125,
"calib/format_rate": 0.0390625,
"calib/frac_conf_gt_0.9": 0.6923076923076923,
"calib/gap": 0.06500000000000006,
"calib/mean_conf": 0.8500000000000001,
"calib/mu_c": 0.905,
"calib/mu_w": 0.84,
"calib/nonempty_final_conf_rate": 0.05078125,
"calib/nonempty_reasoning_rate": 0.10546875,
"calib/nonempty_step_conf_rate": 0.0625,
"calib/pce": 0.696153846153846,
"calib/std_conf": 0.2605910441816685,
"calib/step_conf_rate": 0.0625,
"calib/step_q_c": 0.7890909090909091,
"calib/step_q_c_n": 11.0,
"calib/step_q_gap": -0.02260400616332814,
"calib/step_q_w": 0.8116949152542372,
"calib/step_q_w_n": 59.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.11328125,
"completions/max_length": 3040.0,
"completions/max_terminated_length": 3040.0,
"completions/mean_length": 613.6875,
"completions/mean_terminated_length": 692.0880737304688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.008533333333333334,
"grad_norm": 0.018061090260744095,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0622,
"num_tokens": 2212665.0,
"reward": 0.03364472836256027,
"reward_std": 0.06432777643203735,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/final_brier_reward_step": 0.016508202999830246,
"rewards/format_reward_step": 0.0390625,
"step": 8
},
{
"aux_distill/final_loss": 0.39097317059834796,
"aux_distill/lambda": 0.09999999999999999,
"aux_distill/lambda_final": 0.09999999999999999,
"aux_distill/loss": 0.18374727790554365,
"aux_distill/mean_u": 0.29662109967481987,
"aux_distill/n_active_final_tok": 2.0,
"aux_distill/n_active_tok": 15.666666666666666,
"aux_distill/step_loss": 1.4464995861053467,
"calib/answer_extract_rate": 0.0703125,
"calib/auroc": 0.75,
"calib/avg_num_step_conf": 0.18359375,
"calib/ece": 0.6016666666666666,
"calib/final_conf_rate": 0.0234375,
"calib/format_rate": 0.0234375,
"calib/frac_conf_gt_0.9": 0.6666666666666666,
"calib/gap": 0.05249999999999999,
"calib/mean_conf": 0.9349999999999999,
"calib/mu_c": 0.97,
"calib/mu_w": 0.9175,
"calib/nonempty_final_conf_rate": 0.0234375,
"calib/nonempty_reasoning_rate": 0.0859375,
"calib/nonempty_step_conf_rate": 0.046875,
"calib/pce": 0.6016666666666666,
"calib/std_conf": 0.06344288770224757,
"calib/step_conf_rate": 0.046875,
"calib/step_q_c": 0.8283333333333333,
"calib/step_q_c_n": 6.0,
"calib/step_q_gap": 0.07784552845528436,
"calib/step_q_w": 0.7504878048780489,
"calib/step_q_w_n": 41.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.078125,
"completions/max_length": 3015.0,
"completions/max_terminated_length": 3015.0,
"completions/mean_length": 616.828125,
"completions/mean_terminated_length": 669.1016845703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0096,
"grad_norm": 0.021595383062958717,
"learning_rate": 2.25e-06,
"loss": 0.0848,
"num_tokens": 2478109.0,
"reward": 0.020723631605505943,
"reward_std": 0.05178426578640938,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/final_brier_reward_step": 0.010197265073657036,
"rewards/format_reward_step": 0.0234375,
"step": 9
},
{
"aux_distill/final_loss": 0.3737011637006487,
"aux_distill/lambda": 0.1,
"aux_distill/lambda_final": 0.1,
"aux_distill/loss": 0.1851541953427451,
"aux_distill/mean_u": 0.21911699772481932,
"aux_distill/n_active_final_tok": 2.2857142857142856,
"aux_distill/n_active_tok": 19.714285714285715,
"aux_distill/step_loss": 1.4778407428945814,
"calib/answer_extract_rate": 0.08984375,
"calib/auroc": 0.375,
"calib/avg_num_step_conf": 0.26953125,
"calib/ece": 0.8366666666666669,
"calib/final_conf_rate": 0.03515625,
"calib/format_rate": 0.01953125,
"calib/frac_conf_gt_0.9": 0.7777777777777778,
"calib/gap": -0.019999999999999907,
"calib/mean_conf": 0.9477777777777779,
"calib/mu_c": 0.93,
"calib/mu_w": 0.95,
"calib/nonempty_final_conf_rate": 0.03515625,
"calib/nonempty_reasoning_rate": 0.10546875,
"calib/nonempty_step_conf_rate": 0.05859375,
"calib/pce": 0.8366666666666669,
"calib/std_conf": 0.03456966485800897,
"calib/step_conf_rate": 0.05859375,
"calib/step_q_c": 0.8927272727272727,
"calib/step_q_c_n": 11.0,
"calib/step_q_gap": 0.06815830721003124,
"calib/step_q_w": 0.8245689655172415,
"calib/step_q_w_n": 58.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.078125,
"completions/max_length": 3055.0,
"completions/max_terminated_length": 3055.0,
"completions/mean_length": 652.09765625,
"completions/mean_terminated_length": 707.3601684570312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.010666666666666666,
"grad_norm": 0.021187864243984222,
"learning_rate": 2.5e-06,
"loss": 0.09,
"num_tokens": 2751846.0,
"reward": 0.016434960067272186,
"reward_std": 0.046485088765621185,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/final_brier_reward_step": 0.005526171997189522,
"rewards/format_reward_step": 0.01953125,
"step": 10
},
{
"aux_distill/final_loss": 0.37451341877812927,
"aux_distill/lambda": 0.10000000000000003,
"aux_distill/lambda_final": 0.10000000000000003,
"aux_distill/loss": 0.17273964635703876,
"aux_distill/mean_u": 0.2728877732111283,
"aux_distill/n_active_final_tok": 2.9565217391304346,
"aux_distill/n_active_tok": 18.434782608695652,
"aux_distill/step_loss": 1.3528830020324043,
"calib/answer_extract_rate": 0.12890625,
"calib/auroc": 0.0,
"calib/avg_num_step_conf": 0.4296875,
"calib/ece": 0.8352631578947369,
"calib/final_conf_rate": 0.07421875,
"calib/format_rate": 0.0625,
"calib/frac_conf_gt_0.9": 0.7894736842105263,
"calib/gap": -0.1570588235294117,
"calib/mean_conf": 0.9405263157894738,
"calib/mu_c": 0.8,
"calib/mu_w": 0.9570588235294117,
"calib/nonempty_final_conf_rate": 0.07421875,
"calib/nonempty_reasoning_rate": 0.15625,
"calib/nonempty_step_conf_rate": 0.109375,
"calib/pce": 0.8352631578947369,
"calib/std_conf": 0.06637006968193183,
"calib/step_conf_rate": 0.109375,
"calib/step_q_c": 0.81,
"calib/step_q_c_n": 9.0,
"calib/step_q_gap": 0.028242574257425823,
"calib/step_q_w": 0.7817574257425742,
"calib/step_q_w_n": 101.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1015625,
"completions/max_length": 2849.0,
"completions/max_terminated_length": 2849.0,
"completions/mean_length": 678.2109375,
"completions/mean_terminated_length": 754.8782348632812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.011733333333333333,
"grad_norm": 0.030223537236452103,
"learning_rate": 2.7500000000000004e-06,
"loss": 0.149,
"num_tokens": 3029948.0,
"reward": 0.04105761647224426,
"reward_std": 0.07741484045982361,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/final_brier_reward_step": 0.007896484807133675,
"rewards/format_reward_step": 0.0625,
"step": 11
},
{
"aux_distill/final_loss": 0.4656365607914172,
"aux_distill/lambda": 0.10000000000000003,
"aux_distill/lambda_final": 0.10000000000000003,
"aux_distill/loss": 0.17278049298022924,
"aux_distill/mean_u": 0.4000783307520668,
"aux_distill/n_active_final_tok": 4.631578947368421,
"aux_distill/n_active_tok": 32.421052631578945,
"aux_distill/step_loss": 1.262168332150108,
"calib/answer_extract_rate": 0.12890625,
"calib/auroc": 0.30526315789473685,
"calib/avg_num_step_conf": 0.609375,
"calib/ece": 0.735,
"calib/final_conf_rate": 0.09375,
"calib/format_rate": 0.0859375,
"calib/frac_conf_gt_0.9": 0.7916666666666666,
"calib/gap": -0.011789473684210572,
"calib/mean_conf": 0.9433333333333334,
"calib/mu_c": 0.9339999999999999,
"calib/mu_w": 0.9457894736842105,
"calib/nonempty_final_conf_rate": 0.09375,
"calib/nonempty_reasoning_rate": 0.1640625,
"calib/nonempty_step_conf_rate": 0.125,
"calib/pce": 0.735,
"calib/std_conf": 0.07168604389202189,
"calib/step_conf_rate": 0.125,
"calib/step_q_c": 0.7010000000000001,
"calib/step_q_c_n": 20.0,
"calib/step_q_gap": -0.11686764705882347,
"calib/step_q_w": 0.8178676470588235,
"calib/step_q_w_n": 136.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0859375,
"completions/max_length": 2880.0,
"completions/max_terminated_length": 2880.0,
"completions/mean_length": 533.29296875,
"completions/mean_terminated_length": 583.431640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0128,
"grad_norm": 0.02594468556344509,
"learning_rate": 3e-06,
"loss": 0.1238,
"num_tokens": 3270647.0,
"reward": 0.06582578271627426,
"reward_std": 0.11194424331188202,
"rewards/accuracy_reward_step": 0.01953125,
"rewards/final_brier_reward_step": 0.026182813569903374,
"rewards/format_reward_step": 0.0859375,
"step": 12
},
{
"aux_distill/final_loss": 0.3937861807644367,
"aux_distill/lambda": 0.10000000000000003,
"aux_distill/lambda_final": 0.10000000000000003,
"aux_distill/loss": 0.16708447442700466,
"aux_distill/mean_u": 0.34409069323454206,
"aux_distill/n_active_final_tok": 4.0,
"aux_distill/n_active_tok": 27.333333333333332,
"aux_distill/step_loss": 1.277058516939481,
"calib/answer_extract_rate": 0.12890625,
"calib/auroc": 0.49038461538461536,
"calib/avg_num_step_conf": 0.671875,
"calib/ece": 0.72934375,
"calib/final_conf_rate": 0.125,
"calib/format_rate": 0.0859375,
"calib/frac_conf_gt_0.9": 0.78125,
"calib/gap": 0.042858974358974455,
"calib/mean_conf": 0.91684375,
"calib/mu_c": 0.9516666666666667,
"calib/mu_w": 0.9088076923076922,
"calib/nonempty_final_conf_rate": 0.125,
"calib/nonempty_reasoning_rate": 0.16796875,
"calib/nonempty_step_conf_rate": 0.14453125,
"calib/pce": 0.72934375,
"calib/std_conf": 0.11660861604503116,
"calib/step_conf_rate": 0.14453125,
"calib/step_q_c": 0.7872727272727271,
"calib/step_q_c_n": 22.0,
"calib/step_q_gap": 0.002272727272727093,
"calib/step_q_w": 0.785,
"calib/step_q_w_n": 150.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 2631.0,
"completions/max_terminated_length": 2631.0,
"completions/mean_length": 626.1328125,
"completions/mean_terminated_length": 667.8750610351562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.013866666666666666,
"grad_norm": 0.029530515894293785,
"learning_rate": 3.2500000000000002e-06,
"loss": 0.1755,
"num_tokens": 3535529.0,
"reward": 0.06705722212791443,
"reward_std": 0.1322254240512848,
"rewards/accuracy_reward_step": 0.0234375,
"rewards/final_brier_reward_step": 0.024739447981119156,
"rewards/format_reward_step": 0.0859375,
"step": 13
},
{
"aux_distill/final_loss": 0.2387990951538086,
"aux_distill/lambda": 0.10000000000000003,
"aux_distill/lambda_final": 0.10000000000000003,
"aux_distill/loss": 0.13480552989575598,
"aux_distill/mean_u": 0.25604527477694694,
"aux_distill/n_active_final_tok": 4.222222222222222,
"aux_distill/n_active_tok": 34.0,
"aux_distill/step_loss": 1.1092561682065327,
"calib/answer_extract_rate": 0.1171875,
"calib/auroc": 0.6142857142857143,
"calib/avg_num_step_conf": 0.59765625,
"calib/ece": 0.5931578947368421,
"calib/final_conf_rate": 0.07421875,
"calib/format_rate": 0.05859375,
"calib/frac_conf_gt_0.9": 0.7894736842105263,
"calib/gap": 0.059285714285714386,
"calib/mean_conf": 0.8563157894736841,
"calib/mu_c": 0.9,
"calib/mu_w": 0.8407142857142856,
"calib/nonempty_final_conf_rate": 0.07421875,
"calib/nonempty_reasoning_rate": 0.1328125,
"calib/nonempty_step_conf_rate": 0.1015625,
"calib/pce": 0.5931578947368421,
"calib/std_conf": 0.23414369241570643,
"calib/step_conf_rate": 0.1015625,
"calib/step_q_c": 0.6892592592592592,
"calib/step_q_c_n": 27.0,
"calib/step_q_gap": 0.03703465608465617,
"calib/step_q_w": 0.6522246031746031,
"calib/step_q_w_n": 126.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.06640625,
"completions/max_length": 3001.0,
"completions/max_terminated_length": 3001.0,
"completions/mean_length": 650.390625,
"completions/mean_terminated_length": 696.6527099609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.014933333333333333,
"grad_norm": 0.021831141784787178,
"learning_rate": 3.5e-06,
"loss": 0.1041,
"num_tokens": 3807429.0,
"reward": 0.056269921362400055,
"reward_std": 0.08187633007764816,
"rewards/accuracy_reward_step": 0.0234375,
"rewards/final_brier_reward_step": 0.03050859272480011,
"rewards/format_reward_step": 0.05859375,
"step": 14
},
{
"aux_distill/final_loss": 0.20902437912790398,
"aux_distill/lambda": 0.10000000000000003,
"aux_distill/lambda_final": 0.10000000000000003,
"aux_distill/loss": 0.13648096746519991,
"aux_distill/mean_u": 0.28301270779921556,
"aux_distill/n_active_final_tok": 3.5789473684210527,
"aux_distill/n_active_tok": 26.94736842105263,
"aux_distill/step_loss": 1.155785243762167,
"calib/answer_extract_rate": 0.109375,
"calib/auroc": 0.3452380952380953,
"calib/avg_num_step_conf": 0.50390625,
"calib/ece": 0.6547826086956522,
"calib/final_conf_rate": 0.08984375,
"calib/format_rate": 0.06640625,
"calib/frac_conf_gt_0.9": 0.43478260869565216,
"calib/gap": -0.20214285714285707,
"calib/mean_conf": 0.7095652173913044,
"calib/mu_c": 0.525,
"calib/mu_w": 0.7271428571428571,
"calib/nonempty_final_conf_rate": 0.08984375,
"calib/nonempty_reasoning_rate": 0.15234375,
"calib/nonempty_step_conf_rate": 0.1171875,
"calib/pce": 0.6386956521739131,
"calib/std_conf": 0.32915541803979576,
"calib/step_conf_rate": 0.1171875,
"calib/step_q_c": 0.6079999999999999,
"calib/step_q_c_n": 5.0,
"calib/step_q_gap": 0.03203225806451604,
"calib/step_q_w": 0.5759677419354838,
"calib/step_q_w_n": 124.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1015625,
"completions/max_length": 3033.0,
"completions/max_terminated_length": 3033.0,
"completions/mean_length": 597.63671875,
"completions/mean_terminated_length": 665.1956176757812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.016,
"grad_norm": 0.020071037113666534,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.0956,
"num_tokens": 4068304.0,
"reward": 0.05219629406929016,
"reward_std": 0.11629487574100494,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/final_brier_reward_step": 0.030173826962709427,
"rewards/format_reward_step": 0.06640625,
"step": 15
},
{
"aux_distill/final_loss": 0.22446956237157187,
"aux_distill/lambda": 0.09999999999999999,
"aux_distill/lambda_final": 0.09999999999999999,
"aux_distill/loss": 0.14110560218493143,
"aux_distill/mean_u": 0.47624583883926563,
"aux_distill/n_active_final_tok": 2.3333333333333335,
"aux_distill/n_active_tok": 21.333333333333332,
"aux_distill/step_loss": 1.18658642967542,
"calib/answer_extract_rate": 0.04296875,
"calib/auroc": 0.5666666666666667,
"calib/avg_num_step_conf": 0.25,
"calib/ece": 0.5200000000000002,
"calib/final_conf_rate": 0.03125,
"calib/format_rate": 0.0234375,
"calib/frac_conf_gt_0.9": 0.875,
"calib/gap": 0.1146666666666667,
"calib/mean_conf": 0.895,
"calib/mu_c": 0.9666666666666667,
"calib/mu_w": 0.852,
"calib/nonempty_final_conf_rate": 0.03125,
"calib/nonempty_reasoning_rate": 0.07421875,
"calib/nonempty_step_conf_rate": 0.05859375,
"calib/pce": 0.5200000000000002,
"calib/std_conf": 0.188547076349648,
"calib/step_conf_rate": 0.05859375,
"calib/step_q_c": 0.8644444444444445,
"calib/step_q_c_n": 9.0,
"calib/step_q_gap": 0.31748080808080825,
"calib/step_q_w": 0.5469636363636362,
"calib/step_q_w_n": 55.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1015625,
"completions/max_length": 3054.0,
"completions/max_terminated_length": 3054.0,
"completions/mean_length": 703.5859375,
"completions/mean_terminated_length": 783.1217041015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.017066666666666667,
"grad_norm": 0.013928981497883797,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0638,
"num_tokens": 4357270.0,
"reward": 0.025447461754083633,
"reward_std": 0.06808701902627945,
"rewards/accuracy_reward_step": 0.015625,
"rewards/final_brier_reward_step": 0.011832421645522118,
"rewards/format_reward_step": 0.0234375,
"step": 16
},
{
"aux_distill/final_loss": 0.2633027576264881,
"aux_distill/lambda": 0.10000000000000002,
"aux_distill/lambda_final": 0.10000000000000002,
"aux_distill/loss": 0.13817484215611503,
"aux_distill/mean_u": 0.3446736028406753,
"aux_distill/n_active_final_tok": 3.8095238095238093,
"aux_distill/n_active_tok": 37.142857142857146,
"aux_distill/step_loss": 1.118445634841919,
"calib/answer_extract_rate": 0.109375,
"calib/auroc": 0.8026315789473685,
"calib/avg_num_step_conf": 0.765625,
"calib/ece": 0.6379999999999999,
"calib/final_conf_rate": 0.08984375,
"calib/format_rate": 0.06640625,
"calib/frac_conf_gt_0.9": 0.5217391304347826,
"calib/gap": 0.12478947368421045,
"calib/mean_conf": 0.8119130434782609,
"calib/mu_c": 0.9149999999999999,
"calib/mu_w": 0.7902105263157895,
"calib/nonempty_final_conf_rate": 0.08984375,
"calib/nonempty_reasoning_rate": 0.1484375,
"calib/nonempty_step_conf_rate": 0.12109375,
"calib/pce": 0.6379999999999999,
"calib/std_conf": 0.24913867502875792,
"calib/step_conf_rate": 0.12109375,
"calib/step_q_c": 0.4613333333333333,
"calib/step_q_c_n": 15.0,
"calib/step_q_gap": -0.11110478821362796,
"calib/step_q_w": 0.5724381215469613,
"calib/step_q_w_n": 181.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1015625,
"completions/max_length": 3054.0,
"completions/max_terminated_length": 3054.0,
"completions/mean_length": 633.1796875,
"completions/mean_terminated_length": 704.7564697265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.018133333333333335,
"grad_norm": 0.01893424428999424,
"learning_rate": 4.25e-06,
"loss": 0.0975,
"num_tokens": 4622892.0,
"reward": 0.05825863778591156,
"reward_std": 0.09287722408771515,
"rewards/accuracy_reward_step": 0.01953125,
"rewards/final_brier_reward_step": 0.030579781159758568,
"rewards/format_reward_step": 0.06640625,
"step": 17
},
{
"aux_distill/final_loss": 0.19629781896417792,
"aux_distill/lambda": 0.09999999999999999,
"aux_distill/lambda_final": 0.09999999999999999,
"aux_distill/loss": 0.13067916916175323,
"aux_distill/mean_u": 0.32321768464600764,
"aux_distill/n_active_final_tok": 3.272727272727273,
"aux_distill/n_active_tok": 18.90909090909091,
"aux_distill/step_loss": 1.110493849624287,
"calib/answer_extract_rate": 0.046875,
"calib/avg_num_step_conf": 0.203125,
"calib/ece": 0.734,
"calib/final_conf_rate": 0.0390625,
"calib/format_rate": 0.03125,
"calib/frac_conf_gt_0.9": 0.4,
"calib/mean_conf": 0.7340000000000001,
"calib/mu_c": null,
"calib/mu_w": 0.7340000000000001,
"calib/nonempty_final_conf_rate": 0.0390625,
"calib/nonempty_reasoning_rate": 0.05859375,
"calib/nonempty_step_conf_rate": 0.04296875,
"calib/pce": 0.734,
"calib/std_conf": 0.32255852182200984,
"calib/step_conf_rate": 0.04296875,
"calib/step_q_w": 0.44249999999999995,
"calib/step_q_w_n": 52.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1328125,
"completions/max_length": 2930.0,
"completions/max_terminated_length": 2930.0,
"completions/mean_length": 616.7109375,
"completions/mean_terminated_length": 711.1621704101562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0192,
"grad_norm": 0.013651098124682903,
"learning_rate": 4.5e-06,
"loss": 0.0664,
"num_tokens": 4891490.0,
"reward": 0.02233300730586052,
"reward_std": 0.05675097182393074,
"rewards/accuracy_reward_step": 0.0,
"rewards/final_brier_reward_step": 0.013416014611721039,
"rewards/format_reward_step": 0.03125,
"step": 18
},
{
"aux_distill/final_loss": 0.11861236074141093,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.13091924147946493,
"aux_distill/mean_u": 0.4190444197773418,
"aux_distill/n_active_final_tok": 4.428571428571429,
"aux_distill/n_active_tok": 31.428571428571427,
"aux_distill/step_loss": 1.1905800317014967,
"calib/answer_extract_rate": 0.19140625,
"calib/auroc": 0.7720588235294117,
"calib/avg_num_step_conf": 0.859375,
"calib/ece": 0.3972222222222222,
"calib/final_conf_rate": 0.140625,
"calib/format_rate": 0.1015625,
"calib/frac_conf_gt_0.9": 0.2222222222222222,
"calib/gap": 0.4311764705882354,
"calib/mean_conf": 0.4527777777777778,
"calib/mu_c": 0.8600000000000001,
"calib/mu_w": 0.4288235294117647,
"calib/nonempty_final_conf_rate": 0.140625,
"calib/nonempty_reasoning_rate": 0.25,
"calib/nonempty_step_conf_rate": 0.203125,
"calib/pce": 0.3972222222222222,
"calib/std_conf": 0.36656332718850393,
"calib/step_conf_rate": 0.203125,
"calib/step_q_c": 0.34777777777777774,
"calib/step_q_c_n": 9.0,
"calib/step_q_gap": -0.05961179568193792,
"calib/step_q_w": 0.40738957345971566,
"calib/step_q_w_n": 211.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09375,
"completions/max_length": 2726.0,
"completions/max_terminated_length": 2726.0,
"completions/mean_length": 527.7734375,
"completions/mean_terminated_length": 582.3706665039062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.020266666666666665,
"grad_norm": 0.020213766023516655,
"learning_rate": 4.75e-06,
"loss": 0.1487,
"num_tokens": 5131360.0,
"reward": 0.09431396424770355,
"reward_std": 0.14942510426044464,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/final_brier_reward_step": 0.0753466784954071,
"rewards/format_reward_step": 0.1015625,
"step": 19
},
{
"aux_distill/final_loss": 0.1630369145423174,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.11918571154619086,
"aux_distill/mean_u": 0.31566692087739423,
"aux_distill/n_active_final_tok": 5.931034482758621,
"aux_distill/n_active_tok": 47.310344827586206,
"aux_distill/step_loss": 1.0288201899364078,
"calib/answer_extract_rate": 0.265625,
"calib/auroc": 0.4778012684989429,
"calib/avg_num_step_conf": 1.33984375,
"calib/ece": 0.4133395740740742,
"calib/final_conf_rate": 0.2109375,
"calib/format_rate": 0.1484375,
"calib/frac_conf_gt_0.9": 0.2222222222222222,
"calib/gap": -0.017851389006342477,
"calib/mean_conf": 0.45148772222222217,
"calib/mu_c": 0.4372727272727273,
"calib/mu_w": 0.4551241162790698,
"calib/nonempty_final_conf_rate": 0.2109375,
"calib/nonempty_reasoning_rate": 0.3828125,
"calib/nonempty_step_conf_rate": 0.27734375,
"calib/pce": 0.33056179629629634,
"calib/std_conf": 0.37462261055797835,
"calib/step_conf_rate": 0.27734375,
"calib/step_q_c": 0.3368372093023256,
"calib/step_q_c_n": 43.0,
"calib/step_q_gap": -0.06946942736434114,
"calib/step_q_w": 0.4063066366666667,
"calib/step_q_w_n": 300.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.04296875,
"completions/max_length": 3067.0,
"completions/max_terminated_length": 3067.0,
"completions/mean_length": 570.82421875,
"completions/mean_terminated_length": 596.4530639648438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.021333333333333333,
"grad_norm": 0.015477465465664864,
"learning_rate": 5e-06,
"loss": 0.1807,
"num_tokens": 5382363.0,
"reward": 0.14076992869377136,
"reward_std": 0.2674937844276428,
"rewards/accuracy_reward_step": 0.04296875,
"rewards/final_brier_reward_step": 0.09013360738754272,
"rewards/format_reward_step": 0.1484375,
"step": 20
},
{
"aux_distill/final_loss": 0.07824607957154513,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.12015693659583727,
"aux_distill/mean_u": 0.32890082250085595,
"aux_distill/n_active_final_tok": 5.466666666666667,
"aux_distill/n_active_tok": 37.46666666666667,
"aux_distill/step_loss": 1.123323275645574,
"calib/answer_extract_rate": 0.27734375,
"calib/auroc": 0.6212765957446809,
"calib/avg_num_step_conf": 1.109375,
"calib/ece": 0.33134515062699055,
"calib/final_conf_rate": 0.22265625,
"calib/format_rate": 0.15625,
"calib/frac_conf_gt_0.9": 0.15789473684210525,
"calib/gap": 0.18378028905826505,
"calib/mean_conf": 0.4314618669168691,
"calib/mu_c": 0.583,
"calib/mu_w": 0.3992197109417349,
"calib/nonempty_final_conf_rate": 0.22265625,
"calib/nonempty_reasoning_rate": 0.359375,
"calib/nonempty_step_conf_rate": 0.2578125,
"calib/pce": 0.29368421052631577,
"calib/std_conf": 0.36305120477938346,
"calib/step_conf_rate": 0.2578125,
"calib/step_q_c": 0.6073076923076923,
"calib/step_q_c_n": 26.0,
"calib/step_q_gap": 0.2165480569679189,
"calib/step_q_w": 0.3907596353397734,
"calib/step_q_w_n": 258.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 2963.0,
"completions/max_terminated_length": 2963.0,
"completions/mean_length": 551.85546875,
"completions/mean_terminated_length": 601.170166015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.0224,
"grad_norm": 0.0173453651368618,
"learning_rate": 4.9722222222222224e-06,
"loss": 0.2237,
"num_tokens": 5626598.0,
"reward": 0.15919816493988037,
"reward_std": 0.28079676628112793,
"rewards/accuracy_reward_step": 0.0390625,
"rewards/final_brier_reward_step": 0.12308384478092194,
"rewards/format_reward_step": 0.15625,
"step": 21
},
{
"aux_distill/final_loss": 0.10222903767134994,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.11608608160167933,
"aux_distill/mean_u": 0.36473851604266366,
"aux_distill/n_active_final_tok": 8.25,
"aux_distill/n_active_tok": 48.75,
"aux_distill/step_loss": 1.0586317628622055,
"calib/answer_extract_rate": 0.34375,
"calib/auroc": 0.34110169491525427,
"calib/avg_num_step_conf": 1.53125,
"calib/ece": 0.3980276056338028,
"calib/final_conf_rate": 0.27734375,
"calib/format_rate": 0.25,
"calib/frac_conf_gt_0.9": 0.19718309859154928,
"calib/gap": -0.1942820338983051,
"calib/mean_conf": 0.40644563380281684,
"calib/mu_c": 0.24499999999999997,
"calib/mu_w": 0.43928203389830506,
"calib/nonempty_final_conf_rate": 0.27734375,
"calib/nonempty_reasoning_rate": 0.45703125,
"calib/nonempty_step_conf_rate": 0.37890625,
"calib/pce": 0.31772957746478875,
"calib/std_conf": 0.3810141867483041,
"calib/step_conf_rate": 0.37890625,
"calib/step_q_c": 0.31976744186046513,
"calib/step_q_c_n": 43.0,
"calib/step_q_gap": -0.012243663891437495,
"calib/step_q_w": 0.3320111057519026,
"calib/step_q_w_n": 349.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0390625,
"completions/max_length": 2913.0,
"completions/max_terminated_length": 2913.0,
"completions/mean_length": 469.73828125,
"completions/mean_terminated_length": 488.83331298828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.023466666666666667,
"grad_norm": 0.019904376938939095,
"learning_rate": 4.944444444444445e-06,
"loss": 0.248,
"num_tokens": 5848667.0,
"reward": 0.2241542935371399,
"reward_std": 0.32099008560180664,
"rewards/accuracy_reward_step": 0.05078125,
"rewards/final_brier_reward_step": 0.1475273072719574,
"rewards/format_reward_step": 0.25,
"step": 22
},
{
"aux_distill/final_loss": 0.08589777469751425,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.1131642828695476,
"aux_distill/mean_u": 0.27366238856607294,
"aux_distill/n_active_final_tok": 8.75,
"aux_distill/n_active_tok": 49.375,
"aux_distill/step_loss": 1.0457450337707996,
"calib/answer_extract_rate": 0.3984375,
"calib/auroc": 0.6170634920634921,
"calib/avg_num_step_conf": 1.57421875,
"calib/ece": 0.30312093023255815,
"calib/final_conf_rate": 0.3359375,
"calib/format_rate": 0.2578125,
"calib/frac_conf_gt_0.9": 0.13953488372093023,
"calib/gap": 0.15406706349206356,
"calib/mean_conf": 0.3867279069767442,
"calib/mu_c": 0.5157142857142858,
"calib/mu_w": 0.36164722222222223,
"calib/nonempty_final_conf_rate": 0.3359375,
"calib/nonempty_reasoning_rate": 0.52734375,
"calib/nonempty_step_conf_rate": 0.43359375,
"calib/pce": 0.26352906976744184,
"calib/std_conf": 0.3571084463815083,
"calib/step_conf_rate": 0.43359375,
"calib/step_q_c": 0.6026829268292683,
"calib/step_q_c_n": 41.0,
"calib/step_q_gap": 0.27729279912459975,
"calib/step_q_w": 0.3253901277046685,
"calib/step_q_w_n": 362.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 2504.0,
"completions/max_terminated_length": 2504.0,
"completions/mean_length": 456.12890625,
"completions/mean_terminated_length": 467.0760192871094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.024533333333333334,
"grad_norm": 0.017549779266119003,
"learning_rate": 4.9166666666666665e-06,
"loss": 0.2146,
"num_tokens": 6069372.0,
"reward": 0.2545512318611145,
"reward_std": 0.3819565176963806,
"rewards/accuracy_reward_step": 0.0625,
"rewards/final_brier_reward_step": 0.188789963722229,
"rewards/format_reward_step": 0.2578125,
"step": 23
},
{
"aux_distill/final_loss": 0.11669028896722011,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.11643350729718804,
"aux_distill/mean_u": 0.39236320772460315,
"aux_distill/n_active_final_tok": 12.5,
"aux_distill/n_active_tok": 78.375,
"aux_distill/step_loss": 1.0476447604596615,
"calib/answer_extract_rate": 0.5234375,
"calib/auroc": 0.4999999999999999,
"calib/avg_num_step_conf": 2.45703125,
"calib/ece": 0.23891773109243697,
"calib/final_conf_rate": 0.46484375,
"calib/format_rate": 0.3671875,
"calib/frac_conf_gt_0.9": 0.04201680672268908,
"calib/gap": 0.010720370101596544,
"calib/mean_conf": 0.2658353781512605,
"calib/mu_c": 0.2753846153846154,
"calib/mu_w": 0.26466424528301885,
"calib/nonempty_final_conf_rate": 0.46484375,
"calib/nonempty_reasoning_rate": 0.6328125,
"calib/nonempty_step_conf_rate": 0.54296875,
"calib/pce": 0.19775470588235294,
"calib/std_conf": 0.2908977995352265,
"calib/step_conf_rate": 0.54296875,
"calib/step_q_c": 0.26138888888888884,
"calib/step_q_c_n": 36.0,
"calib/step_q_gap": -0.011842957529105247,
"calib/step_q_w": 0.2732318464179941,
"calib/step_q_w_n": 593.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 3033.0,
"completions/max_terminated_length": 3033.0,
"completions/mean_length": 501.94140625,
"completions/mean_terminated_length": 507.893310546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 6.0,
"epoch": 0.0256,
"grad_norm": 0.01694255881011486,
"learning_rate": 4.888888888888889e-06,
"loss": 0.3453,
"num_tokens": 6302381.0,
"reward": 0.35615280270576477,
"reward_std": 0.4103126525878906,
"rewards/accuracy_reward_step": 0.05078125,
"rewards/final_brier_reward_step": 0.2943369150161743,
"rewards/format_reward_step": 0.3671875,
"step": 24
},
{
"aux_distill/final_loss": 0.1430833032936789,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.12002312182448804,
"aux_distill/mean_u": 0.3420518475664192,
"aux_distill/n_active_final_tok": 16.125,
"aux_distill/n_active_tok": 83.25,
"aux_distill/step_loss": 1.0571478921920061,
"calib/answer_extract_rate": 0.63671875,
"calib/auroc": 0.6062956204379562,
"calib/avg_num_step_conf": 2.62109375,
"calib/ece": 0.27120449312750566,
"calib/final_conf_rate": 0.59765625,
"calib/format_rate": 0.44140625,
"calib/frac_conf_gt_0.9": 0.08496732026143791,
"calib/gap": 0.11980306241964694,
"calib/mean_conf": 0.36130036240855146,
"calib/mu_c": 0.4685750000000001,
"calib/mu_w": 0.34877193758035313,
"calib/nonempty_final_conf_rate": 0.59765625,
"calib/nonempty_reasoning_rate": 0.8046875,
"calib/nonempty_step_conf_rate": 0.69140625,
"calib/pce": 0.26396484606868215,
"calib/std_conf": 0.3285230922100727,
"calib/step_conf_rate": 0.69140625,
"calib/step_q_c": 0.4666734693877551,
"calib/step_q_c_n": 49.0,
"calib/step_q_gap": 0.11133586206086299,
"calib/step_q_w": 0.3553376073268921,
"calib/step_q_w_n": 621.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 3054.0,
"completions/max_terminated_length": 3054.0,
"completions/mean_length": 412.25,
"completions/mean_terminated_length": 418.7936706542969,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.02666666666666667,
"grad_norm": 0.01768523082137108,
"learning_rate": 4.861111111111111e-06,
"loss": 0.3526,
"num_tokens": 6511141.0,
"reward": 0.4240191578865051,
"reward_std": 0.41996586322784424,
"rewards/accuracy_reward_step": 0.0703125,
"rewards/final_brier_reward_step": 0.33631956577301025,
"rewards/format_reward_step": 0.44140625,
"step": 25
},
{
"aux_distill/final_loss": 0.11389542344841175,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.1183220089878887,
"aux_distill/mean_u": 0.4198702327702227,
"aux_distill/n_active_final_tok": 17.75,
"aux_distill/n_active_tok": 92.75,
"aux_distill/step_loss": 1.06932464055717,
"calib/answer_extract_rate": 0.67578125,
"calib/auroc": 0.3595157657657657,
"calib/avg_num_step_conf": 2.8984375,
"calib/ece": 0.3143400050782022,
"calib/final_conf_rate": 0.625,
"calib/format_rate": 0.51953125,
"calib/frac_conf_gt_0.9": 0.06875,
"calib/gap": -0.13188307598433094,
"calib/mean_conf": 0.3295220213282022,
"calib/mu_c": 0.2075301760426961,
"calib/mu_w": 0.33941325202702705,
"calib/nonempty_final_conf_rate": 0.625,
"calib/nonempty_reasoning_rate": 0.78515625,
"calib/nonempty_step_conf_rate": 0.671875,
"calib/pce": 0.2844310132032022,
"calib/std_conf": 0.29963368167290444,
"calib/step_conf_rate": 0.671875,
"calib/step_q_c": 0.33064194704822947,
"calib/step_q_c_n": 31.0,
"calib/step_q_gap": 0.008411026387188658,
"calib/step_q_w": 0.3222309206610408,
"calib/step_q_w_n": 711.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3060.0,
"completions/max_terminated_length": 3060.0,
"completions/mean_length": 409.63671875,
"completions/mean_terminated_length": 411.2431640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 14.0,
"epoch": 0.027733333333333332,
"grad_norm": 0.01667754165828228,
"learning_rate": 4.833333333333333e-06,
"loss": 0.31,
"num_tokens": 6721248.0,
"reward": 0.4849138557910919,
"reward_std": 0.43144482374191284,
"rewards/accuracy_reward_step": 0.046875,
"rewards/final_brier_reward_step": 0.4034214913845062,
"rewards/format_reward_step": 0.51953125,
"step": 26
},
{
"aux_distill/final_loss": 0.09505854613962583,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.11087433947250247,
"aux_distill/mean_u": 0.3520416102787173,
"aux_distill/n_active_final_tok": 21.375,
"aux_distill/n_active_tok": 112.0,
"aux_distill/step_loss": 1.013684831559658,
"calib/answer_extract_rate": 0.7890625,
"calib/auroc": 0.6753246753246753,
"calib/avg_num_step_conf": 3.5,
"calib/ece": 0.2809422702281864,
"calib/final_conf_rate": 0.7265625,
"calib/format_rate": 0.64453125,
"calib/frac_conf_gt_0.9": 0.04838709677419355,
"calib/gap": 0.16678135850032755,
"calib/mean_conf": 0.3400820551744229,
"calib/mu_c": 0.49699999999999994,
"calib/mu_w": 0.3302186414996724,
"calib/nonempty_final_conf_rate": 0.7265625,
"calib/nonempty_reasoning_rate": 0.91796875,
"calib/nonempty_step_conf_rate": 0.83984375,
"calib/pce": 0.2809422702281864,
"calib/std_conf": 0.2850531043616158,
"calib/step_conf_rate": 0.83984375,
"calib/step_q_c": 0.5752857142857143,
"calib/step_q_c_n": 49.0,
"calib/step_q_gap": 0.2177253253635057,
"calib/step_q_w": 0.3575603889222086,
"calib/step_q_w_n": 847.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2435.0,
"completions/max_terminated_length": 2435.0,
"completions/mean_length": 346.09765625,
"completions/mean_terminated_length": 348.8228454589844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 15.0,
"epoch": 0.0288,
"grad_norm": 0.015667244791984558,
"learning_rate": 4.805555555555556e-06,
"loss": 0.2393,
"num_tokens": 6915065.0,
"reward": 0.6040114164352417,
"reward_std": 0.4578133821487427,
"rewards/accuracy_reward_step": 0.04296875,
"rewards/final_brier_reward_step": 0.5205228328704834,
"rewards/format_reward_step": 0.64453125,
"step": 27
},
{
"aux_distill/final_loss": 0.07474714342970401,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.10493989638052881,
"aux_distill/mean_u": 0.354241252103711,
"aux_distill/n_active_final_tok": 20.75,
"aux_distill/n_active_tok": 110.75,
"aux_distill/step_loss": 0.9746518153697252,
"calib/answer_extract_rate": 0.7578125,
"calib/auroc": 0.37758051197357556,
"calib/avg_num_step_conf": 3.47265625,
"calib/ece": 0.2812024404896149,
"calib/final_conf_rate": 0.73046875,
"calib/format_rate": 0.62109375,
"calib/frac_conf_gt_0.9": 0.0427807486631016,
"calib/gap": -0.12303030936396864,
"calib/mean_conf": 0.3104126718098503,
"calib/mu_c": 0.19659318774585793,
"calib/mu_w": 0.31962349710982657,
"calib/nonempty_final_conf_rate": 0.73046875,
"calib/nonempty_reasoning_rate": 0.88671875,
"calib/nonempty_step_conf_rate": 0.796875,
"calib/pce": 0.2583744010695187,
"calib/std_conf": 0.27782210737333857,
"calib/step_conf_rate": 0.796875,
"calib/step_q_c": 0.3388571428571428,
"calib/step_q_c_n": 35.0,
"calib/step_q_gap": 0.012772158115358812,
"calib/step_q_w": 0.326084984741784,
"calib/step_q_w_n": 852.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 3006.0,
"completions/max_terminated_length": 3006.0,
"completions/mean_length": 339.8828125,
"completions/mean_terminated_length": 343.9130554199219,
"completions/min_length": 0.0,
"completions/min_terminated_length": 6.0,
"epoch": 0.029866666666666666,
"grad_norm": 0.013735142536461353,
"learning_rate": 4.777777777777778e-06,
"loss": 0.1881,
"num_tokens": 7109019.0,
"reward": 0.5857105851173401,
"reward_std": 0.42725497484207153,
"rewards/accuracy_reward_step": 0.0546875,
"rewards/final_brier_reward_step": 0.4956399202346802,
"rewards/format_reward_step": 0.62109375,
"step": 28
},
{
"aux_distill/final_loss": 0.06856484201853164,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.10507733467966318,
"aux_distill/mean_u": 0.36215323672527155,
"aux_distill/n_active_final_tok": 24.75,
"aux_distill/n_active_tok": 109.5,
"aux_distill/step_loss": 0.9822084847837687,
"calib/answer_extract_rate": 0.84765625,
"calib/auroc": 0.5148529411764706,
"calib/avg_num_step_conf": 3.421875,
"calib/ece": 0.3110308512700327,
"calib/final_conf_rate": 0.84765625,
"calib/format_rate": 0.75390625,
"calib/frac_conf_gt_0.9": 0.06912442396313365,
"calib/gap": 0.010225580228504727,
"calib/mean_conf": 0.3835107962111911,
"calib/mu_c": 0.39293529411764705,
"calib/mu_w": 0.3827097138891423,
"calib/nonempty_final_conf_rate": 0.84765625,
"calib/nonempty_reasoning_rate": 0.9296875,
"calib/nonempty_step_conf_rate": 0.875,
"calib/pce": 0.3081003168281695,
"calib/std_conf": 0.2824604470729387,
"calib/step_conf_rate": 0.875,
"calib/step_q_c": 0.39023787878787886,
"calib/step_q_c_n": 66.0,
"calib/step_q_gap": 0.0162489130015665,
"calib/step_q_w": 0.37398896578631236,
"calib/step_q_w_n": 810.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2550.0,
"completions/max_terminated_length": 2550.0,
"completions/mean_length": 292.71484375,
"completions/mean_terminated_length": 295.0196838378906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 10.0,
"epoch": 0.030933333333333334,
"grad_norm": 0.011677009984850883,
"learning_rate": 4.75e-06,
"loss": 0.1245,
"num_tokens": 7291082.0,
"reward": 0.698754072189331,
"reward_std": 0.38540327548980713,
"rewards/accuracy_reward_step": 0.06640625,
"rewards/final_brier_reward_step": 0.5771956443786621,
"rewards/format_reward_step": 0.75390625,
"step": 29
},
{
"aux_distill/final_loss": 0.07209987913665827,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.10355257894843817,
"aux_distill/mean_u": 0.29610125990414,
"aux_distill/n_active_final_tok": 26.625,
"aux_distill/n_active_tok": 116.375,
"aux_distill/step_loss": 0.9634258858859539,
"calib/answer_extract_rate": 0.87890625,
"calib/auroc": 0.4930756843800322,
"calib/avg_num_step_conf": 3.63671875,
"calib/ece": 0.30701373725483133,
"calib/final_conf_rate": 0.8671875,
"calib/format_rate": 0.7890625,
"calib/frac_conf_gt_0.9": 0.05855855855855856,
"calib/gap": -0.026933573287790114,
"calib/mean_conf": 0.37244707058816473,
"calib/mu_c": 0.3473333333333333,
"calib/mu_w": 0.37426690662112344,
"calib/nonempty_final_conf_rate": 0.8671875,
"calib/nonempty_reasoning_rate": 0.96875,
"calib/nonempty_step_conf_rate": 0.92578125,
"calib/pce": 0.30594662013771423,
"calib/std_conf": 0.2842563330638594,
"calib/step_conf_rate": 0.92578125,
"calib/step_q_c": 0.40701492537313433,
"calib/step_q_c_n": 67.0,
"calib/step_q_gap": 0.019904566958119818,
"calib/step_q_w": 0.3871103584150145,
"calib/step_q_w_n": 864.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2519.0,
"completions/max_terminated_length": 2519.0,
"completions/mean_length": 284.31640625,
"completions/mean_terminated_length": 284.31640625,
"completions/min_length": 5.0,
"completions/min_terminated_length": 5.0,
"epoch": 0.032,
"grad_norm": 0.011391903273761272,
"learning_rate": 4.722222222222222e-06,
"loss": 0.134,
"num_tokens": 7470851.0,
"reward": 0.7274598479270935,
"reward_std": 0.38493478298187256,
"rewards/accuracy_reward_step": 0.06640625,
"rewards/final_brier_reward_step": 0.599450945854187,
"rewards/format_reward_step": 0.7890625,
"step": 30
},
{
"aux_distill/final_loss": 0.026639884941687342,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.10087639489211142,
"aux_distill/mean_u": 0.34303952978799784,
"aux_distill/n_active_final_tok": 27.125,
"aux_distill/n_active_tok": 116.375,
"aux_distill/step_loss": 0.9821240454912186,
"calib/answer_extract_rate": 0.9140625,
"calib/auroc": 0.5539274322169059,
"calib/avg_num_step_conf": 3.63671875,
"calib/ece": 0.25141960898565074,
"calib/final_conf_rate": 0.91015625,
"calib/format_rate": 0.81640625,
"calib/frac_conf_gt_0.9": 0.03862660944206009,
"calib/gap": 0.05544985377835743,
"calib/mean_conf": 0.32780338580968515,
"calib/mu_c": 0.37754166666666666,
"calib/mu_w": 0.32209181288830924,
"calib/nonempty_final_conf_rate": 0.91015625,
"calib/nonempty_reasoning_rate": 0.9765625,
"calib/nonempty_step_conf_rate": 0.91796875,
"calib/pce": 0.23810935147492115,
"calib/std_conf": 0.278105933595617,
"calib/step_conf_rate": 0.91796875,
"calib/step_q_c": 0.4104555555555555,
"calib/step_q_c_n": 90.0,
"calib/step_q_gap": 0.03663538551988377,
"calib/step_q_w": 0.37382017003567175,
"calib/step_q_w_n": 841.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1813.0,
"completions/max_terminated_length": 1813.0,
"completions/mean_length": 271.5078125,
"completions/mean_terminated_length": 271.5078125,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"epoch": 0.03306666666666667,
"grad_norm": 0.010902057401835918,
"learning_rate": 4.694444444444445e-06,
"loss": 0.2134,
"num_tokens": 7646269.0,
"reward": 0.7762458324432373,
"reward_std": 0.36868053674697876,
"rewards/accuracy_reward_step": 0.09375,
"rewards/final_brier_reward_step": 0.6423354148864746,
"rewards/format_reward_step": 0.81640625,
"step": 31
},
{
"aux_distill/final_loss": 0.06401024729711935,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.10776355746202171,
"aux_distill/mean_u": 0.34849096609468133,
"aux_distill/n_active_final_tok": 28.75,
"aux_distill/n_active_tok": 115.75,
"aux_distill/step_loss": 1.0136253219097853,
"calib/answer_extract_rate": 0.9140625,
"calib/auroc": 0.4738944630248978,
"calib/avg_num_step_conf": 3.6171875,
"calib/ece": 0.24195064377682404,
"calib/final_conf_rate": 0.91015625,
"calib/format_rate": 0.86328125,
"calib/frac_conf_gt_0.9": 0.02575107296137339,
"calib/gap": -0.011549424005945663,
"calib/mean_conf": 0.3294914163090129,
"calib/mu_c": 0.31923076923076926,
"calib/mu_w": 0.3307801932367149,
"calib/nonempty_final_conf_rate": 0.91015625,
"calib/nonempty_reasoning_rate": 0.97265625,
"calib/nonempty_step_conf_rate": 0.953125,
"calib/pce": 0.22992703862660946,
"calib/std_conf": 0.23962689939454776,
"calib/step_conf_rate": 0.953125,
"calib/step_q_c": 0.39330275229357803,
"calib/step_q_c_n": 109.0,
"calib/step_q_gap": 0.034800549111203505,
"calib/step_q_w": 0.3585022031823745,
"calib/step_q_w_n": 817.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1978.0,
"completions/max_terminated_length": 1978.0,
"completions/mean_length": 258.890625,
"completions/mean_terminated_length": 258.890625,
"completions/min_length": 5.0,
"completions/min_terminated_length": 5.0,
"epoch": 0.034133333333333335,
"grad_norm": 0.011250043287873268,
"learning_rate": 4.666666666666667e-06,
"loss": 0.108,
"num_tokens": 7819249.0,
"reward": 0.8262053728103638,
"reward_std": 0.3190339207649231,
"rewards/accuracy_reward_step": 0.1015625,
"rewards/final_brier_reward_step": 0.6875669956207275,
"rewards/format_reward_step": 0.86328125,
"step": 32
},
{
"aux_distill/final_loss": 0.06823475078999763,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.10778754344210029,
"aux_distill/mean_u": 0.36732323440413506,
"aux_distill/n_active_final_tok": 29.25,
"aux_distill/n_active_tok": 115.375,
"aux_distill/step_loss": 1.0096406731754541,
"calib/answer_extract_rate": 0.9375,
"calib/auroc": 0.5757847533632288,
"calib/avg_num_step_conf": 3.60546875,
"calib/ece": 0.2554921504225731,
"calib/final_conf_rate": 0.94921875,
"calib/format_rate": 0.890625,
"calib/frac_conf_gt_0.9": 0.04938271604938271,
"calib/gap": 0.06669241007764448,
"calib/mean_conf": 0.3377966771715444,
"calib/mu_c": 0.39899999999999997,
"calib/mu_w": 0.3323075899223555,
"calib/nonempty_final_conf_rate": 0.94921875,
"calib/nonempty_reasoning_rate": 0.9765625,
"calib/nonempty_step_conf_rate": 0.94921875,
"calib/pce": 0.2554921504225731,
"calib/std_conf": 0.25518815943970735,
"calib/step_conf_rate": 0.94921875,
"calib/step_q_c": 0.32671875000000006,
"calib/step_q_c_n": 64.0,
"calib/step_q_gap": -0.03693933213351014,
"calib/step_q_w": 0.3636580821335102,
"calib/step_q_w_n": 859.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2101.0,
"completions/max_terminated_length": 2101.0,
"completions/mean_length": 240.44921875,
"completions/mean_terminated_length": 241.3921661376953,
"completions/min_length": 0.0,
"completions/min_terminated_length": 12.0,
"epoch": 0.0352,
"grad_norm": 0.01088921632617712,
"learning_rate": 4.638888888888889e-06,
"loss": 0.1041,
"num_tokens": 7987676.0,
"reward": 0.8405758142471313,
"reward_std": 0.2909170389175415,
"rewards/accuracy_reward_step": 0.078125,
"rewards/final_brier_reward_step": 0.7124015688896179,
"rewards/format_reward_step": 0.890625,
"step": 33
},
{
"aux_distill/final_loss": 0.060225540088140406,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.10242805699817836,
"aux_distill/mean_u": 0.3273371979523068,
"aux_distill/n_active_final_tok": 29.375,
"aux_distill/n_active_tok": 128.875,
"aux_distill/step_loss": 0.9640550166368484,
"calib/answer_extract_rate": 0.953125,
"calib/auroc": 0.40304528891202496,
"calib/avg_num_step_conf": 4.03125,
"calib/ece": 0.26849306686419755,
"calib/final_conf_rate": 0.94921875,
"calib/format_rate": 0.8984375,
"calib/frac_conf_gt_0.9": 0.02880658436213992,
"calib/gap": -0.07045224862467475,
"calib/mean_conf": 0.31905290225514404,
"calib/mu_c": 0.25352941176470584,
"calib/mu_w": 0.3239816603893806,
"calib/nonempty_final_conf_rate": 0.94921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.96875,
"calib/pce": 0.258793560691358,
"calib/std_conf": 0.2436208075519449,
"calib/step_conf_rate": 0.96875,
"calib/step_q_c": 0.3027536231884059,
"calib/step_q_c_n": 69.0,
"calib/step_q_gap": -0.04627410266829196,
"calib/step_q_w": 0.34902772585669783,
"calib/step_q_w_n": 963.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1460.0,
"completions/max_terminated_length": 1460.0,
"completions/mean_length": 233.73046875,
"completions/mean_terminated_length": 234.64707946777344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 24.0,
"epoch": 0.03626666666666667,
"grad_norm": 0.010752441361546516,
"learning_rate": 4.611111111111112e-06,
"loss": 0.0846,
"num_tokens": 8152623.0,
"reward": 0.8435355424880981,
"reward_std": 0.292891263961792,
"rewards/accuracy_reward_step": 0.06640625,
"rewards/final_brier_reward_step": 0.7222274541854858,
"rewards/format_reward_step": 0.8984375,
"step": 34
},
{
"aux_distill/final_loss": 0.06452472699311329,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.10324789304286242,
"aux_distill/mean_u": 0.3524373133250392,
"aux_distill/n_active_final_tok": 29.875,
"aux_distill/n_active_tok": 130.0,
"aux_distill/step_loss": 0.9679541885852814,
"calib/answer_extract_rate": 0.9375,
"calib/auroc": 0.5807174887892377,
"calib/avg_num_step_conf": 4.0625,
"calib/ece": 0.25050661157024795,
"calib/final_conf_rate": 0.9453125,
"calib/format_rate": 0.90625,
"calib/frac_conf_gt_0.9": 0.028925619834710745,
"calib/gap": 0.06446320509794667,
"calib/mean_conf": 0.32901900826446284,
"calib/mu_c": 0.388421052631579,
"calib/mu_w": 0.3239578475336323,
"calib/nonempty_final_conf_rate": 0.9453125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.25050661157024795,
"calib/std_conf": 0.24719545477582386,
"calib/step_conf_rate": 0.9765625,
"calib/step_q_c": 0.418375,
"calib/step_q_c_n": 80.0,
"calib/step_q_gap": 0.033822291666666615,
"calib/step_q_w": 0.3845527083333334,
"calib/step_q_w_n": 960.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2830.0,
"completions/max_terminated_length": 2830.0,
"completions/mean_length": 263.8046875,
"completions/mean_terminated_length": 263.8046875,
"completions/min_length": 50.0,
"completions/min_terminated_length": 50.0,
"epoch": 0.037333333333333336,
"grad_norm": 0.010321549139916897,
"learning_rate": 4.583333333333333e-06,
"loss": 0.2327,
"num_tokens": 8329413.0,
"reward": 0.8620861768722534,
"reward_std": 0.25374510884284973,
"rewards/accuracy_reward_step": 0.078125,
"rewards/final_brier_reward_step": 0.7397972345352173,
"rewards/format_reward_step": 0.90625,
"step": 35
},
{
"aux_distill/final_loss": 0.04324473983433563,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.10079836356453598,
"aux_distill/mean_u": 0.34983613467249147,
"aux_distill/n_active_final_tok": 30.25,
"aux_distill/n_active_tok": 138.75,
"aux_distill/step_loss": 0.9647388868033886,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5186206120612061,
"calib/avg_num_step_conf": 4.3359375,
"calib/ece": 0.17529593495934961,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.9375,
"calib/frac_conf_gt_0.9": 0.016260162601626018,
"calib/gap": 0.006814041404140436,
"calib/mean_conf": 0.3203138211382114,
"calib/mu_c": 0.3259090909090909,
"calib/mu_w": 0.3190950495049505,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.15837398373983744,
"calib/std_conf": 0.226129813903,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.3540217391304348,
"calib/step_q_c_n": 184.0,
"calib/step_q_gap": -0.0035948915391116887,
"calib/step_q_w": 0.35761663066954646,
"calib/step_q_w_n": 926.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2130.0,
"completions/max_terminated_length": 2130.0,
"completions/mean_length": 237.359375,
"completions/mean_terminated_length": 237.359375,
"completions/min_length": 27.0,
"completions/min_terminated_length": 27.0,
"epoch": 0.0384,
"grad_norm": 0.009128491394221783,
"learning_rate": 4.555555555555556e-06,
"loss": 0.1158,
"num_tokens": 8492889.0,
"reward": 0.9270957708358765,
"reward_std": 0.23009908199310303,
"rewards/accuracy_reward_step": 0.17578125,
"rewards/final_brier_reward_step": 0.7409102916717529,
"rewards/format_reward_step": 0.9375,
"step": 36
},
{
"aux_distill/final_loss": 0.02983783091622172,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.09856392722576857,
"aux_distill/mean_u": 0.31691360948698644,
"aux_distill/n_active_final_tok": 30.375,
"aux_distill/n_active_tok": 140.375,
"aux_distill/step_loss": 0.9558014236390591,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.4738962294553658,
"calib/avg_num_step_conf": 4.38671875,
"calib/ece": 0.1954847086831275,
"calib/final_conf_rate": 0.94921875,
"calib/format_rate": 0.9453125,
"calib/frac_conf_gt_0.9": 0.00411522633744856,
"calib/gap": -0.012209304236222962,
"calib/mean_conf": 0.27109705436213993,
"calib/mu_c": 0.2603448275862069,
"calib/mu_w": 0.2725541318224299,
"calib/nonempty_final_conf_rate": 0.94921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.1736200996296296,
"calib/std_conf": 0.20206661622017189,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.2971818181818182,
"calib/step_q_c_n": 110.0,
"calib/step_q_gap": -0.02671931644799419,
"calib/step_q_w": 0.3239011346298124,
"calib/step_q_w_n": 1013.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1811.0,
"completions/max_terminated_length": 1811.0,
"completions/mean_length": 257.17578125,
"completions/mean_terminated_length": 258.184326171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 49.0,
"epoch": 0.039466666666666664,
"grad_norm": 0.008870480582118034,
"learning_rate": 4.527777777777778e-06,
"loss": 0.2145,
"num_tokens": 8665822.0,
"reward": 0.9225687980651855,
"reward_std": 0.20187771320343018,
"rewards/accuracy_reward_step": 0.1171875,
"rewards/final_brier_reward_step": 0.7826376557350159,
"rewards/format_reward_step": 0.9453125,
"step": 37
},
{
"aux_distill/final_loss": 0.04951605253154412,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.10669704480096698,
"aux_distill/mean_u": 0.39243347865395956,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 144.875,
"aux_distill/step_loss": 1.017454382032156,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5411618741789519,
"calib/avg_num_step_conf": 4.546875,
"calib/ece": 0.20768253968253966,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.01984126984126984,
"calib/gap": 0.04232024521967592,
"calib/mean_conf": 0.3030793650793651,
"calib/mu_c": 0.34019354838709676,
"calib/mu_w": 0.29787330316742083,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.19387301587301584,
"calib/std_conf": 0.22660456443288846,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.40094296296296295,
"calib/step_q_c_n": 135.0,
"calib/step_q_gap": 0.07968931864809414,
"calib/step_q_w": 0.3212536443148688,
"calib/step_q_w_n": 1029.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 651.0,
"completions/max_terminated_length": 651.0,
"completions/mean_length": 222.73828125,
"completions/mean_terminated_length": 223.6117706298828,
"completions/min_length": 0.0,
"completions/min_terminated_length": 18.0,
"epoch": 0.04053333333333333,
"grad_norm": 0.009885660372674465,
"learning_rate": 4.5e-06,
"loss": 0.0605,
"num_tokens": 8829731.0,
"reward": 0.9365085959434509,
"reward_std": 0.1839677095413208,
"rewards/accuracy_reward_step": 0.12109375,
"rewards/final_brier_reward_step": 0.7870796918869019,
"rewards/format_reward_step": 0.96484375,
"step": 38
},
{
"aux_distill/final_loss": 0.03649313731875736,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.10016206814907491,
"aux_distill/mean_u": 0.34258266360839884,
"aux_distill/n_active_final_tok": 31.0,
"aux_distill/n_active_tok": 149.125,
"aux_distill/step_loss": 0.9651275295764208,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5206266729648874,
"calib/avg_num_step_conf": 4.66015625,
"calib/ece": 0.19338709677419355,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.020161290322580645,
"calib/gap": 0.0005133049913399468,
"calib/mean_conf": 0.3078225806451613,
"calib/mu_c": 0.30827586206896557,
"calib/mu_w": 0.3077625570776256,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.19213709677419355,
"calib/std_conf": 0.23030856049765402,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.3291964285714286,
"calib/step_q_c_n": 112.0,
"calib/step_q_gap": -0.0034354102140208287,
"calib/step_q_w": 0.33263183878544944,
"calib/step_q_w_n": 1081.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2996.0,
"completions/max_terminated_length": 2996.0,
"completions/mean_length": 256.28125,
"completions/mean_terminated_length": 256.28125,
"completions/min_length": 47.0,
"completions/min_terminated_length": 47.0,
"epoch": 0.0416,
"grad_norm": 0.008934065699577332,
"learning_rate": 4.472222222222223e-06,
"loss": 0.1541,
"num_tokens": 9001427.0,
"reward": 0.9320827722549438,
"reward_std": 0.1839485764503479,
"rewards/accuracy_reward_step": 0.11328125,
"rewards/final_brier_reward_step": 0.7821344137191772,
"rewards/format_reward_step": 0.96875,
"step": 39
},
{
"aux_distill/final_loss": 0.05308996573148761,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.09999302681535482,
"aux_distill/mean_u": 0.341460604743016,
"aux_distill/n_active_final_tok": 31.25,
"aux_distill/n_active_tok": 167.375,
"aux_distill/step_loss": 0.9468402825295925,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5058177646673222,
"calib/avg_num_step_conf": 5.3203125,
"calib/ece": 0.17167667984189722,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.007905138339920948,
"calib/gap": 0.0030394296951818944,
"calib/mean_conf": 0.2783960474308301,
"calib/mu_c": 0.2811111111111111,
"calib/mu_w": 0.2780716814159292,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.17167667984189722,
"calib/std_conf": 0.186278735810632,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.3487096774193548,
"calib/step_q_c_n": 155.0,
"calib/step_q_gap": 0.04356941229922223,
"calib/step_q_w": 0.3051402651201326,
"calib/step_q_w_n": 1207.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 680.0,
"completions/max_terminated_length": 680.0,
"completions/mean_length": 246.3046875,
"completions/mean_terminated_length": 247.27059936523438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 49.0,
"epoch": 0.042666666666666665,
"grad_norm": 0.010064591653645039,
"learning_rate": 4.444444444444444e-06,
"loss": 0.0759,
"num_tokens": 9171241.0,
"reward": 0.9558616280555725,
"reward_std": 0.13117021322250366,
"rewards/accuracy_reward_step": 0.10546875,
"rewards/final_brier_reward_step": 0.825785756111145,
"rewards/format_reward_step": 0.98046875,
"step": 40
},
{
"aux_distill/final_loss": 0.03107749327318743,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.10013743420131505,
"aux_distill/mean_u": 0.3639978660274057,
"aux_distill/n_active_final_tok": 30.75,
"aux_distill/n_active_tok": 158.625,
"aux_distill/step_loss": 0.9702968336641788,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5538297872340426,
"calib/avg_num_step_conf": 4.95703125,
"calib/ece": 0.1259919028340081,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.94140625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.03293723404255322,
"calib/mean_conf": 0.27311740890688263,
"calib/mu_c": 0.29978723404255325,
"calib/mu_w": 0.26685000000000003,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.10441295546558704,
"calib/std_conf": 0.1901639990614291,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.3252631578947368,
"calib/step_q_c_n": 209.0,
"calib/step_q_gap": 0.038083912611717896,
"calib/step_q_w": 0.2871792452830189,
"calib/step_q_w_n": 1060.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2412.0,
"completions/max_terminated_length": 2412.0,
"completions/mean_length": 248.796875,
"completions/mean_terminated_length": 248.796875,
"completions/min_length": 29.0,
"completions/min_terminated_length": 29.0,
"epoch": 0.04373333333333333,
"grad_norm": 0.01017068326473236,
"learning_rate": 4.416666666666667e-06,
"loss": 0.0853,
"num_tokens": 9342181.0,
"reward": 0.9481226205825806,
"reward_std": 0.20646090805530548,
"rewards/accuracy_reward_step": 0.1875,
"rewards/final_brier_reward_step": 0.7673390507698059,
"rewards/format_reward_step": 0.94140625,
"step": 41
},
{
"aux_distill/final_loss": 0.04891497686912771,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.09970072889700532,
"aux_distill/mean_u": 0.35922230188739757,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 167.125,
"aux_distill/step_loss": 0.948092307895422,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4345588235294117,
"calib/avg_num_step_conf": 5.2265625,
"calib/ece": 0.1948976377952756,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.007874015748031496,
"calib/gap": -0.04478288770053476,
"calib/mean_conf": 0.2570236220472441,
"calib/mu_c": 0.21823529411764706,
"calib/mu_w": 0.2630181818181818,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.15903149606299213,
"calib/std_conf": 0.1976297127939186,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.26745562130177514,
"calib/step_q_c_n": 169.0,
"calib/step_q_gap": -0.02169065756905464,
"calib/step_q_w": 0.2891462788708298,
"calib/step_q_w_n": 1169.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 520.0,
"completions/max_terminated_length": 520.0,
"completions/mean_length": 238.9140625,
"completions/mean_terminated_length": 239.8509979248047,
"completions/min_length": 0.0,
"completions/min_terminated_length": 45.0,
"epoch": 0.0448,
"grad_norm": 0.009412148036062717,
"learning_rate": 4.388888888888889e-06,
"loss": 0.1176,
"num_tokens": 9507711.0,
"reward": 0.9523229598999023,
"reward_std": 0.1509455144405365,
"rewards/accuracy_reward_step": 0.1328125,
"rewards/final_brier_reward_step": 0.7991771697998047,
"rewards/format_reward_step": 0.97265625,
"step": 42
},
{
"aux_distill/final_loss": 0.017450424864364322,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.09527178690768778,
"aux_distill/mean_u": 0.3074926680926342,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 165.5,
"aux_distill/step_loss": 0.9352674260735512,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5001218323586744,
"calib/avg_num_step_conf": 5.171875,
"calib/ece": 0.1387244094488189,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.003937007874015748,
"calib/gap": -0.021141325536062405,
"calib/mean_conf": 0.2671889763779528,
"calib/mu_c": 0.24921052631578944,
"calib/mu_w": 0.27035185185185184,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.1281535433070866,
"calib/std_conf": 0.19103756205181283,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.3148258706467662,
"calib/step_q_c_n": 201.0,
"calib/step_q_gap": 0.01793148062005201,
"calib/step_q_w": 0.2968943900267142,
"calib/step_q_w_n": 1123.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1804.0,
"completions/max_terminated_length": 1804.0,
"completions/mean_length": 258.19140625,
"completions/mean_terminated_length": 258.19140625,
"completions/min_length": 64.0,
"completions/min_terminated_length": 64.0,
"epoch": 0.04586666666666667,
"grad_norm": 0.009506667964160442,
"learning_rate": 4.361111111111112e-06,
"loss": 0.0957,
"num_tokens": 9679032.0,
"reward": 0.9611801505088806,
"reward_std": 0.1372612565755844,
"rewards/accuracy_reward_step": 0.1484375,
"rewards/final_brier_reward_step": 0.7973603010177612,
"rewards/format_reward_step": 0.9765625,
"step": 43
},
{
"aux_distill/final_loss": 0.016983458794129547,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.09559705457650125,
"aux_distill/mean_u": 0.3491452177791532,
"aux_distill/n_active_final_tok": 31.5,
"aux_distill/n_active_tok": 182.0,
"aux_distill/step_loss": 0.9389870651066303,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.596326164874552,
"calib/avg_num_step_conf": 5.75,
"calib/ece": 0.12504150197628458,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.003952569169960474,
"calib/gap": 0.06651459293394774,
"calib/mean_conf": 0.24211660079051384,
"calib/mu_c": 0.29916666666666664,
"calib/mu_w": 0.2326520737327189,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.11243280632411067,
"calib/std_conf": 0.17807459255864547,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.30972527472527467,
"calib/step_q_c_n": 182.0,
"calib/step_q_gap": 0.050196127438452975,
"calib/step_q_w": 0.2595291472868217,
"calib/step_q_w_n": 1290.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 667.0,
"completions/max_terminated_length": 667.0,
"completions/mean_length": 264.6640625,
"completions/mean_terminated_length": 265.70196533203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 60.0,
"epoch": 0.046933333333333334,
"grad_norm": 0.008401011116802692,
"learning_rate": 4.333333333333334e-06,
"loss": 0.0687,
"num_tokens": 9853106.0,
"reward": 0.9753538370132446,
"reward_std": 0.13524678349494934,
"rewards/accuracy_reward_step": 0.140625,
"rewards/final_brier_reward_step": 0.8335201740264893,
"rewards/format_reward_step": 0.9765625,
"step": 44
},
{
"aux_distill/final_loss": 0.02873411323525943,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.09477901551872492,
"aux_distill/mean_u": 0.2913159320204437,
"aux_distill/n_active_final_tok": 31.0,
"aux_distill/n_active_tok": 185.5,
"aux_distill/step_loss": 0.9190560318529606,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5002170138888888,
"calib/avg_num_step_conf": 5.796875,
"calib/ece": 0.15745322580645163,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.009127777777777762,
"calib/mean_conf": 0.25295,
"calib/mu_c": 0.24500000000000002,
"calib/mu_w": 0.2541277777777778,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.14068548387096777,
"calib/std_conf": 0.18730663513211732,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.22559006211180124,
"calib/step_q_c_n": 161.0,
"calib/step_q_gap": -0.04106481317164548,
"calib/step_q_w": 0.2666548752834467,
"calib/step_q_w_n": 1323.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2756.0,
"completions/max_terminated_length": 2756.0,
"completions/mean_length": 295.80078125,
"completions/mean_terminated_length": 295.80078125,
"completions/min_length": 45.0,
"completions/min_terminated_length": 45.0,
"epoch": 0.048,
"grad_norm": 0.007688302546739578,
"learning_rate": 4.305555555555556e-06,
"loss": 0.1917,
"num_tokens": 10033879.0,
"reward": 0.9475110769271851,
"reward_std": 0.16948533058166504,
"rewards/accuracy_reward_step": 0.125,
"rewards/final_brier_reward_step": 0.8051784038543701,
"rewards/format_reward_step": 0.96484375,
"step": 45
},
{
"aux_distill/final_loss": 0.027575008094572695,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.09906627563759685,
"aux_distill/mean_u": 0.3723311350278894,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 193.75,
"aux_distill/step_loss": 0.9630877319723368,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.49044551798174985,
"calib/avg_num_step_conf": 6.0703125,
"calib/ece": 0.18154761904761907,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.003968253968253968,
"calib/gap": 0.0033913043478261684,
"calib/mean_conf": 0.24765873015873016,
"calib/mu_c": 0.2504444444444445,
"calib/mu_w": 0.24705314009661836,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.12531746031746033,
"calib/std_conf": 0.20316658512824695,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.278125,
"calib/step_q_c_n": 224.0,
"calib/step_q_gap": 0.01937590225563912,
"calib/step_q_w": 0.2587490977443609,
"calib/step_q_w_n": 1330.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2639.0,
"completions/max_terminated_length": 2639.0,
"completions/mean_length": 285.0,
"completions/mean_terminated_length": 285.0,
"completions/min_length": 43.0,
"completions/min_terminated_length": 43.0,
"epoch": 0.04906666666666667,
"grad_norm": 0.007769672200083733,
"learning_rate": 4.277777777777778e-06,
"loss": 0.1511,
"num_tokens": 10211607.0,
"reward": 0.9743492007255554,
"reward_std": 0.13994555175304413,
"rewards/accuracy_reward_step": 0.17578125,
"rewards/final_brier_reward_step": 0.7924484610557556,
"rewards/format_reward_step": 0.98046875,
"step": 46
},
{
"aux_distill/final_loss": 0.02417274876643205,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0960873260628432,
"aux_distill/mean_u": 0.362197381845286,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 196.5,
"aux_distill/step_loss": 0.9367004819214344,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4843272171253823,
"calib/avg_num_step_conf": 6.16796875,
"calib/ece": 0.1505511811023622,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.003937007874015748,
"calib/gap": -0.008888888888888863,
"calib/mean_conf": 0.21874015748031497,
"calib/mu_c": 0.21111111111111114,
"calib/mu_w": 0.22,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.11377952755905511,
"calib/std_conf": 0.18822415906750153,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.21770408163265306,
"calib/step_q_c_n": 196.0,
"calib/step_q_gap": -0.02792903478094058,
"calib/step_q_w": 0.24563311641359364,
"calib/step_q_w_n": 1383.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 771.0,
"completions/max_terminated_length": 771.0,
"completions/mean_length": 264.59375,
"completions/mean_terminated_length": 265.6313781738281,
"completions/min_length": 0.0,
"completions/min_terminated_length": 70.0,
"epoch": 0.050133333333333335,
"grad_norm": 0.007656062953174114,
"learning_rate": 4.25e-06,
"loss": 0.034,
"num_tokens": 10385319.0,
"reward": 0.9708345532417297,
"reward_std": 0.11946520209312439,
"rewards/accuracy_reward_step": 0.140625,
"rewards/final_brier_reward_step": 0.8205753564834595,
"rewards/format_reward_step": 0.98046875,
"step": 47
},
{
"aux_distill/final_loss": 0.020226882541464875,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.09678884199820459,
"aux_distill/mean_u": 0.3264913128481655,
"aux_distill/n_active_final_tok": 31.75,
"aux_distill/n_active_tok": 181.875,
"aux_distill/step_loss": 0.9476615190505981,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.502124987926205,
"calib/avg_num_step_conf": 5.68359375,
"calib/ece": 0.12513385826771653,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.007874015748031496,
"calib/gap": -0.0021980102385781575,
"calib/mean_conf": 0.20195275590551184,
"calib/mu_c": 0.20019607843137258,
"calib/mu_w": 0.20239408866995073,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0631496062992126,
"calib/std_conf": 0.16469964828726347,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.25000000000000006,
"calib/step_q_c_n": 266.0,
"calib/step_q_gap": 0.02378216989066448,
"calib/step_q_w": 0.22621783010933558,
"calib/step_q_w_n": 1189.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1571.0,
"completions/max_terminated_length": 1571.0,
"completions/mean_length": 264.6953125,
"completions/mean_terminated_length": 265.73333740234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 75.0,
"epoch": 0.0512,
"grad_norm": 0.008458969183266163,
"learning_rate": 4.222222222222223e-06,
"loss": 0.0648,
"num_tokens": 10556769.0,
"reward": 0.9897904992103577,
"reward_std": 0.12231529504060745,
"rewards/accuracy_reward_step": 0.203125,
"rewards/final_brier_reward_step": 0.7959872484207153,
"rewards/format_reward_step": 0.98046875,
"step": 48
},
{
"aux_distill/final_loss": 0.009723340248456225,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.09664465487003326,
"aux_distill/mean_u": 0.360260341416223,
"aux_distill/n_active_final_tok": 31.75,
"aux_distill/n_active_tok": 192.875,
"aux_distill/step_loss": 0.9567231871187687,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5453703703703704,
"calib/avg_num_step_conf": 6.02734375,
"calib/ece": 0.10180708661417325,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.03398203703703706,
"calib/mean_conf": 0.20527952755905515,
"calib/mu_c": 0.23203703703703707,
"calib/mu_w": 0.198055,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.04724409448818899,
"calib/std_conf": 0.1555700864099637,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.25839344262295083,
"calib/step_q_c_n": 305.0,
"calib/step_q_gap": 0.019905558939590534,
"calib/step_q_w": 0.2384878836833603,
"calib/step_q_w_n": 1238.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1855.0,
"completions/max_terminated_length": 1855.0,
"completions/mean_length": 278.89453125,
"completions/mean_terminated_length": 278.89453125,
"completions/min_length": 103.0,
"completions/min_terminated_length": 103.0,
"epoch": 0.05226666666666667,
"grad_norm": 0.007638960611075163,
"learning_rate": 4.194444444444445e-06,
"loss": 0.1125,
"num_tokens": 10732702.0,
"reward": 1.004418134689331,
"reward_std": 0.10782338678836823,
"rewards/accuracy_reward_step": 0.2109375,
"rewards/final_brier_reward_step": 0.8096175193786621,
"rewards/format_reward_step": 0.98828125,
"step": 49
},
{
"aux_distill/final_loss": 0.03108279372099787,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.09588481183163822,
"aux_distill/mean_u": 0.34428673420801703,
"aux_distill/n_active_final_tok": 31.875,
"aux_distill/n_active_tok": 212.5,
"aux_distill/step_loss": 0.9277653079479933,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5084745762711865,
"calib/avg_num_step_conf": 6.734375,
"calib/ece": 0.123709765625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0078125,
"calib/gap": -0.007617904155553662,
"calib/mean_conf": 0.191680859375,
"calib/mu_c": 0.18581864406779658,
"calib/mu_w": 0.19343654822335024,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.042460937500000004,
"calib/std_conf": 0.1701023200373057,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.22982303206997085,
"calib/step_q_c_n": 343.0,
"calib/step_q_gap": 0.013334255820875995,
"calib/step_q_w": 0.21648877624909485,
"calib/step_q_w_n": 1381.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1142.0,
"completions/max_terminated_length": 1142.0,
"completions/mean_length": 311.10546875,
"completions/mean_terminated_length": 312.32550048828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.05333333333333334,
"grad_norm": 0.006806428078562021,
"learning_rate": 4.166666666666667e-06,
"loss": 0.0811,
"num_tokens": 10917705.0,
"reward": 1.0072999000549316,
"reward_std": 0.08516330271959305,
"rewards/accuracy_reward_step": 0.23046875,
"rewards/final_brier_reward_step": 0.7880373001098633,
"rewards/format_reward_step": 0.99609375,
"step": 50
},
{
"aux_distill/final_loss": 0.019811689671769273,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.09419922926463187,
"aux_distill/mean_u": 0.2904671369685525,
"aux_distill/n_active_final_tok": 31.875,
"aux_distill/n_active_tok": 207.375,
"aux_distill/step_loss": 0.9221805911511183,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5173215531271798,
"calib/avg_num_step_conf": 6.546875,
"calib/ece": 0.14816406249999997,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.020904440827714504,
"calib/mean_conf": 0.17777343750000002,
"calib/mu_c": 0.19304347826086957,
"calib/mu_w": 0.17213903743315506,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.028203125000000002,
"calib/std_conf": 0.15118323499791106,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.21153452685421997,
"calib/step_q_c_n": 391.0,
"calib/step_q_gap": 0.0007765502005234737,
"calib/step_q_w": 0.2107579766536965,
"calib/step_q_w_n": 1285.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 752.0,
"completions/max_terminated_length": 752.0,
"completions/mean_length": 294.32421875,
"completions/mean_terminated_length": 295.47845458984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 105.0,
"epoch": 0.0544,
"grad_norm": 0.0073488312773406506,
"learning_rate": 4.138888888888889e-06,
"loss": 0.0965,
"num_tokens": 11102348.0,
"reward": 1.020928144454956,
"reward_std": 0.0940789133310318,
"rewards/accuracy_reward_step": 0.26953125,
"rewards/final_brier_reward_step": 0.7762312293052673,
"rewards/format_reward_step": 0.99609375,
"step": 51
},
{
"aux_distill/final_loss": 0.007637668693860178,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.09585435572080314,
"aux_distill/mean_u": 0.33941107293235234,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 189.125,
"aux_distill/step_loss": 0.950905866920948,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4956369982547993,
"calib/avg_num_step_conf": 5.9375,
"calib/ece": 0.14287007874015747,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.009514418681957876,
"calib/mean_conf": 0.19697244094488192,
"calib/mu_c": 0.2041269841269841,
"calib/mu_w": 0.19461256544502623,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.04590551181102362,
"calib/std_conf": 0.1671904568053981,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2206233766233766,
"calib/step_q_c_n": 385.0,
"calib/step_q_gap": -0.009763407517592582,
"calib/step_q_w": 0.2303867841409692,
"calib/step_q_w_n": 1135.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 778.0,
"completions/max_terminated_length": 778.0,
"completions/mean_length": 289.40625,
"completions/mean_terminated_length": 290.54119873046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 69.0,
"epoch": 0.055466666666666664,
"grad_norm": 0.007487446069717407,
"learning_rate": 4.111111111111111e-06,
"loss": 0.1067,
"num_tokens": 11284388.0,
"reward": 1.0093071460723877,
"reward_std": 0.11890524625778198,
"rewards/accuracy_reward_step": 0.24609375,
"rewards/final_brier_reward_step": 0.7803332209587097,
"rewards/format_reward_step": 0.9921875,
"step": 52
},
{
"aux_distill/final_loss": 0.014197576827427838,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.09383068815805018,
"aux_distill/mean_u": 0.34173695455792147,
"aux_distill/n_active_final_tok": 31.75,
"aux_distill/n_active_tok": 218.875,
"aux_distill/step_loss": 0.9241092819720507,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5298270254287404,
"calib/avg_num_step_conf": 6.83984375,
"calib/ece": 0.15933070866141733,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.011014192785334137,
"calib/mean_conf": 0.20807086614173226,
"calib/mu_c": 0.21578947368421056,
"calib/mu_w": 0.20477528089887642,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.03409448818897638,
"calib/std_conf": 0.1730180765715237,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.23542208067940548,
"calib/step_q_c_n": 471.0,
"calib/step_q_gap": -0.009796669320594503,
"calib/step_q_w": 0.24521874999999999,
"calib/step_q_w_n": 1280.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2315.0,
"completions/max_terminated_length": 2315.0,
"completions/mean_length": 337.1328125,
"completions/mean_terminated_length": 337.1328125,
"completions/min_length": 122.0,
"completions/min_terminated_length": 122.0,
"epoch": 0.05653333333333333,
"grad_norm": 0.006950410548597574,
"learning_rate": 4.083333333333334e-06,
"loss": 0.1347,
"num_tokens": 11476518.0,
"reward": 1.0199216604232788,
"reward_std": 0.12385044991970062,
"rewards/accuracy_reward_step": 0.296875,
"rewards/final_brier_reward_step": 0.7507808804512024,
"rewards/format_reward_step": 0.9921875,
"step": 53
},
{
"aux_distill/final_loss": 0.010009945388446795,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.09145716740749776,
"aux_distill/mean_u": 0.2808475809495889,
"aux_distill/n_active_final_tok": 31.75,
"aux_distill/n_active_tok": 203.625,
"aux_distill/step_loss": 0.9045617207884789,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5724786931818182,
"calib/avg_num_step_conf": 6.4453125,
"calib/ece": 0.15970703125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.00390625,
"calib/gap": 0.036664772727272726,
"calib/mean_conf": 0.19935546875,
"calib/mu_c": 0.2245625,
"calib/mu_w": 0.18789772727272727,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.023281249999999993,
"calib/std_conf": 0.1821295325056257,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.2658883826879271,
"calib/step_q_c_n": 439.0,
"calib/step_q_gap": 0.03788532736175035,
"calib/step_q_w": 0.22800305532617673,
"calib/step_q_w_n": 1211.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1219.0,
"completions/max_terminated_length": 1219.0,
"completions/mean_length": 301.80859375,
"completions/mean_terminated_length": 302.9921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.0576,
"grad_norm": 0.006956387776881456,
"learning_rate": 4.055555555555556e-06,
"loss": 0.0934,
"num_tokens": 11660013.0,
"reward": 1.0273517370224,
"reward_std": 0.12011194974184036,
"rewards/accuracy_reward_step": 0.3125,
"rewards/final_brier_reward_step": 0.750015914440155,
"rewards/format_reward_step": 0.9921875,
"step": 54
},
{
"aux_distill/final_loss": 0.0013773050231975503,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.09118994430173188,
"aux_distill/mean_u": 0.3320995970261154,
"aux_distill/n_active_final_tok": 31.125,
"aux_distill/n_active_tok": 210.625,
"aux_distill/step_loss": 0.9105221219360828,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.4829365079365079,
"calib/avg_num_step_conf": 6.609375,
"calib/ece": 0.16142449799196787,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.012048192771084338,
"calib/gap": -0.01906243386243392,
"calib/mean_conf": 0.20363574297188755,
"calib/mu_c": 0.18916666666666662,
"calib/mu_w": 0.20822910052910054,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.06204819277108434,
"calib/std_conf": 0.19055744321386933,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.22871794871794873,
"calib/step_q_c_n": 351.0,
"calib/step_q_gap": -0.018766540469225046,
"calib/step_q_w": 0.24748448918717378,
"calib/step_q_w_n": 1341.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2219.0,
"completions/max_terminated_length": 2219.0,
"completions/mean_length": 335.859375,
"completions/mean_terminated_length": 338.5039367675781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 50.0,
"epoch": 0.058666666666666666,
"grad_norm": 0.006341911386698484,
"learning_rate": 4.027777777777779e-06,
"loss": 0.1049,
"num_tokens": 11853817.0,
"reward": 0.97525954246521,
"reward_std": 0.16055387258529663,
"rewards/accuracy_reward_step": 0.234375,
"rewards/final_brier_reward_step": 0.7473939657211304,
"rewards/format_reward_step": 0.96875,
"step": 55
},
{
"aux_distill/final_loss": 0.009648349008784862,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.09182535042054951,
"aux_distill/mean_u": 0.3306901748601806,
"aux_distill/n_active_final_tok": 31.75,
"aux_distill/n_active_tok": 215.375,
"aux_distill/step_loss": 0.9086051415652037,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4698883161512028,
"calib/avg_num_step_conf": 6.73046875,
"calib/ece": 0.19055511811023623,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.027559055118110236,
"calib/gap": -0.04547766323024058,
"calib/mean_conf": 0.2159015748031496,
"calib/mu_c": 0.18116666666666664,
"calib/mu_w": 0.22664432989690722,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.08511811023622048,
"calib/std_conf": 0.2136231738159996,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.22120760368663592,
"calib/step_q_c_n": 434.0,
"calib/step_q_gap": -0.0426764149324487,
"calib/step_q_w": 0.2638840186190846,
"calib/step_q_w_n": 1289.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2405.0,
"completions/max_terminated_length": 2405.0,
"completions/mean_length": 350.765625,
"completions/mean_terminated_length": 350.765625,
"completions/min_length": 126.0,
"completions/min_terminated_length": 126.0,
"epoch": 0.05973333333333333,
"grad_norm": 0.006642908789217472,
"learning_rate": 4.000000000000001e-06,
"loss": 0.1088,
"num_tokens": 12050453.0,
"reward": 0.9888845682144165,
"reward_std": 0.11521496623754501,
"rewards/accuracy_reward_step": 0.234375,
"rewards/final_brier_reward_step": 0.7512066960334778,
"rewards/format_reward_step": 0.9921875,
"step": 56
},
{
"aux_distill/final_loss": 0.012068207034644729,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.09138673334382474,
"aux_distill/mean_u": 0.29421892223544,
"aux_distill/n_active_final_tok": 31.0,
"aux_distill/n_active_tok": 231.0,
"aux_distill/step_loss": 0.9017991088330746,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5083401416122004,
"calib/avg_num_step_conf": 7.22265625,
"calib/ece": 0.2613253012048193,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0024999999999999745,
"calib/mean_conf": 0.18903614457831325,
"calib/mu_c": 0.18750000000000003,
"calib/mu_w": 0.19,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.032409638554216864,
"calib/std_conf": 0.18268379285388484,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.24536891679748823,
"calib/step_q_c_n": 637.0,
"calib/step_q_gap": 0.017687398645673008,
"calib/step_q_w": 0.22768151815181523,
"calib/step_q_w_n": 1212.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2433.0,
"completions/max_terminated_length": 2433.0,
"completions/mean_length": 396.7109375,
"completions/mean_terminated_length": 398.2666931152344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 97.0,
"epoch": 0.0608,
"grad_norm": 0.005428283475339413,
"learning_rate": 3.972222222222223e-06,
"loss": 0.1853,
"num_tokens": 12258803.0,
"reward": 1.01131272315979,
"reward_std": 0.14329111576080322,
"rewards/accuracy_reward_step": 0.37890625,
"rewards/final_brier_reward_step": 0.6710629463195801,
"rewards/format_reward_step": 0.97265625,
"step": 57
},
{
"aux_distill/final_loss": 0.016176513805476134,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08761006209533662,
"aux_distill/mean_u": 0.2756479289251673,
"aux_distill/n_active_final_tok": 30.75,
"aux_distill/n_active_tok": 245.0,
"aux_distill/step_loss": 0.8599240900948644,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.5305960154026452,
"calib/avg_num_step_conf": 7.6796875,
"calib/ece": 0.21080240485829954,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 0.012145748987854251,
"calib/gap": -0.0014380976058931683,
"calib/mean_conf": 0.20757816194331982,
"calib/mu_c": 0.20652433333333334,
"calib/mu_w": 0.2079624309392265,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.07558704453441295,
"calib/std_conf": 0.20527577065297345,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2406767027835052,
"calib/step_q_c_n": 388.0,
"calib/step_q_gap": 0.030452875153593906,
"calib/step_q_w": 0.2102238276299113,
"calib/step_q_w_n": 1578.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2910.0,
"completions/max_terminated_length": 2910.0,
"completions/mean_length": 427.3359375,
"completions/mean_terminated_length": 427.3359375,
"completions/min_length": 130.0,
"completions/min_terminated_length": 130.0,
"epoch": 0.06186666666666667,
"grad_norm": 0.005393090657889843,
"learning_rate": 3.944444444444445e-06,
"loss": 0.1755,
"num_tokens": 12474521.0,
"reward": 0.9693734049797058,
"reward_std": 0.1678314357995987,
"rewards/accuracy_reward_step": 0.2578125,
"rewards/final_brier_reward_step": 0.7239030599594116,
"rewards/format_reward_step": 0.95703125,
"step": 58
},
{
"aux_distill/final_loss": 0.0203451911911543,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08644241420552135,
"aux_distill/mean_u": 0.2678265314548987,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 233.5,
"aux_distill/step_loss": 0.8440789300948381,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4464579336804604,
"calib/avg_num_step_conf": 7.296875,
"calib/ece": 0.24825454545454542,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.011857707509881422,
"calib/gap": -0.025314613592765156,
"calib/mean_conf": 0.22107351778656129,
"calib/mu_c": 0.20466404494382023,
"calib/mu_w": 0.2299786585365854,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.05877470355731226,
"calib/std_conf": 0.21842382462612686,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.24388932384341636,
"calib/step_q_c_n": 562.0,
"calib/step_q_gap": -0.0174102167385132,
"calib/step_q_w": 0.26129954058192956,
"calib/step_q_w_n": 1306.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2208.0,
"completions/max_terminated_length": 2208.0,
"completions/mean_length": 384.203125,
"completions/mean_terminated_length": 384.203125,
"completions/min_length": 110.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.06293333333333333,
"grad_norm": 0.005627437960356474,
"learning_rate": 3.916666666666667e-06,
"loss": 0.1036,
"num_tokens": 12679125.0,
"reward": 1.0117087364196777,
"reward_std": 0.13074266910552979,
"rewards/accuracy_reward_step": 0.34765625,
"rewards/final_brier_reward_step": 0.6874798536300659,
"rewards/format_reward_step": 0.98828125,
"step": 59
},
{
"aux_distill/final_loss": 0.009482828684667766,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08887755987234414,
"aux_distill/mean_u": 0.2723864582715786,
"aux_distill/n_active_final_tok": 31.125,
"aux_distill/n_active_tok": 244.25,
"aux_distill/step_loss": 0.8792927600443363,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.49565323366915826,
"calib/avg_num_step_conf": 7.6328125,
"calib/ece": 0.19774698795180726,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.01606425702811245,
"calib/gap": -0.0018043548911277352,
"calib/mean_conf": 0.19807630522088351,
"calib/mu_c": 0.19676470588235295,
"calib/mu_w": 0.1985690607734807,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.061365461847389564,
"calib/std_conf": 0.2058185689731123,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.21266968325791855,
"calib/step_q_c_n": 442.0,
"calib/step_q_gap": -0.02202555483731955,
"calib/step_q_w": 0.2346952380952381,
"calib/step_q_w_n": 1512.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2729.0,
"completions/max_terminated_length": 2729.0,
"completions/mean_length": 414.85546875,
"completions/mean_terminated_length": 414.85546875,
"completions/min_length": 131.0,
"completions/min_terminated_length": 131.0,
"epoch": 0.064,
"grad_norm": 0.005547628737986088,
"learning_rate": 3.88888888888889e-06,
"loss": 0.164,
"num_tokens": 12894184.0,
"reward": 0.9815865755081177,
"reward_std": 0.1615404486656189,
"rewards/accuracy_reward_step": 0.265625,
"rewards/final_brier_reward_step": 0.7287981510162354,
"rewards/format_reward_step": 0.96875,
"step": 60
},
{
"aux_distill/final_loss": 0.006618549310587696,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08587677648756653,
"aux_distill/mean_u": 0.2584981882242442,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 268.75,
"aux_distill/step_loss": 0.8521491996943951,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5252466251298027,
"calib/avg_num_step_conf": 8.3984375,
"calib/ece": 0.28567928286852584,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.00796812749003984,
"calib/gap": 0.014015316718587745,
"calib/mean_conf": 0.20611354581673305,
"calib/mu_c": 0.21415420560747664,
"calib/mu_w": 0.2001388888888889,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.032749003984063746,
"calib/std_conf": 0.2252031310944807,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.25237496653279784,
"calib/step_q_c_n": 747.0,
"calib/step_q_gap": 0.029025002170716574,
"calib/step_q_w": 0.22334996436208127,
"calib/step_q_w_n": 1403.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2498.0,
"completions/max_terminated_length": 2498.0,
"completions/mean_length": 396.87890625,
"completions/mean_terminated_length": 396.87890625,
"completions/min_length": 117.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.06506666666666666,
"grad_norm": 0.005713924765586853,
"learning_rate": 3.861111111111112e-06,
"loss": 0.2191,
"num_tokens": 13099849.0,
"reward": 1.0205470323562622,
"reward_std": 0.1633109450340271,
"rewards/accuracy_reward_step": 0.41796875,
"rewards/final_brier_reward_step": 0.6465628147125244,
"rewards/format_reward_step": 0.9765625,
"step": 61
},
{
"aux_distill/final_loss": 0.0006032689170751837,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08355497196316719,
"aux_distill/mean_u": 0.25818921676101536,
"aux_distill/n_active_final_tok": 31.125,
"aux_distill/n_active_tok": 283.75,
"aux_distill/step_loss": 0.834946446120739,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5345317725752509,
"calib/avg_num_step_conf": 8.8671875,
"calib/ece": 0.18156626506024098,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.008032128514056224,
"calib/gap": 0.01583444816053514,
"calib/mean_conf": 0.21706827309236948,
"calib/mu_c": 0.22876923076923075,
"calib/mu_w": 0.2129347826086956,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.06879518072289155,
"calib/std_conf": 0.22198954206126015,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.22276859504132232,
"calib/step_q_c_n": 484.0,
"calib/step_q_gap": -0.005437284018028177,
"calib/step_q_w": 0.2282058790593505,
"calib/step_q_w_n": 1786.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2936.0,
"completions/max_terminated_length": 2936.0,
"completions/mean_length": 481.65625,
"completions/mean_terminated_length": 481.65625,
"completions/min_length": 101.0,
"completions/min_terminated_length": 101.0,
"epoch": 0.06613333333333334,
"grad_norm": 0.004894188605248928,
"learning_rate": 3.833333333333334e-06,
"loss": 0.146,
"num_tokens": 13330233.0,
"reward": 0.9838611483573914,
"reward_std": 0.15775221586227417,
"rewards/accuracy_reward_step": 0.25390625,
"rewards/final_brier_reward_step": 0.7411597967147827,
"rewards/format_reward_step": 0.97265625,
"step": 62
},
{
"aux_distill/final_loss": 0.010994663068231603,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08830803050659597,
"aux_distill/mean_u": 0.3099173921582291,
"aux_distill/n_active_final_tok": 31.5,
"aux_distill/n_active_tok": 263.875,
"aux_distill/step_loss": 0.8720856215804815,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4667717086834734,
"calib/avg_num_step_conf": 8.24609375,
"calib/ece": 0.24200355731225295,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.011857707509881422,
"calib/gap": -0.013549173669467784,
"calib/mean_conf": 0.19372766798418972,
"calib/mu_c": 0.18473058823529412,
"calib/mu_w": 0.1982797619047619,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.04988142292490119,
"calib/std_conf": 0.201006340197766,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.21841632653061224,
"calib/step_q_c_n": 735.0,
"calib/step_q_gap": -0.03451608626008543,
"calib/step_q_w": 0.25293241279069767,
"calib/step_q_w_n": 1376.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2087.0,
"completions/max_terminated_length": 2087.0,
"completions/mean_length": 444.6328125,
"completions/mean_terminated_length": 444.6328125,
"completions/min_length": 164.0,
"completions/min_terminated_length": 164.0,
"epoch": 0.0672,
"grad_norm": 0.004928493872284889,
"learning_rate": 3.8055555555555556e-06,
"loss": 0.1096,
"num_tokens": 13552699.0,
"reward": 1.0073766708374023,
"reward_std": 0.13989222049713135,
"rewards/accuracy_reward_step": 0.33203125,
"rewards/final_brier_reward_step": 0.698347270488739,
"rewards/format_reward_step": 0.984375,
"step": 63
},
{
"aux_distill/final_loss": 0.004576153917696502,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.082359075313434,
"aux_distill/mean_u": 0.2690879073024952,
"aux_distill/n_active_final_tok": 30.875,
"aux_distill/n_active_tok": 274.25,
"aux_distill/step_loss": 0.8190145855769515,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.5289093767867352,
"calib/avg_num_step_conf": 8.84765625,
"calib/ece": 0.2756566801619434,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.004048582995951417,
"calib/gap": 0.004396119210977656,
"calib/mean_conf": 0.1775417004048583,
"calib/mu_c": 0.1803715909090909,
"calib/mu_w": 0.17597547169811326,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.04846153846153846,
"calib/std_conf": 0.2040467381214395,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2388135520684736,
"calib/step_q_c_n": 701.0,
"calib/step_q_gap": 0.047575828283307364,
"calib/step_q_w": 0.19123772378516624,
"calib/step_q_w_n": 1564.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2459.0,
"completions/max_terminated_length": 2459.0,
"completions/mean_length": 446.28125,
"completions/mean_terminated_length": 449.7952880859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 157.0,
"epoch": 0.06826666666666667,
"grad_norm": 0.004880514927208424,
"learning_rate": 3.777777777777778e-06,
"loss": 0.1574,
"num_tokens": 13770723.0,
"reward": 0.9896012544631958,
"reward_std": 0.16695758700370789,
"rewards/accuracy_reward_step": 0.34375,
"rewards/final_brier_reward_step": 0.6745150089263916,
"rewards/format_reward_step": 0.9609375,
"step": 64
},
{
"aux_distill/final_loss": 0.0014026463968548342,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.080779759446159,
"aux_distill/mean_u": 0.2522538711397831,
"aux_distill/n_active_final_tok": 31.5,
"aux_distill/n_active_tok": 242.75,
"aux_distill/step_loss": 0.8063949355855584,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5213543412671806,
"calib/avg_num_step_conf": 7.5859375,
"calib/ece": 0.2562698412698412,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.01984126984126984,
"calib/gap": 0.016745558162923252,
"calib/mean_conf": 0.18777777777777777,
"calib/mu_c": 0.1982105263157895,
"calib/mu_w": 0.18146496815286625,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.033531746031746024,
"calib/std_conf": 0.22130166471981214,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.24587326120556413,
"calib/step_q_c_n": 647.0,
"calib/step_q_gap": 0.014157431089734024,
"calib/step_q_w": 0.2317158301158301,
"calib/step_q_w_n": 1295.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2644.0,
"completions/max_terminated_length": 2644.0,
"completions/mean_length": 405.52734375,
"completions/mean_terminated_length": 405.52734375,
"completions/min_length": 142.0,
"completions/min_terminated_length": 142.0,
"epoch": 0.06933333333333333,
"grad_norm": 0.005717034917324781,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.1686,
"num_tokens": 13979562.0,
"reward": 1.0164703130722046,
"reward_std": 0.14506025612354279,
"rewards/accuracy_reward_step": 0.37109375,
"rewards/final_brier_reward_step": 0.6774718761444092,
"rewards/format_reward_step": 0.984375,
"step": 65
},
{
"aux_distill/final_loss": 0.0007709396513746469,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0867828888585791,
"aux_distill/mean_u": 0.29476347266499914,
"aux_distill/n_active_final_tok": 31.0,
"aux_distill/n_active_tok": 282.75,
"aux_distill/step_loss": 0.8670579399913549,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5325065366405202,
"calib/avg_num_step_conf": 8.8359375,
"calib/ece": 0.2633064516129032,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.004032258064516129,
"calib/gap": 0.009518761924952324,
"calib/mean_conf": 0.17266129032258065,
"calib/mu_c": 0.17876404494382026,
"calib/mu_w": 0.16924528301886793,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.03854838709677419,
"calib/std_conf": 0.2014368101950606,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.22776296296296297,
"calib/step_q_c_n": 675.0,
"calib/step_q_gap": 0.025058237065042344,
"calib/step_q_w": 0.20270472589792063,
"calib/step_q_w_n": 1587.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2444.0,
"completions/max_terminated_length": 2444.0,
"completions/mean_length": 491.45703125,
"completions/mean_terminated_length": 493.38433837890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 94.0,
"epoch": 0.0704,
"grad_norm": 0.0049970392137765884,
"learning_rate": 3.7222222222222225e-06,
"loss": 0.1521,
"num_tokens": 14211727.0,
"reward": 0.9928978681564331,
"reward_std": 0.15571537613868713,
"rewards/accuracy_reward_step": 0.34765625,
"rewards/final_brier_reward_step": 0.6732957363128662,
"rewards/format_reward_step": 0.96484375,
"step": 66
},
{
"aux_distill/final_loss": 0.0029924702848802553,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08490731683559716,
"aux_distill/mean_u": 0.26169637778988236,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 246.75,
"aux_distill/step_loss": 0.8460806831717491,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5222349643221204,
"calib/avg_num_step_conf": 7.78125,
"calib/ece": 0.32205217391304347,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.007905138339920948,
"calib/gap": 0.013271527777777764,
"calib/mean_conf": 0.16474624505928856,
"calib/mu_c": 0.1723,
"calib/mu_w": 0.15902847222222224,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.027984189723320153,
"calib/std_conf": 0.19943857176791385,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.21303046272493575,
"calib/step_q_c_n": 778.0,
"calib/step_q_gap": -0.002558334639149923,
"calib/step_q_w": 0.21558879736408568,
"calib/step_q_w_n": 1214.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1549.0,
"completions/max_terminated_length": 1549.0,
"completions/mean_length": 433.0234375,
"completions/mean_terminated_length": 436.4330749511719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 174.0,
"epoch": 0.07146666666666666,
"grad_norm": 0.004873435944318771,
"learning_rate": 3.694444444444445e-06,
"loss": 0.073,
"num_tokens": 14427589.0,
"reward": 1.0285768508911133,
"reward_std": 0.12726490199565887,
"rewards/accuracy_reward_step": 0.42578125,
"rewards/final_brier_reward_step": 0.6430913209915161,
"rewards/format_reward_step": 0.98828125,
"step": 67
},
{
"aux_distill/final_loss": 0.0005568949854932725,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08274573809467256,
"aux_distill/mean_u": 0.24946699349708568,
"aux_distill/n_active_final_tok": 30.75,
"aux_distill/n_active_tok": 256.875,
"aux_distill/step_loss": 0.8269004672765732,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.4968096419709323,
"calib/avg_num_step_conf": 8.03125,
"calib/ece": 0.2826939024390244,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 0.016260162601626018,
"calib/gap": -0.011313314427507937,
"calib/mean_conf": 0.1962491869918699,
"calib/mu_c": 0.18912087912087913,
"calib/mu_w": 0.20043419354838707,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.05451219512195122,
"calib/std_conf": 0.22594439756723764,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.24029900332225917,
"calib/step_q_c_n": 602.0,
"calib/step_q_gap": -0.0018046417946596949,
"calib/step_q_w": 0.24210364511691887,
"calib/step_q_w_n": 1454.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2512.0,
"completions/max_terminated_length": 2512.0,
"completions/mean_length": 471.109375,
"completions/mean_terminated_length": 474.81890869140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.07253333333333334,
"grad_norm": 0.005671203602105379,
"learning_rate": 3.6666666666666666e-06,
"loss": 0.1928,
"num_tokens": 14652281.0,
"reward": 0.9734482765197754,
"reward_std": 0.19877000153064728,
"rewards/accuracy_reward_step": 0.35546875,
"rewards/final_brier_reward_step": 0.6422090530395508,
"rewards/format_reward_step": 0.94921875,
"step": 68
},
{
"aux_distill/final_loss": 0.004735883499961346,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08559095812961459,
"aux_distill/mean_u": 0.2745780916439154,
"aux_distill/n_active_final_tok": 31.5,
"aux_distill/n_active_tok": 238.375,
"aux_distill/step_loss": 0.8511736784130335,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4968984321745058,
"calib/avg_num_step_conf": 7.44921875,
"calib/ece": 0.276798418972332,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.01020518064076345,
"calib/mean_conf": 0.16913043478260872,
"calib/mu_c": 0.16255555555555556,
"calib/mu_w": 0.17276073619631901,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.045098814229249,
"calib/std_conf": 0.2054668711999103,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.24599970544919,
"calib/step_q_c_n": 679.0,
"calib/step_q_gap": 0.03817218101922257,
"calib/step_q_w": 0.20782752442996744,
"calib/step_q_w_n": 1228.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2207.0,
"completions/max_terminated_length": 2207.0,
"completions/mean_length": 478.5234375,
"completions/mean_terminated_length": 478.5234375,
"completions/min_length": 148.0,
"completions/min_terminated_length": 148.0,
"epoch": 0.0736,
"grad_norm": 0.005036453250795603,
"learning_rate": 3.638888888888889e-06,
"loss": 0.1474,
"num_tokens": 14879279.0,
"reward": 1.0080058574676514,
"reward_std": 0.13084906339645386,
"rewards/accuracy_reward_step": 0.3515625,
"rewards/final_brier_reward_step": 0.6800742149353027,
"rewards/format_reward_step": 0.984375,
"step": 69
},
{
"aux_distill/final_loss": 0.01110040802086587,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08515842608176172,
"aux_distill/mean_u": 0.26320584469456093,
"aux_distill/n_active_final_tok": 31.0,
"aux_distill/n_active_tok": 256.625,
"aux_distill/step_loss": 0.8404838293790817,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5273792613636363,
"calib/avg_num_step_conf": 8.12109375,
"calib/ece": 0.27547540322580644,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.024193548387096774,
"calib/gap": -0.006413636363636371,
"calib/mean_conf": 0.17791169354838712,
"calib/mu_c": 0.17377386363636363,
"calib/mu_w": 0.1801875,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0492741935483871,
"calib/std_conf": 0.22329706161560695,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.22325165794066318,
"calib/step_q_c_n": 573.0,
"calib/step_q_gap": 0.023962082907462656,
"calib/step_q_w": 0.19928957503320052,
"calib/step_q_w_n": 1506.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2565.0,
"completions/max_terminated_length": 2565.0,
"completions/mean_length": 477.8359375,
"completions/mean_terminated_length": 479.7098388671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 125.0,
"epoch": 0.07466666666666667,
"grad_norm": 0.004834037274122238,
"learning_rate": 3.6111111111111115e-06,
"loss": 0.1402,
"num_tokens": 15108597.0,
"reward": 0.9890013337135315,
"reward_std": 0.16583868861198425,
"rewards/accuracy_reward_step": 0.34375,
"rewards/final_brier_reward_step": 0.6655027270317078,
"rewards/format_reward_step": 0.96875,
"step": 70
},
{
"aux_distill/final_loss": 0.0004364773476481787,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08168565214145929,
"aux_distill/mean_u": 0.27507697667444936,
"aux_distill/n_active_final_tok": 30.75,
"aux_distill/n_active_tok": 259.25,
"aux_distill/step_loss": 0.8164200261235237,
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.4487418831168831,
"calib/avg_num_step_conf": 8.265625,
"calib/ece": 0.21979674796747972,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.012195121951219513,
"calib/gap": -0.01907954545454546,
"calib/mean_conf": 0.21565040650406503,
"calib/mu_c": 0.20199999999999999,
"calib/mu_w": 0.22107954545454545,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.07544715447154472,
"calib/std_conf": 0.2400139387463105,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2538742924528302,
"calib/step_q_c_n": 424.0,
"calib/step_q_gap": 0.004038063138409431,
"calib/step_q_w": 0.2498362293144208,
"calib/step_q_w_n": 1692.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2724.0,
"completions/max_terminated_length": 2724.0,
"completions/mean_length": 495.70703125,
"completions/mean_terminated_length": 497.6510009765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.07573333333333333,
"grad_norm": 0.0045654685236513615,
"learning_rate": 3.5833333333333335e-06,
"loss": 0.1787,
"num_tokens": 15339906.0,
"reward": 0.9661494493484497,
"reward_std": 0.18799448013305664,
"rewards/accuracy_reward_step": 0.2734375,
"rewards/final_brier_reward_step": 0.6979237794876099,
"rewards/format_reward_step": 0.9609375,
"step": 71
},
{
"aux_distill/final_loss": 0.0003922245759895304,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08633854042273015,
"aux_distill/mean_u": 0.2690554787303164,
"aux_distill/n_active_final_tok": 31.5,
"aux_distill/n_active_tok": 246.25,
"aux_distill/step_loss": 0.8629931565374136,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5078947368421053,
"calib/avg_num_step_conf": 7.6953125,
"calib/ece": 0.2662845849802371,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.007905138339920948,
"calib/gap": 0.0025096602265156087,
"calib/mean_conf": 0.15885375494071147,
"calib/mu_c": 0.1604210526315789,
"calib/mu_w": 0.1579113924050633,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.024822134387351785,
"calib/std_conf": 0.1917175086378693,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.224953125,
"calib/step_q_c_n": 640.0,
"calib/step_q_gap": 0.02508094454887219,
"calib/step_q_w": 0.1998721804511278,
"calib/step_q_w_n": 1330.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2916.0,
"completions/max_terminated_length": 2916.0,
"completions/mean_length": 451.59375,
"completions/mean_terminated_length": 451.59375,
"completions/min_length": 158.0,
"completions/min_terminated_length": 158.0,
"epoch": 0.0768,
"grad_norm": 0.004893281031399965,
"learning_rate": 3.555555555555556e-06,
"loss": 0.1686,
"num_tokens": 15559922.0,
"reward": 1.0133074522018433,
"reward_std": 0.13455332815647125,
"rewards/accuracy_reward_step": 0.37109375,
"rewards/final_brier_reward_step": 0.6711460947990417,
"rewards/format_reward_step": 0.984375,
"step": 72
},
{
"aux_distill/final_loss": 0.0012096594082322554,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08101288764737546,
"aux_distill/mean_u": 0.23852064830184302,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 241.125,
"aux_distill/step_loss": 0.8089192043989897,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4784493833504625,
"calib/avg_num_step_conf": 7.53515625,
"calib/ece": 0.3429362549800796,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.00796812749003984,
"calib/gap": -0.004278198869475863,
"calib/mean_conf": 0.15578884462151396,
"calib/mu_c": 0.15341964285714285,
"calib/mu_w": 0.15769784172661871,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.026254980079681273,
"calib/std_conf": 0.1906614384154937,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.20033562585969739,
"calib/step_q_c_n": 727.0,
"calib/step_q_gap": 0.0030925310177672416,
"calib/step_q_w": 0.19724309484193014,
"calib/step_q_w_n": 1202.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3031.0,
"completions/max_terminated_length": 3031.0,
"completions/mean_length": 455.16796875,
"completions/mean_terminated_length": 455.16796875,
"completions/min_length": 152.0,
"completions/min_terminated_length": 152.0,
"epoch": 0.07786666666666667,
"grad_norm": 0.005076461471617222,
"learning_rate": 3.5277777777777784e-06,
"loss": 0.1768,
"num_tokens": 15783477.0,
"reward": 1.013967752456665,
"reward_std": 0.1508485972881317,
"rewards/accuracy_reward_step": 0.4375,
"rewards/final_brier_reward_step": 0.6138730049133301,
"rewards/format_reward_step": 0.9765625,
"step": 73
},
{
"aux_distill/final_loss": 0.00911986950541177,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08642115711700171,
"aux_distill/mean_u": 0.24410496762555028,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 221.5,
"aux_distill/step_loss": 0.8550916835665703,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5061310330077707,
"calib/avg_num_step_conf": 7.00390625,
"calib/ece": 0.24390873015873016,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.015873015873015872,
"calib/gap": -0.004719826049761178,
"calib/mean_conf": 0.1889484126984127,
"calib/mu_c": 0.1857831325301205,
"calib/mu_w": 0.19050295857988167,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.05174603174603174,
"calib/std_conf": 0.21468508784246035,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.2299671052631579,
"calib/step_q_c_n": 532.0,
"calib/step_q_gap": -0.005350103301473369,
"calib/step_q_w": 0.23531720856463126,
"calib/step_q_w_n": 1261.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2217.0,
"completions/max_terminated_length": 2217.0,
"completions/mean_length": 418.3984375,
"completions/mean_terminated_length": 420.03924560546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 152.0,
"epoch": 0.07893333333333333,
"grad_norm": 0.005768840666860342,
"learning_rate": 3.5e-06,
"loss": 0.1246,
"num_tokens": 15994515.0,
"reward": 1.0006341934204102,
"reward_std": 0.14704427123069763,
"rewards/accuracy_reward_step": 0.32421875,
"rewards/final_brier_reward_step": 0.6965809464454651,
"rewards/format_reward_step": 0.98046875,
"step": 74
},
{
"aux_distill/final_loss": 0.0003913615232704615,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0784449273487553,
"aux_distill/mean_u": 0.23628307227973086,
"aux_distill/n_active_final_tok": 31.75,
"aux_distill/n_active_tok": 210.75,
"aux_distill/step_loss": 0.7840579003095627,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.48269230769230775,
"calib/avg_num_step_conf": 6.5859375,
"calib/ece": 0.3793307086614173,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.011811023622047244,
"calib/gap": -0.0010769230769230587,
"calib/mean_conf": 0.1744488188976378,
"calib/mu_c": 0.17392307692307693,
"calib/mu_w": 0.175,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.020984251968503936,
"calib/std_conf": 0.20148926657727723,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.20788372093023252,
"calib/step_q_c_n": 860.0,
"calib/step_q_gap": 0.00592246185032938,
"calib/step_q_w": 0.20196125907990314,
"calib/step_q_w_n": 826.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1192.0,
"completions/max_terminated_length": 1192.0,
"completions/mean_length": 402.38671875,
"completions/mean_terminated_length": 403.9647216796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 149.0,
"epoch": 0.08,
"grad_norm": 0.005615293513983488,
"learning_rate": 3.4722222222222224e-06,
"loss": 0.078,
"num_tokens": 16202278.0,
"reward": 1.0420210361480713,
"reward_std": 0.1387816220521927,
"rewards/accuracy_reward_step": 0.5078125,
"rewards/final_brier_reward_step": 0.5879480838775635,
"rewards/format_reward_step": 0.98828125,
"step": 75
},
{
"aux_distill/final_loss": 0.0003459962840679509,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0877249448094517,
"aux_distill/mean_u": 0.2889035125026195,
"aux_distill/n_active_final_tok": 31.5,
"aux_distill/n_active_tok": 214.125,
"aux_distill/step_loss": 0.8769034389406443,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5242957746478873,
"calib/avg_num_step_conf": 6.69140625,
"calib/ece": 0.3186734126984127,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.031746031746031744,
"calib/gap": 0.006201702944942322,
"calib/mean_conf": 0.19045357142857144,
"calib/mu_c": 0.1939481818181818,
"calib/mu_w": 0.18774647887323948,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.03630952380952381,
"calib/std_conf": 0.2311935786750537,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2421694152923538,
"calib/step_q_c_n": 667.0,
"calib/step_q_gap": 0.030811862711091847,
"calib/step_q_w": 0.21135755258126196,
"calib/step_q_w_n": 1046.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2517.0,
"completions/max_terminated_length": 2517.0,
"completions/mean_length": 412.6015625,
"completions/mean_terminated_length": 414.2196350097656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 138.0,
"epoch": 0.08106666666666666,
"grad_norm": 0.005647731013596058,
"learning_rate": 3.444444444444445e-06,
"loss": 0.0926,
"num_tokens": 16410960.0,
"reward": 1.0235514640808105,
"reward_std": 0.15799517929553986,
"rewards/accuracy_reward_step": 0.4296875,
"rewards/final_brier_reward_step": 0.6330406069755554,
"rewards/format_reward_step": 0.984375,
"step": 76
},
{
"aux_distill/final_loss": 0.00035550804295780836,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0853228303603828,
"aux_distill/mean_u": 0.2870328522394571,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 233.875,
"aux_distill/step_loss": 0.8528727777302265,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4457294224059956,
"calib/avg_num_step_conf": 7.30859375,
"calib/ece": 0.3397609561752986,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.01593625498007968,
"calib/gap": -0.03664749967696085,
"calib/mean_conf": 0.1509163346613546,
"calib/mu_c": 0.1301834862385321,
"calib/mu_w": 0.16683098591549295,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.028207171314741035,
"calib/std_conf": 0.1957179982341441,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.19174825174825175,
"calib/step_q_c_n": 715.0,
"calib/step_q_gap": -0.0004057274905025776,
"calib/step_q_w": 0.19215397923875432,
"calib/step_q_w_n": 1156.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3069.0,
"completions/max_terminated_length": 3069.0,
"completions/mean_length": 425.59375,
"completions/mean_terminated_length": 427.26275634765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 122.0,
"epoch": 0.08213333333333334,
"grad_norm": 0.005499564111232758,
"learning_rate": 3.416666666666667e-06,
"loss": 0.1261,
"num_tokens": 16624576.0,
"reward": 1.0059542655944824,
"reward_std": 0.13684764504432678,
"rewards/accuracy_reward_step": 0.42578125,
"rewards/final_brier_reward_step": 0.6056585907936096,
"rewards/format_reward_step": 0.98046875,
"step": 77
},
{
"aux_distill/final_loss": 0.02040222806544989,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08757964731194079,
"aux_distill/mean_u": 0.2869950557474653,
"aux_distill/n_active_final_tok": 31.125,
"aux_distill/n_active_tok": 225.125,
"aux_distill/step_loss": 0.8553942292928696,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.551591942820013,
"calib/avg_num_step_conf": 7.03515625,
"calib/ece": 0.3247389558232932,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.020080321285140562,
"calib/gap": 0.028787524366471695,
"calib/mean_conf": 0.16755020080321284,
"calib/mu_c": 0.1831578947368421,
"calib/mu_w": 0.1543703703703704,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0172289156626506,
"calib/std_conf": 0.21556506134494627,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.194529262086514,
"calib/step_q_c_n": 786.0,
"calib/step_q_gap": -0.01558482658343674,
"calib/step_q_w": 0.21011408866995074,
"calib/step_q_w_n": 1015.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2819.0,
"completions/max_terminated_length": 2819.0,
"completions/mean_length": 457.21484375,
"completions/mean_terminated_length": 462.6363830566406,
"completions/min_length": 0.0,
"completions/min_terminated_length": 150.0,
"epoch": 0.0832,
"grad_norm": 0.005055857822299004,
"learning_rate": 3.3888888888888893e-06,
"loss": 0.1238,
"num_tokens": 16849647.0,
"reward": 1.0199202299118042,
"reward_std": 0.17445480823516846,
"rewards/accuracy_reward_step": 0.44921875,
"rewards/final_brier_reward_step": 0.617965579032898,
"rewards/format_reward_step": 0.97265625,
"step": 78
},
{
"aux_distill/final_loss": 0.00029105614953550685,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08373665565159172,
"aux_distill/mean_u": 0.2616512076200522,
"aux_distill/n_active_final_tok": 31.875,
"aux_distill/n_active_tok": 208.75,
"aux_distill/step_loss": 0.8370754849165678,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5389074900793651,
"calib/avg_num_step_conf": 6.53515625,
"calib/ece": 0.32423359375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.01953125,
"calib/gap": 0.02861765873015873,
"calib/mean_conf": 0.16881328125,
"calib/mu_c": 0.1849107142857143,
"calib/mu_w": 0.15629305555555556,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.027773437499999998,
"calib/std_conf": 0.22332490340277414,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.21726239193083574,
"calib/step_q_c_n": 694.0,
"calib/step_q_gap": 0.012743801532470067,
"calib/step_q_w": 0.20451859039836567,
"calib/step_q_w_n": 979.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 994.0,
"completions/max_terminated_length": 994.0,
"completions/mean_length": 442.2734375,
"completions/mean_terminated_length": 444.00787353515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.08426666666666667,
"grad_norm": 0.0052482676692306995,
"learning_rate": 3.3611111111111117e-06,
"loss": 0.0962,
"num_tokens": 17069245.0,
"reward": 1.0397592782974243,
"reward_std": 0.11951573193073273,
"rewards/accuracy_reward_step": 0.4375,
"rewards/final_brier_reward_step": 0.6459249258041382,
"rewards/format_reward_step": 0.99609375,
"step": 79
},
{
"aux_distill/final_loss": 0.00031070784007170005,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0862661061109975,
"aux_distill/mean_u": 0.28112182156062415,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 223.25,
"aux_distill/step_loss": 0.8623503372073174,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.42532508127031765,
"calib/avg_num_step_conf": 6.98046875,
"calib/ece": 0.40391304347826096,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.011857707509881422,
"calib/gap": -0.02922355588897224,
"calib/mean_conf": 0.14106719367588932,
"calib/mu_c": 0.12674418604651164,
"calib/mu_w": 0.15596774193548388,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.017549407114624504,
"calib/std_conf": 0.18393386697878347,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.17260024301336574,
"calib/step_q_c_n": 823.0,
"calib/step_q_gap": -0.031564694745970345,
"calib/step_q_w": 0.20416493775933608,
"calib/step_q_w_n": 964.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2339.0,
"completions/max_terminated_length": 2339.0,
"completions/mean_length": 425.28515625,
"completions/mean_terminated_length": 426.9529724121094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 157.0,
"epoch": 0.08533333333333333,
"grad_norm": 0.005360201466828585,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.1185,
"num_tokens": 17280278.0,
"reward": 1.0216944217681885,
"reward_std": 0.1372138261795044,
"rewards/accuracy_reward_step": 0.50390625,
"rewards/final_brier_reward_step": 0.5551074743270874,
"rewards/format_reward_step": 0.984375,
"step": 80
},
{
"aux_distill/final_loss": 0.0003905783214577241,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08434331661555916,
"aux_distill/mean_u": 0.2819454667285919,
"aux_distill/n_active_final_tok": 30.875,
"aux_distill/n_active_tok": 222.625,
"aux_distill/step_loss": 0.8430425636470318,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.4894301470588235,
"calib/avg_num_step_conf": 7.1484375,
"calib/ece": 0.3860728744939271,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.012145748987854251,
"calib/gap": -0.02349264705882348,
"calib/mean_conf": 0.17805668016194334,
"calib/mu_c": 0.1658823529411765,
"calib/mu_w": 0.189375,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.04117408906882591,
"calib/std_conf": 0.2232595048457253,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.20123655913978494,
"calib/step_q_c_n": 744.0,
"calib/step_q_gap": -0.007897879165924077,
"calib/step_q_w": 0.20913443830570902,
"calib/step_q_w_n": 1086.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2382.0,
"completions/max_terminated_length": 2382.0,
"completions/mean_length": 469.6015625,
"completions/mean_terminated_length": 475.16998291015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.0864,
"grad_norm": 0.005131955724209547,
"learning_rate": 3.3055555555555558e-06,
"loss": 0.1836,
"num_tokens": 17506744.0,
"reward": 1.0026121139526367,
"reward_std": 0.19326549768447876,
"rewards/accuracy_reward_step": 0.46484375,
"rewards/final_brier_reward_step": 0.5755367279052734,
"rewards/format_reward_step": 0.96484375,
"step": 81
},
{
"aux_distill/final_loss": 0.0002871967699320521,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08434561325702816,
"aux_distill/mean_u": 0.23573001576920807,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 212.375,
"aux_distill/step_loss": 0.843168918043375,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5748409669211196,
"calib/avg_num_step_conf": 6.67578125,
"calib/ece": 0.363803984063745,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.03187250996015936,
"calib/gap": 0.03437472646310433,
"calib/mean_conf": 0.1901402390438247,
"calib/mu_c": 0.20808083333333335,
"calib/mu_w": 0.17370610687022903,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.03792828685258964,
"calib/std_conf": 0.2433397327036698,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.24587994428969362,
"calib/step_q_c_n": 718.0,
"calib/step_q_gap": 0.037715968507655295,
"calib/step_q_w": 0.20816397578203832,
"calib/step_q_w_n": 991.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2977.0,
"completions/max_terminated_length": 2977.0,
"completions/mean_length": 437.53515625,
"completions/mean_terminated_length": 439.2510070800781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 125.0,
"epoch": 0.08746666666666666,
"grad_norm": 0.005216378252953291,
"learning_rate": 3.277777777777778e-06,
"loss": 0.1722,
"num_tokens": 17724305.0,
"reward": 1.0312541723251343,
"reward_std": 0.1625767946243286,
"rewards/accuracy_reward_step": 0.46875,
"rewards/final_brier_reward_step": 0.6132895946502686,
"rewards/format_reward_step": 0.98046875,
"step": 82
},
{
"aux_distill/final_loss": 0.013176210077972428,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07911544980015606,
"aux_distill/mean_u": 0.26459626715690676,
"aux_distill/n_active_final_tok": 30.75,
"aux_distill/n_active_tok": 234.125,
"aux_distill/step_loss": 0.7779782712459564,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.46085120207927227,
"calib/avg_num_step_conf": 7.62109375,
"calib/ece": 0.3763855421686746,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.020080321285140562,
"calib/gap": -0.01789863547758283,
"calib/mean_conf": 0.1765461847389558,
"calib/mu_c": 0.1668421052631579,
"calib/mu_w": 0.18474074074074073,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.04755020080321285,
"calib/std_conf": 0.24044996071936003,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.2100665338645418,
"calib/step_q_c_n": 753.0,
"calib/step_q_gap": 0.011218286786077697,
"calib/step_q_w": 0.1988482470784641,
"calib/step_q_w_n": 1198.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2998.0,
"completions/max_terminated_length": 2998.0,
"completions/mean_length": 490.8984375,
"completions/mean_terminated_length": 498.69049072265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 168.0,
"epoch": 0.08853333333333334,
"grad_norm": 0.004931866656988859,
"learning_rate": 3.2500000000000002e-06,
"loss": 0.1023,
"num_tokens": 17957239.0,
"reward": 0.9939597845077515,
"reward_std": 0.18009626865386963,
"rewards/accuracy_reward_step": 0.4453125,
"rewards/final_brier_reward_step": 0.5816695094108582,
"rewards/format_reward_step": 0.9609375,
"step": 83
},
{
"aux_distill/final_loss": 0.00035623072062662686,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08428628486581147,
"aux_distill/mean_u": 0.2673578162871099,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 197.125,
"aux_distill/step_loss": 0.8425065949559212,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5251001421372271,
"calib/avg_num_step_conf": 6.2109375,
"calib/ece": 0.3058167330677291,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.01593625498007968,
"calib/gap": 0.025248740147305854,
"calib/mean_conf": 0.1901195219123506,
"calib/mu_c": 0.20440366972477064,
"calib/mu_w": 0.1791549295774648,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.030836653386454183,
"calib/std_conf": 0.2363947034813479,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.23518846153846154,
"calib/step_q_c_n": 650.0,
"calib/step_q_gap": 0.017486333878887095,
"calib/step_q_w": 0.21770212765957445,
"calib/step_q_w_n": 940.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2197.0,
"completions/max_terminated_length": 2197.0,
"completions/mean_length": 410.01953125,
"completions/mean_terminated_length": 413.2480163574219,
"completions/min_length": 0.0,
"completions/min_terminated_length": 139.0,
"epoch": 0.0896,
"grad_norm": 0.006021194159984589,
"learning_rate": 3.2222222222222227e-06,
"loss": 0.1134,
"num_tokens": 18168124.0,
"reward": 1.022384762763977,
"reward_std": 0.16455532610416412,
"rewards/accuracy_reward_step": 0.42578125,
"rewards/final_brier_reward_step": 0.6385195255279541,
"rewards/format_reward_step": 0.98046875,
"step": 84
},
{
"aux_distill/final_loss": 0.006193090820715952,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07979736186098307,
"aux_distill/mean_u": 0.2422023146113763,
"aux_distill/n_active_final_tok": 30.75,
"aux_distill/n_active_tok": 190.5,
"aux_distill/step_loss": 0.7917805155739188,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5719209558823529,
"calib/avg_num_step_conf": 6.1328125,
"calib/ece": 0.30544354838709675,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.028225806451612902,
"calib/gap": 0.05423319327731088,
"calib/mean_conf": 0.20633064516129032,
"calib/mu_c": 0.23607142857142854,
"calib/mu_w": 0.18183823529411766,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.03008064516129032,
"calib/std_conf": 0.2503913138042453,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.25409425625920473,
"calib/step_q_c_n": 679.0,
"calib/step_q_gap": 0.04691131574293089,
"calib/step_q_w": 0.20718294051627384,
"calib/step_q_w_n": 891.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 3035.0,
"completions/max_terminated_length": 3035.0,
"completions/mean_length": 453.69140625,
"completions/mean_terminated_length": 460.89288330078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.09066666666666667,
"grad_norm": 0.005572810769081116,
"learning_rate": 3.1944444444444443e-06,
"loss": 0.1582,
"num_tokens": 18392093.0,
"reward": 1.0132523775100708,
"reward_std": 0.1999160796403885,
"rewards/accuracy_reward_step": 0.4375,
"rewards/final_brier_reward_step": 0.6280671954154968,
"rewards/format_reward_step": 0.9609375,
"step": 85
},
{
"aux_distill/final_loss": 0.0001749628336256137,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08336643897928298,
"aux_distill/mean_u": 0.24921388933849128,
"aux_distill/n_active_final_tok": 31.75,
"aux_distill/n_active_tok": 198.0,
"aux_distill/step_loss": 0.8334894105792046,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4834799608993158,
"calib/avg_num_step_conf": 6.1875,
"calib/ece": 0.2961417322834645,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.023622047244094488,
"calib/gap": -0.02200977517106542,
"calib/mean_conf": 0.19858267716535433,
"calib/mu_c": 0.18515151515151518,
"calib/mu_w": 0.2071612903225806,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.05248031496062992,
"calib/std_conf": 0.24160462401532942,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.26600856164383563,
"calib/step_q_c_n": 584.0,
"calib/step_q_gap": 0.03908856164383562,
"calib/step_q_w": 0.22692,
"calib/step_q_w_n": 1000.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2454.0,
"completions/max_terminated_length": 2454.0,
"completions/mean_length": 431.05859375,
"completions/mean_terminated_length": 431.05859375,
"completions/min_length": 126.0,
"completions/min_terminated_length": 126.0,
"epoch": 0.09173333333333333,
"grad_norm": 0.005810996517539024,
"learning_rate": 3.1666666666666667e-06,
"loss": 0.1372,
"num_tokens": 18607956.0,
"reward": 1.015267252922058,
"reward_std": 0.14874380826950073,
"rewards/accuracy_reward_step": 0.38671875,
"rewards/final_brier_reward_step": 0.6516281366348267,
"rewards/format_reward_step": 0.9921875,
"step": 86
},
{
"aux_distill/final_loss": 0.00023801279348845128,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07791043911129236,
"aux_distill/mean_u": 0.24190785824083708,
"aux_distill/n_active_final_tok": 31.125,
"aux_distill/n_active_tok": 198.0,
"aux_distill/step_loss": 0.7788663636893034,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5239746383990489,
"calib/avg_num_step_conf": 6.1875,
"calib/ece": 0.45493600000000006,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.04,
"calib/gap": 0.0007821147876626577,
"calib/mean_conf": 0.205256,
"calib/mu_c": 0.205578231292517,
"calib/mu_w": 0.20479611650485435,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.036096,
"calib/std_conf": 0.2592800001234187,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.2335923076923077,
"calib/step_q_c_n": 910.0,
"calib/step_q_gap": -0.006748938598493487,
"calib/step_q_w": 0.2403412462908012,
"calib/step_q_w_n": 674.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2902.0,
"completions/max_terminated_length": 2902.0,
"completions/mean_length": 408.984375,
"completions/mean_terminated_length": 410.5882568359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.0928,
"grad_norm": 0.0059371222741901875,
"learning_rate": 3.138888888888889e-06,
"loss": 0.1722,
"num_tokens": 18818152.0,
"reward": 1.0399489402770996,
"reward_std": 0.20458626747131348,
"rewards/accuracy_reward_step": 0.578125,
"rewards/final_brier_reward_step": 0.5291163921356201,
"rewards/format_reward_step": 0.97265625,
"step": 87
},
{
"aux_distill/final_loss": 0.00039201670324473525,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0839028840418905,
"aux_distill/mean_u": 0.23952964579450506,
"aux_distill/n_active_final_tok": 31.0,
"aux_distill/n_active_tok": 192.5,
"aux_distill/step_loss": 0.8386368071660399,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.5390609390609391,
"calib/avg_num_step_conf": 6.19921875,
"calib/ece": 0.31815040322580646,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.04032258064516129,
"calib/gap": 0.007915371295371287,
"calib/mean_conf": 0.18644637096774194,
"calib/mu_c": 0.19101047619047617,
"calib/mu_w": 0.1830951048951049,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.04060483870967742,
"calib/std_conf": 0.2359217388068951,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.26278109028960817,
"calib/step_q_c_n": 587.0,
"calib/step_q_gap": 0.04397039028960817,
"calib/step_q_w": 0.2188107,
"calib/step_q_w_n": 1000.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2828.0,
"completions/max_terminated_length": 2828.0,
"completions/mean_length": 455.66796875,
"completions/mean_terminated_length": 462.90081787109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 146.0,
"epoch": 0.09386666666666667,
"grad_norm": 0.005285973194986582,
"learning_rate": 3.1111111111111116e-06,
"loss": 0.15,
"num_tokens": 19044651.0,
"reward": 1.0013043880462646,
"reward_std": 0.20347905158996582,
"rewards/accuracy_reward_step": 0.41015625,
"rewards/final_brier_reward_step": 0.6276086568832397,
"rewards/format_reward_step": 0.96484375,
"step": 88
},
{
"aux_distill/final_loss": 0.00025402456117262773,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08750179770868272,
"aux_distill/mean_u": 0.2729576968269954,
"aux_distill/n_active_final_tok": 31.25,
"aux_distill/n_active_tok": 186.875,
"aux_distill/step_loss": 0.8747639395296574,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5010689990281827,
"calib/avg_num_step_conf": 5.83984375,
"calib/ece": 0.3205952380952381,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.047619047619047616,
"calib/gap": 0.0007210884353741898,
"calib/mean_conf": 0.19472222222222224,
"calib/mu_c": 0.19514285714285717,
"calib/mu_w": 0.19442176870748298,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.04932539682539683,
"calib/std_conf": 0.2676846681632419,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.22526802218114603,
"calib/step_q_c_n": 541.0,
"calib/step_q_gap": 0.010194646919091499,
"calib/step_q_w": 0.21507337526205453,
"calib/step_q_w_n": 954.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3030.0,
"completions/max_terminated_length": 3030.0,
"completions/mean_length": 456.59765625,
"completions/mean_terminated_length": 456.59765625,
"completions/min_length": 112.0,
"completions/min_terminated_length": 112.0,
"epoch": 0.09493333333333333,
"grad_norm": 0.005344375967979431,
"learning_rate": 3.0833333333333336e-06,
"loss": 0.167,
"num_tokens": 19270428.0,
"reward": 1.0064247846603394,
"reward_std": 0.16095760464668274,
"rewards/accuracy_reward_step": 0.41015625,
"rewards/final_brier_reward_step": 0.6261308789253235,
"rewards/format_reward_step": 0.9765625,
"step": 89
},
{
"aux_distill/final_loss": 0.00018680925779790414,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08290021540597081,
"aux_distill/mean_u": 0.27581140384172503,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 185.875,
"aux_distill/step_loss": 0.828815333545208,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5554987212276215,
"calib/avg_num_step_conf": 5.9296875,
"calib/ece": 0.37179203187251,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.04780876494023904,
"calib/gap": 0.0645884143222506,
"calib/mean_conf": 0.20812828685258966,
"calib/mu_c": 0.2377205882352941,
"calib/mu_w": 0.1731321739130435,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.019043824701195217,
"calib/std_conf": 0.2730507936137177,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.248328488372093,
"calib/step_q_c_n": 688.0,
"calib/step_q_gap": 0.05466559680582794,
"calib/step_q_w": 0.19366289156626507,
"calib/step_q_w_n": 830.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2905.0,
"completions/max_terminated_length": 2905.0,
"completions/mean_length": 399.02734375,
"completions/mean_terminated_length": 402.1692810058594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.096,
"grad_norm": 0.005783769767731428,
"learning_rate": 3.055555555555556e-06,
"loss": 0.1483,
"num_tokens": 19475899.0,
"reward": 1.0489718914031982,
"reward_std": 0.19389018416404724,
"rewards/accuracy_reward_step": 0.53125,
"rewards/final_brier_reward_step": 0.5862250328063965,
"rewards/format_reward_step": 0.98046875,
"step": 90
},
{
"aux_distill/final_loss": 0.00015373918563454936,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08251471724361181,
"aux_distill/mean_u": 0.22730499874422605,
"aux_distill/n_active_final_tok": 31.125,
"aux_distill/n_active_tok": 188.25,
"aux_distill/step_loss": 0.8249934185296297,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5268458528449549,
"calib/avg_num_step_conf": 5.8828125,
"calib/ece": 0.385896,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.048,
"calib/gap": 0.03551953300404134,
"calib/mean_conf": 0.198024,
"calib/mu_c": 0.21493129770992367,
"calib/mu_w": 0.17941176470588233,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.029959999999999994,
"calib/std_conf": 0.2568820729907013,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.2567781329923274,
"calib/step_q_c_n": 782.0,
"calib/step_q_gap": 0.03151570205310089,
"calib/step_q_w": 0.2252624309392265,
"calib/step_q_w_n": 724.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2495.0,
"completions/max_terminated_length": 2495.0,
"completions/mean_length": 425.76171875,
"completions/mean_terminated_length": 427.431396484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 181.0,
"epoch": 0.09706666666666666,
"grad_norm": 0.005606526043266058,
"learning_rate": 3.0277777777777776e-06,
"loss": 0.1379,
"num_tokens": 19692606.0,
"reward": 1.0294326543807983,
"reward_std": 0.18703590333461761,
"rewards/accuracy_reward_step": 0.51171875,
"rewards/final_brier_reward_step": 0.5783966183662415,
"rewards/format_reward_step": 0.96875,
"step": 91
},
{
"aux_distill/final_loss": 0.00023135661149353837,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08267123310361058,
"aux_distill/mean_u": 0.20467404305081655,
"aux_distill/n_active_final_tok": 31.25,
"aux_distill/n_active_tok": 169.375,
"aux_distill/step_loss": 0.8264809604734182,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5591839353307244,
"calib/avg_num_step_conf": 5.29296875,
"calib/ece": 0.29054563492063495,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.031746031746031744,
"calib/gap": 0.07357448514787968,
"calib/mean_conf": 0.19532738095238097,
"calib/mu_c": 0.23707798165137617,
"calib/mu_w": 0.1635034965034965,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.02666666666666666,
"calib/std_conf": 0.2476480737194419,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.24213996247654787,
"calib/step_q_c_n": 533.0,
"calib/step_q_gap": -0.0040168501755202535,
"calib/step_q_w": 0.24615681265206812,
"calib/step_q_w_n": 822.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2905.0,
"completions/max_terminated_length": 2905.0,
"completions/mean_length": 416.05078125,
"completions/mean_terminated_length": 416.05078125,
"completions/min_length": 130.0,
"completions/min_terminated_length": 130.0,
"epoch": 0.09813333333333334,
"grad_norm": 0.005976582877337933,
"learning_rate": 3e-06,
"loss": 0.1585,
"num_tokens": 19905835.0,
"reward": 1.0285645723342896,
"reward_std": 0.16882646083831787,
"rewards/accuracy_reward_step": 0.42578125,
"rewards/final_brier_reward_step": 0.6547853350639343,
"rewards/format_reward_step": 0.9765625,
"step": 92
},
{
"aux_distill/final_loss": 0.0001452214287382958,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08422124257776886,
"aux_distill/mean_u": 0.2293708357468701,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 178.375,
"aux_distill/step_loss": 0.8420671913772821,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5493368868583033,
"calib/avg_num_step_conf": 5.578125,
"calib/ece": 0.32116875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0703125,
"calib/gap": 0.020842540770353385,
"calib/mean_conf": 0.2377375,
"calib/mu_c": 0.2501941747572815,
"calib/mu_w": 0.22935163398692812,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.07828124999999998,
"calib/std_conf": 0.2956682947948173,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.2604112149532711,
"calib/step_q_c_n": 535.0,
"calib/step_q_gap": -0.02375575033228322,
"calib/step_q_w": 0.2841669652855543,
"calib/step_q_w_n": 893.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1096.0,
"completions/max_terminated_length": 1096.0,
"completions/mean_length": 397.40234375,
"completions/mean_terminated_length": 398.9608154296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.0992,
"grad_norm": 0.005867833737283945,
"learning_rate": 2.9722222222222225e-06,
"loss": 0.1096,
"num_tokens": 20113346.0,
"reward": 1.0210399627685547,
"reward_std": 0.17285411059856415,
"rewards/accuracy_reward_step": 0.40234375,
"rewards/final_brier_reward_step": 0.6475486159324646,
"rewards/format_reward_step": 0.9921875,
"step": 93
},
{
"aux_distill/final_loss": 0.00018924074129245128,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07676532538607717,
"aux_distill/mean_u": 0.20391541947647757,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 174.75,
"aux_distill/step_loss": 0.7674639923498034,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5068449940066874,
"calib/avg_num_step_conf": 5.4609375,
"calib/ece": 0.38861111111111113,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.07539682539682539,
"calib/gap": 0.0010302189136331685,
"calib/mean_conf": 0.21996031746031747,
"calib/mu_c": 0.22049586776859503,
"calib/mu_w": 0.21946564885496186,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.06420634920634921,
"calib/std_conf": 0.2948546830891139,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.2472169811320755,
"calib/step_q_c_n": 636.0,
"calib/step_q_gap": 0.0038425716832565793,
"calib/step_q_w": 0.2433744094488189,
"calib/step_q_w_n": 762.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2772.0,
"completions/max_terminated_length": 2772.0,
"completions/mean_length": 408.67578125,
"completions/mean_terminated_length": 408.67578125,
"completions/min_length": 134.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.10026666666666667,
"grad_norm": 0.006220919080078602,
"learning_rate": 2.944444444444445e-06,
"loss": 0.1714,
"num_tokens": 20326647.0,
"reward": 1.0158402919769287,
"reward_std": 0.18431319296360016,
"rewards/accuracy_reward_step": 0.47265625,
"rewards/final_brier_reward_step": 0.5824617147445679,
"rewards/format_reward_step": 0.9765625,
"step": 94
},
{
"aux_distill/final_loss": 0.00012870838327216916,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08224111946765333,
"aux_distill/mean_u": 0.28012217788833976,
"aux_distill/n_active_final_tok": 31.5,
"aux_distill/n_active_tok": 172.25,
"aux_distill/step_loss": 0.8222824800759554,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.511155227831348,
"calib/avg_num_step_conf": 5.3828125,
"calib/ece": 0.3976984126984128,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.03571428571428571,
"calib/gap": -0.01264511249763664,
"calib/mean_conf": 0.18460317460317457,
"calib/mu_c": 0.17813008130081298,
"calib/mu_w": 0.19077519379844962,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.047103174603174605,
"calib/std_conf": 0.2611942985159199,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.22593749999999999,
"calib/step_q_c_n": 640.0,
"calib/step_q_gap": 0.01166920731707316,
"calib/step_q_w": 0.21426829268292683,
"calib/step_q_w_n": 738.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2393.0,
"completions/max_terminated_length": 2393.0,
"completions/mean_length": 401.68359375,
"completions/mean_terminated_length": 403.25885009765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 60.0,
"epoch": 0.10133333333333333,
"grad_norm": 0.006113733630627394,
"learning_rate": 2.916666666666667e-06,
"loss": 0.117,
"num_tokens": 20535606.0,
"reward": 1.0196096897125244,
"reward_std": 0.16846677660942078,
"rewards/accuracy_reward_step": 0.48046875,
"rewards/final_brier_reward_step": 0.5743758082389832,
"rewards/format_reward_step": 0.984375,
"step": 95
},
{
"aux_distill/final_loss": 0.0001608024745110015,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08210943453013897,
"aux_distill/mean_u": 0.2503238241556181,
"aux_distill/n_active_final_tok": 31.75,
"aux_distill/n_active_tok": 159.25,
"aux_distill/step_loss": 0.8209335319697857,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.546373326400624,
"calib/avg_num_step_conf": 4.9921875,
"calib/ece": 0.4583125490196078,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.047058823529411764,
"calib/gap": 0.05711899779019888,
"calib/mean_conf": 0.18435411764705883,
"calib/mu_c": 0.20630573248407644,
"calib/mu_w": 0.14918673469387755,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.013490196078431368,
"calib/std_conf": 0.2506256462564762,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.23626459143968875,
"calib/step_q_c_n": 771.0,
"calib/step_q_gap": 0.05926518315566509,
"calib/step_q_w": 0.17699940828402366,
"calib/step_q_w_n": 507.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1516.0,
"completions/max_terminated_length": 1516.0,
"completions/mean_length": 367.65625,
"completions/mean_terminated_length": 369.0980529785156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 120.0,
"epoch": 0.1024,
"grad_norm": 0.0068721650168299675,
"learning_rate": 2.888888888888889e-06,
"loss": 0.1151,
"num_tokens": 20735542.0,
"reward": 1.0744065046310425,
"reward_std": 0.14019350707530975,
"rewards/accuracy_reward_step": 0.61328125,
"rewards/final_brier_reward_step": 0.5394378900527954,
"rewards/format_reward_step": 0.99609375,
"step": 96
},
{
"aux_distill/final_loss": 0.00013028520993429993,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08546403562650084,
"aux_distill/mean_u": 0.23556217314631908,
"aux_distill/n_active_final_tok": 31.75,
"aux_distill/n_active_tok": 175.25,
"aux_distill/step_loss": 0.854510054923594,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5022513952308473,
"calib/avg_num_step_conf": 5.50390625,
"calib/ece": 0.3341732283464567,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.05511811023622047,
"calib/gap": 0.003915525114155244,
"calib/mean_conf": 0.19858267716535433,
"calib/mu_c": 0.20083333333333334,
"calib/mu_w": 0.1969178082191781,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.053779527559055115,
"calib/std_conf": 0.26903362904697825,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2331340579710145,
"calib/step_q_c_n": 552.0,
"calib/step_q_gap": 0.009210954120372705,
"calib/step_q_w": 0.2239231038506418,
"calib/step_q_w_n": 857.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1909.0,
"completions/max_terminated_length": 1909.0,
"completions/mean_length": 378.6875,
"completions/mean_terminated_length": 380.1725769042969,
"completions/min_length": 0.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.10346666666666667,
"grad_norm": 0.006627393886446953,
"learning_rate": 2.861111111111111e-06,
"loss": 0.1132,
"num_tokens": 20937558.0,
"reward": 1.0214437246322632,
"reward_std": 0.160974383354187,
"rewards/accuracy_reward_step": 0.421875,
"rewards/final_brier_reward_step": 0.6288249492645264,
"rewards/format_reward_step": 0.9921875,
"step": 97
},
{
"aux_distill/final_loss": 0.0001625970314762526,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08227646606974304,
"aux_distill/mean_u": 0.27415824672963807,
"aux_distill/n_active_final_tok": 31.0,
"aux_distill/n_active_tok": 143.375,
"aux_distill/step_loss": 0.8226020485162735,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4887820512820513,
"calib/avg_num_step_conf": 4.671875,
"calib/ece": 0.39055199999999995,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.06,
"calib/gap": -0.011074358974358989,
"calib/mean_conf": 0.214408,
"calib/mu_c": 0.2090923076923077,
"calib/mu_w": 0.22016666666666668,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.04247999999999999,
"calib/std_conf": 0.27076296928494487,
"calib/step_conf_rate": 0.9765625,
"calib/step_q_c": 0.25492631578947367,
"calib/step_q_c_n": 570.0,
"calib/step_q_gap": 0.05478254582142256,
"calib/step_q_w": 0.2001437699680511,
"calib/step_q_w_n": 626.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 3010.0,
"completions/max_terminated_length": 3010.0,
"completions/mean_length": 371.4921875,
"completions/mean_terminated_length": 377.388916015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 107.0,
"epoch": 0.10453333333333334,
"grad_norm": 0.00674501433968544,
"learning_rate": 2.8333333333333335e-06,
"loss": 0.0583,
"num_tokens": 21138844.0,
"reward": 1.0168352127075195,
"reward_std": 0.1888578236103058,
"rewards/accuracy_reward_step": 0.5078125,
"rewards/final_brier_reward_step": 0.5571080446243286,
"rewards/format_reward_step": 0.96875,
"step": 98
},
{
"aux_distill/final_loss": 0.0002174276502273642,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0827044548932463,
"aux_distill/mean_u": 0.22012309214152356,
"aux_distill/n_active_final_tok": 31.25,
"aux_distill/n_active_tok": 157.0,
"aux_distill/step_loss": 0.8268271088600159,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5498447204968944,
"calib/avg_num_step_conf": 4.90625,
"calib/ece": 0.20948818897637797,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.06692913385826772,
"calib/gap": 0.07727795031055901,
"calib/mean_conf": 0.19059055118110235,
"calib/mu_c": 0.24657142857142858,
"calib/mu_w": 0.16929347826086957,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.06224409448818897,
"calib/std_conf": 0.2712947333651442,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.3062349397590361,
"calib/step_q_c_n": 332.0,
"calib/step_q_gap": 0.09580203932613565,
"calib/step_q_w": 0.21043290043290044,
"calib/step_q_w_n": 924.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2946.0,
"completions/max_terminated_length": 2946.0,
"completions/mean_length": 435.6328125,
"completions/mean_terminated_length": 435.6328125,
"completions/min_length": 112.0,
"completions/min_terminated_length": 112.0,
"epoch": 0.1056,
"grad_norm": 0.006802734453231096,
"learning_rate": 2.805555555555556e-06,
"loss": 0.1235,
"num_tokens": 21356166.0,
"reward": 0.9909521341323853,
"reward_std": 0.16204184293746948,
"rewards/accuracy_reward_step": 0.2734375,
"rewards/final_brier_reward_step": 0.7319043278694153,
"rewards/format_reward_step": 0.9765625,
"step": 99
},
{
"aux_distill/final_loss": 0.00017292160646320553,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08561063604429364,
"aux_distill/mean_u": 0.27638954939157084,
"aux_distill/n_active_final_tok": 31.875,
"aux_distill/n_active_tok": 167.375,
"aux_distill/step_loss": 0.8559334147721529,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5194937805557275,
"calib/avg_num_step_conf": 5.265625,
"calib/ece": 0.335913671875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.046875,
"calib/gap": 0.04397773995915588,
"calib/mean_conf": 0.20025820312499998,
"calib/mu_c": 0.22482389380530973,
"calib/mu_w": 0.18084615384615385,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0473828125,
"calib/std_conf": 0.27833485020557297,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.25755390070921985,
"calib/step_q_c_n": 564.0,
"calib/step_q_gap": 0.0012479058112606634,
"calib/step_q_w": 0.2563059948979592,
"calib/step_q_w_n": 784.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1347.0,
"completions/max_terminated_length": 1347.0,
"completions/mean_length": 403.72265625,
"completions/mean_terminated_length": 405.305908203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 107.0,
"epoch": 0.10666666666666667,
"grad_norm": 0.006777219939976931,
"learning_rate": 2.7777777777777783e-06,
"loss": 0.0815,
"num_tokens": 21566927.0,
"reward": 1.0404518842697144,
"reward_std": 0.14150357246398926,
"rewards/accuracy_reward_step": 0.44140625,
"rewards/final_brier_reward_step": 0.6394974589347839,
"rewards/format_reward_step": 1.0,
"step": 100
},
{
"aux_distill/final_loss": 0.00014713435496105376,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08331552986055613,
"aux_distill/mean_u": 0.23223354301709642,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 172.25,
"aux_distill/step_loss": 0.8330081449821591,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5067667654187988,
"calib/avg_num_step_conf": 5.40625,
"calib/ece": 0.3251880952380952,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.05555555555555555,
"calib/gap": -0.010078534877457634,
"calib/mean_conf": 0.2034626984126984,
"calib/mu_c": 0.19714361702127656,
"calib/mu_w": 0.2072221518987342,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.07781746031746031,
"calib/std_conf": 0.2780667059783874,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.2471811320754717,
"calib/step_q_c_n": 477.0,
"calib/step_q_gap": 0.04255268665099543,
"calib/step_q_w": 0.20462844542447628,
"calib/step_q_w_n": 907.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2993.0,
"completions/max_terminated_length": 2993.0,
"completions/mean_length": 446.56640625,
"completions/mean_terminated_length": 450.0826721191406,
"completions/min_length": 0.0,
"completions/min_terminated_length": 122.0,
"epoch": 0.10773333333333333,
"grad_norm": 0.006341983564198017,
"learning_rate": 2.7500000000000004e-06,
"loss": 0.1127,
"num_tokens": 21788240.0,
"reward": 0.9958369731903076,
"reward_std": 0.17155815660953522,
"rewards/accuracy_reward_step": 0.3671875,
"rewards/final_brier_reward_step": 0.6440176963806152,
"rewards/format_reward_step": 0.98046875,
"step": 101
},
{
"aux_distill/final_loss": 0.007553740155685773,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07908109948039055,
"aux_distill/mean_u": 0.22337070525279093,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 160.5,
"aux_distill/step_loss": 0.7832572367042303,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5951015829490215,
"calib/avg_num_step_conf": 5.015625,
"calib/ece": 0.39274901960784314,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.06666666666666667,
"calib/gap": 0.10104025925464286,
"calib/mean_conf": 0.2143098039215686,
"calib/mu_c": 0.2590845070422535,
"calib/mu_w": 0.15804424778761064,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.02509803921568627,
"calib/std_conf": 0.2872497343843561,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.2627881619937695,
"calib/step_q_c_n": 642.0,
"calib/step_q_gap": 0.02522570093457946,
"calib/step_q_w": 0.23756246105919002,
"calib/step_q_w_n": 642.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2382.0,
"completions/max_terminated_length": 2382.0,
"completions/mean_length": 377.078125,
"completions/mean_terminated_length": 377.078125,
"completions/min_length": 84.0,
"completions/min_terminated_length": 84.0,
"epoch": 0.1088,
"grad_norm": 0.007032997440546751,
"learning_rate": 2.7222222222222224e-06,
"loss": 0.1412,
"num_tokens": 21991468.0,
"reward": 1.069368839263916,
"reward_std": 0.1699509620666504,
"rewards/accuracy_reward_step": 0.5546875,
"rewards/final_brier_reward_step": 0.5957688093185425,
"rewards/format_reward_step": 0.98828125,
"step": 102
},
{
"aux_distill/final_loss": 0.009660119393060995,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.09102308715227991,
"aux_distill/mean_u": 0.280279918769106,
"aux_distill/n_active_final_tok": 31.0,
"aux_distill/n_active_tok": 148.125,
"aux_distill/step_loss": 0.9005707409232855,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.502552467385139,
"calib/avg_num_step_conf": 4.62890625,
"calib/ece": 0.40069007936507933,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.07142857142857142,
"calib/gap": -0.027315201361315933,
"calib/mean_conf": 0.20438928571428572,
"calib/mu_c": 0.19040650406504067,
"calib/mu_w": 0.2177217054263566,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.0584920634920635,
"calib/std_conf": 0.27734273551106337,
"calib/step_conf_rate": 0.98046875,
"calib/step_q_c": 0.25415204678362574,
"calib/step_q_c_n": 513.0,
"calib/step_q_gap": -0.03337191154970759,
"calib/step_q_w": 0.28752395833333333,
"calib/step_q_w_n": 672.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2869.0,
"completions/max_terminated_length": 2869.0,
"completions/mean_length": 463.85546875,
"completions/mean_terminated_length": 463.85546875,
"completions/min_length": 131.0,
"completions/min_terminated_length": 131.0,
"epoch": 0.10986666666666667,
"grad_norm": 0.006891104858368635,
"learning_rate": 2.6944444444444444e-06,
"loss": 0.1201,
"num_tokens": 22214767.0,
"reward": 1.0047416687011719,
"reward_std": 0.19548439979553223,
"rewards/accuracy_reward_step": 0.48046875,
"rewards/final_brier_reward_step": 0.560264527797699,
"rewards/format_reward_step": 0.96875,
"step": 103
},
{
"aux_distill/final_loss": 0.00011444762037626788,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08198370668105781,
"aux_distill/mean_u": 0.23481959256142368,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 162.625,
"aux_distill/step_loss": 0.8197226040065289,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5480793293293293,
"calib/avg_num_step_conf": 5.08203125,
"calib/ece": 0.35988235294117643,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.08235294117647059,
"calib/gap": 0.01469782282282281,
"calib/mean_conf": 0.20341176470588238,
"calib/mu_c": 0.21171171171171171,
"calib/mu_w": 0.1970138888888889,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.064,
"calib/std_conf": 0.2972681615336423,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.28513866666666665,
"calib/step_q_c_n": 525.0,
"calib/step_q_gap": 0.0488757800687285,
"calib/step_q_w": 0.23626288659793815,
"calib/step_q_w_n": 776.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1235.0,
"completions/max_terminated_length": 1235.0,
"completions/mean_length": 408.1015625,
"completions/mean_terminated_length": 409.7019958496094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.11093333333333333,
"grad_norm": 0.0072467573918402195,
"learning_rate": 2.666666666666667e-06,
"loss": 0.0866,
"num_tokens": 22425921.0,
"reward": 1.0171546936035156,
"reward_std": 0.17794030904769897,
"rewards/accuracy_reward_step": 0.43359375,
"rewards/final_brier_reward_step": 0.6124343872070312,
"rewards/format_reward_step": 0.98828125,
"step": 104
},
{
"aux_distill/final_loss": 0.012328775661444524,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08135199698153883,
"aux_distill/mean_u": 0.21711008744151833,
"aux_distill/n_active_final_tok": 31.25,
"aux_distill/n_active_tok": 166.375,
"aux_distill/step_loss": 0.8011911753565073,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5233015873015873,
"calib/avg_num_step_conf": 5.203125,
"calib/ece": 0.41362231075697214,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.09163346613545817,
"calib/gap": 0.013755746031746036,
"calib/mean_conf": 0.2204812749003984,
"calib/mu_c": 0.22733174603174602,
"calib/mu_w": 0.213576,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.06605577689243027,
"calib/std_conf": 0.31173055904450453,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.2567628424657534,
"calib/step_q_c_n": 584.0,
"calib/step_q_gap": -0.05514491154494172,
"calib/step_q_w": 0.31190775401069515,
"calib/step_q_w_n": 748.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2354.0,
"completions/max_terminated_length": 2354.0,
"completions/mean_length": 460.84375,
"completions/mean_terminated_length": 464.4724426269531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 127.0,
"epoch": 0.112,
"grad_norm": 0.006027123890817165,
"learning_rate": 2.6388888888888893e-06,
"loss": 0.1147,
"num_tokens": 22649657.0,
"reward": 1.018819808959961,
"reward_std": 0.21033445000648499,
"rewards/accuracy_reward_step": 0.4921875,
"rewards/final_brier_reward_step": 0.5688895583152771,
"rewards/format_reward_step": 0.9765625,
"step": 105
},
{
"aux_distill/final_loss": 0.00011433741019573063,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08694498904515058,
"aux_distill/mean_u": 0.25351431589983237,
"aux_distill/n_active_final_tok": 31.5,
"aux_distill/n_active_tok": 144.75,
"aux_distill/step_loss": 0.8693355321884155,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5080436565516371,
"calib/avg_num_step_conf": 4.5234375,
"calib/ece": 0.358015873015873,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.051587301587301584,
"calib/gap": 0.020511008769162842,
"calib/mean_conf": 0.21603174603174605,
"calib/mu_c": 0.22669421487603306,
"calib/mu_w": 0.20618320610687021,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.04694444444444444,
"calib/std_conf": 0.2687377294801713,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.29774422735346356,
"calib/step_q_c_n": 563.0,
"calib/step_q_gap": 0.04271901726942995,
"calib/step_q_w": 0.2550252100840336,
"calib/step_q_w_n": 595.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2663.0,
"completions/max_terminated_length": 2663.0,
"completions/mean_length": 399.9296875,
"completions/mean_terminated_length": 401.4980773925781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 157.0,
"epoch": 0.11306666666666666,
"grad_norm": 0.007543689571321011,
"learning_rate": 2.6111111111111113e-06,
"loss": 0.1226,
"num_tokens": 22856623.0,
"reward": 1.0349606275558472,
"reward_std": 0.16685569286346436,
"rewards/accuracy_reward_step": 0.4765625,
"rewards/final_brier_reward_step": 0.60898357629776,
"rewards/format_reward_step": 0.984375,
"step": 106
},
{
"aux_distill/final_loss": 0.015143587786269563,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08319069840945303,
"aux_distill/mean_u": 0.24450310729418495,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 156.625,
"aux_distill/step_loss": 0.8167633889243007,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4672549019607843,
"calib/avg_num_step_conf": 4.89453125,
"calib/ece": 0.437390513833992,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.07509881422924901,
"calib/gap": -0.008791777777777815,
"calib/mean_conf": 0.245297233201581,
"calib/mu_c": 0.2418222222222222,
"calib/mu_w": 0.250614,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.03897233201581027,
"calib/std_conf": 0.29765119935010886,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3058320113314447,
"calib/step_q_c_n": 706.0,
"calib/step_q_gap": 0.004860530527057205,
"calib/step_q_w": 0.3009714808043875,
"calib/step_q_w_n": 547.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2283.0,
"completions/max_terminated_length": 2283.0,
"completions/mean_length": 412.12890625,
"completions/mean_terminated_length": 412.12890625,
"completions/min_length": 116.0,
"completions/min_terminated_length": 116.0,
"epoch": 0.11413333333333334,
"grad_norm": 0.006644075736403465,
"learning_rate": 2.5833333333333337e-06,
"loss": 0.1244,
"num_tokens": 23066744.0,
"reward": 1.0612491369247437,
"reward_std": 0.1764146387577057,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/final_brier_reward_step": 0.5326545238494873,
"rewards/format_reward_step": 0.98828125,
"step": 107
},
{
"aux_distill/final_loss": 0.00011162566943312413,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07747206592466682,
"aux_distill/mean_u": 0.22652066254173892,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 174.25,
"aux_distill/step_loss": 0.7746090218424797,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5271950271950272,
"calib/avg_num_step_conf": 5.4453125,
"calib/ece": 0.4207996078431372,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.10588235294117647,
"calib/gap": 0.06467196969696962,
"calib/mean_conf": 0.25653372549019604,
"calib/mu_c": 0.2816416666666666,
"calib/mu_w": 0.216969696969697,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.032784313725490205,
"calib/std_conf": 0.3157732207471411,
"calib/step_conf_rate": 0.98046875,
"calib/step_q_c": 0.35206224611708486,
"calib/step_q_c_n": 837.0,
"calib/step_q_gap": 0.03558109710451751,
"calib/step_q_w": 0.31648114901256735,
"calib/step_q_w_n": 557.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1422.0,
"completions/max_terminated_length": 1422.0,
"completions/mean_length": 450.72265625,
"completions/mean_terminated_length": 452.490234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 126.0,
"epoch": 0.1152,
"grad_norm": 0.006208624690771103,
"learning_rate": 2.5555555555555557e-06,
"loss": 0.0493,
"num_tokens": 23285361.0,
"reward": 1.069959044456482,
"reward_std": 0.20371109247207642,
"rewards/accuracy_reward_step": 0.609375,
"rewards/final_brier_reward_step": 0.5500743389129639,
"rewards/format_reward_step": 0.98046875,
"step": 108
},
{
"aux_distill/final_loss": 0.009648739696444864,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0812309457687661,
"aux_distill/mean_u": 0.21662739191092542,
"aux_distill/n_active_final_tok": 30.375,
"aux_distill/n_active_tok": 167.875,
"aux_distill/step_loss": 0.8026606999337673,
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.5209545214172395,
"calib/avg_num_step_conf": 5.31640625,
"calib/ece": 0.3786040650406505,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 0.11788617886178862,
"calib/gap": 0.0037177154944474933,
"calib/mean_conf": 0.2769243902439025,
"calib/mu_c": 0.2787983606557377,
"calib/mu_w": 0.2750806451612902,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.07979674796747968,
"calib/std_conf": 0.3287158839057603,
"calib/step_conf_rate": 0.9765625,
"calib/step_q_c": 0.32818075709779176,
"calib/step_q_c_n": 634.0,
"calib/step_q_gap": 0.06785063330136809,
"calib/step_q_w": 0.2603301237964237,
"calib/step_q_w_n": 727.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2738.0,
"completions/max_terminated_length": 2738.0,
"completions/mean_length": 478.3203125,
"completions/mean_terminated_length": 487.8486328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 108.0,
"epoch": 0.11626666666666667,
"grad_norm": 0.007237493991851807,
"learning_rate": 2.5277777777777778e-06,
"loss": 0.1332,
"num_tokens": 23512411.0,
"reward": 0.9953612089157104,
"reward_std": 0.25147342681884766,
"rewards/accuracy_reward_step": 0.4765625,
"rewards/final_brier_reward_step": 0.5649412274360657,
"rewards/format_reward_step": 0.94921875,
"step": 109
},
{
"aux_distill/final_loss": 0.009435338434059304,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08010290749371052,
"aux_distill/mean_u": 0.21185826195867102,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 144.25,
"aux_distill/step_loss": 0.7915937229990959,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5496252261566296,
"calib/avg_num_step_conf": 4.6796875,
"calib/ece": 0.2886376984126984,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.09126984126984126,
"calib/gap": 0.062404380976996576,
"calib/mean_conf": 0.2562035714285714,
"calib/mu_c": 0.2923584905660377,
"calib/mu_w": 0.2299541095890411,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.06210317460317461,
"calib/std_conf": 0.31680728110360845,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.3212387387387387,
"calib/step_q_c_n": 444.0,
"calib/step_q_gap": 0.0304860862188448,
"calib/step_q_w": 0.2907526525198939,
"calib/step_q_w_n": 754.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2423.0,
"completions/max_terminated_length": 2423.0,
"completions/mean_length": 445.17578125,
"completions/mean_terminated_length": 446.9216003417969,
"completions/min_length": 0.0,
"completions/min_terminated_length": 65.0,
"epoch": 0.11733333333333333,
"grad_norm": 0.006269925739616156,
"learning_rate": 2.5e-06,
"loss": 0.097,
"num_tokens": 23731296.0,
"reward": 1.017866849899292,
"reward_std": 0.2148369401693344,
"rewards/accuracy_reward_step": 0.4140625,
"rewards/final_brier_reward_step": 0.6451085805892944,
"rewards/format_reward_step": 0.9765625,
"step": 110
},
{
"aux_distill/final_loss": 0.00016764799386237428,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07741684489883482,
"aux_distill/mean_u": 0.24787259585562457,
"aux_distill/n_active_final_tok": 31.125,
"aux_distill/n_active_tok": 148.125,
"aux_distill/step_loss": 0.7740007806569338,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.48793092582088615,
"calib/avg_num_step_conf": 4.6328125,
"calib/ece": 0.38767103174603174,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.06349206349206349,
"calib/gap": -0.0182864624692759,
"calib/mean_conf": 0.23494801587301586,
"calib/mu_c": 0.226022480620155,
"calib/mu_w": 0.2443089430894309,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.05535714285714284,
"calib/std_conf": 0.2902151359166349,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.28327626811594203,
"calib/step_q_c_n": 552.0,
"calib/step_q_gap": 0.0021421987153111277,
"calib/step_q_w": 0.2811340694006309,
"calib/step_q_w_n": 634.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2987.0,
"completions/max_terminated_length": 2987.0,
"completions/mean_length": 511.796875,
"completions/mean_terminated_length": 511.796875,
"completions/min_length": 156.0,
"completions/min_terminated_length": 156.0,
"epoch": 0.1184,
"grad_norm": 0.005823335610330105,
"learning_rate": 2.4722222222222226e-06,
"loss": 0.1564,
"num_tokens": 23969724.0,
"reward": 1.0191161632537842,
"reward_std": 0.20448820292949677,
"rewards/accuracy_reward_step": 0.50390625,
"rewards/final_brier_reward_step": 0.5616699457168579,
"rewards/format_reward_step": 0.97265625,
"step": 111
},
{
"aux_distill/final_loss": 0.00019943361803598236,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0781205742387101,
"aux_distill/mean_u": 0.26338176908493466,
"aux_distill/n_active_final_tok": 31.125,
"aux_distill/n_active_tok": 154.125,
"aux_distill/step_loss": 0.7810062952339649,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5561535011852137,
"calib/avg_num_step_conf": 4.921875,
"calib/ece": 0.33006480000000005,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.056,
"calib/gap": 0.07037543724774167,
"calib/mean_conf": 0.20801519999999998,
"calib/mu_c": 0.2443289256198347,
"calib/mu_w": 0.17395348837209304,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.98046875,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.02704,
"calib/std_conf": 0.2771900833885657,
"calib/step_conf_rate": 0.9765625,
"calib/step_q_c": 0.2894207272727273,
"calib/step_q_c_n": 550.0,
"calib/step_q_gap": 0.060798614596670936,
"calib/step_q_w": 0.22862211267605634,
"calib/step_q_w_n": 710.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2891.0,
"completions/max_terminated_length": 2891.0,
"completions/mean_length": 485.859375,
"completions/mean_terminated_length": 489.6850280761719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 178.0,
"epoch": 0.11946666666666667,
"grad_norm": 0.006966287270188332,
"learning_rate": 2.4444444444444447e-06,
"loss": 0.1186,
"num_tokens": 24202024.0,
"reward": 1.0294950008392334,
"reward_std": 0.19632869958877563,
"rewards/accuracy_reward_step": 0.47265625,
"rewards/final_brier_reward_step": 0.6136775016784668,
"rewards/format_reward_step": 0.97265625,
"step": 112
},
{
"aux_distill/final_loss": 0.006108010040748013,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07722375844605267,
"aux_distill/mean_u": 0.22336582107667688,
"aux_distill/n_active_final_tok": 31.125,
"aux_distill/n_active_tok": 167.5,
"aux_distill/step_loss": 0.766129563562572,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.4713377882599581,
"calib/avg_num_step_conf": 5.43359375,
"calib/ece": 0.442466,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.068,
"calib/gap": -0.017289635744234783,
"calib/mean_conf": 0.222494,
"calib/mu_c": 0.21516319444444443,
"calib/mu_w": 0.2324528301886792,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.04447999999999999,
"calib/std_conf": 0.2998736883489447,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.2770047493403694,
"calib/step_q_c_n": 758.0,
"calib/step_q_gap": 0.02510901474321303,
"calib/step_q_w": 0.25189573459715636,
"calib/step_q_w_n": 633.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 3005.0,
"completions/max_terminated_length": 3005.0,
"completions/mean_length": 467.87890625,
"completions/mean_terminated_length": 473.4269104003906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 178.0,
"epoch": 0.12053333333333334,
"grad_norm": 0.006655262317508459,
"learning_rate": 2.4166666666666667e-06,
"loss": 0.1486,
"num_tokens": 24427001.0,
"reward": 1.0256073474884033,
"reward_std": 0.21474526822566986,
"rewards/accuracy_reward_step": 0.5625,
"rewards/final_brier_reward_step": 0.5160583853721619,
"rewards/format_reward_step": 0.97265625,
"step": 113
},
{
"aux_distill/final_loss": 0.00025458406253164867,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0691154002561234,
"aux_distill/mean_u": 0.19843236018875485,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 168.625,
"aux_distill/step_loss": 0.6908994019031525,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.47466216216216217,
"calib/avg_num_step_conf": 5.26953125,
"calib/ece": 0.4669444444444444,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.04365079365079365,
"calib/gap": -0.017780665280665242,
"calib/mean_conf": 0.18686507936507937,
"calib/mu_c": 0.17952702702702705,
"calib/mu_w": 0.1973076923076923,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.03325396825396826,
"calib/std_conf": 0.27517976384782167,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.2416880308880309,
"calib/step_q_c_n": 777.0,
"calib/step_q_gap": -0.02471057051057049,
"calib/step_q_w": 0.2663986013986014,
"calib/step_q_w_n": 572.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2955.0,
"completions/max_terminated_length": 2955.0,
"completions/mean_length": 466.74609375,
"completions/mean_terminated_length": 470.4212646484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 148.0,
"epoch": 0.1216,
"grad_norm": 0.007292897906154394,
"learning_rate": 2.388888888888889e-06,
"loss": 0.0881,
"num_tokens": 24651512.0,
"reward": 1.031526803970337,
"reward_std": 0.16938289999961853,
"rewards/accuracy_reward_step": 0.578125,
"rewards/final_brier_reward_step": 0.5044597387313843,
"rewards/format_reward_step": 0.98046875,
"step": 114
},
{
"aux_distill/final_loss": 0.00021639067404066736,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06915236706845462,
"aux_distill/mean_u": 0.1697514341712585,
"aux_distill/n_active_final_tok": 31.25,
"aux_distill/n_active_tok": 178.25,
"aux_distill/step_loss": 0.6913072746247053,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5266768292682927,
"calib/avg_num_step_conf": 5.70703125,
"calib/ece": 0.3867912350597611,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.05976095617529881,
"calib/gap": 0.008917117632113891,
"calib/mean_conf": 0.20269083665338644,
"calib/mu_c": 0.20723821138211387,
"calib/mu_w": 0.19832109374999998,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.04972111553784861,
"calib/std_conf": 0.28575867224050194,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.28042391653290527,
"calib/step_q_c_n": 623.0,
"calib/step_q_gap": 0.03259336760689094,
"calib/step_q_w": 0.24783054892601433,
"calib/step_q_w_n": 838.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2338.0,
"completions/max_terminated_length": 2338.0,
"completions/mean_length": 481.078125,
"completions/mean_terminated_length": 486.7826232910156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 171.0,
"epoch": 0.12266666666666666,
"grad_norm": 0.006700317841023207,
"learning_rate": 2.361111111111111e-06,
"loss": 0.0762,
"num_tokens": 24879932.0,
"reward": 1.0177994966506958,
"reward_std": 0.20145021378993988,
"rewards/accuracy_reward_step": 0.48046875,
"rewards/final_brier_reward_step": 0.5785678029060364,
"rewards/format_reward_step": 0.9765625,
"step": 115
},
{
"aux_distill/final_loss": 0.0003319979881553081,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07306196971330792,
"aux_distill/mean_u": 0.1895094989100424,
"aux_distill/n_active_final_tok": 30.375,
"aux_distill/n_active_tok": 196.75,
"aux_distill/step_loss": 0.7302876887843013,
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.5061617968594713,
"calib/avg_num_step_conf": 6.578125,
"calib/ece": 0.4580081300813008,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 0.07723577235772358,
"calib/gap": -0.031216457960644012,
"calib/mean_conf": 0.18004065040650405,
"calib/mu_c": 0.16519379844961243,
"calib/mu_w": 0.19641025641025645,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.05682926829268293,
"calib/std_conf": 0.2798409797676183,
"calib/step_conf_rate": 0.9765625,
"calib/step_q_c": 0.24775482093663911,
"calib/step_q_c_n": 726.0,
"calib/step_q_gap": -0.035533279272129153,
"calib/step_q_w": 0.28328810020876827,
"calib/step_q_w_n": 958.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 3057.0,
"completions/max_terminated_length": 3057.0,
"completions/mean_length": 557.6640625,
"completions/mean_terminated_length": 568.77294921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 149.0,
"epoch": 0.12373333333333333,
"grad_norm": 0.006594009697437286,
"learning_rate": 2.3333333333333336e-06,
"loss": 0.092,
"num_tokens": 25127214.0,
"reward": 0.980388879776001,
"reward_std": 0.2139553725719452,
"rewards/accuracy_reward_step": 0.50390625,
"rewards/final_brier_reward_step": 0.507652759552002,
"rewards/format_reward_step": 0.94921875,
"step": 116
},
{
"aux_distill/final_loss": 0.0018335038339500898,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0666623511351645,
"aux_distill/mean_u": 0.16838721496509335,
"aux_distill/n_active_final_tok": 31.75,
"aux_distill/n_active_tok": 212.375,
"aux_distill/step_loss": 0.6647899970412254,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5332070707070707,
"calib/avg_num_step_conf": 6.63671875,
"calib/ece": 0.34901574803149604,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.047244094488188976,
"calib/gap": 0.034113636363636374,
"calib/mean_conf": 0.1660236220472441,
"calib/mu_c": 0.18536363636363637,
"calib/mu_w": 0.15125,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.04098425196850393,
"calib/std_conf": 0.2629981876099023,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.2559718309859155,
"calib/step_q_c_n": 710.0,
"calib/step_q_gap": 0.03504159842777599,
"calib/step_q_w": 0.22093023255813954,
"calib/step_q_w_n": 989.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2143.0,
"completions/max_terminated_length": 2143.0,
"completions/mean_length": 551.36328125,
"completions/mean_terminated_length": 553.5255126953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 196.0,
"epoch": 0.1248,
"grad_norm": 0.00625663623213768,
"learning_rate": 2.305555555555556e-06,
"loss": 0.0713,
"num_tokens": 25374963.0,
"reward": 1.0238478183746338,
"reward_std": 0.1417831927537918,
"rewards/accuracy_reward_step": 0.4296875,
"rewards/final_brier_reward_step": 0.6258206963539124,
"rewards/format_reward_step": 0.9921875,
"step": 117
},
{
"aux_distill/final_loss": 0.0007958354216270891,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.05608076159842312,
"aux_distill/mean_u": 0.13667836461954033,
"aux_distill/n_active_final_tok": 30.875,
"aux_distill/n_active_tok": 286.0,
"aux_distill/step_loss": 0.5600117654539645,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.537762467191601,
"calib/avg_num_step_conf": 9.67578125,
"calib/ece": 0.4103603238866397,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.08906882591093117,
"calib/gap": 0.03999035433070866,
"calib/mean_conf": 0.19773684210526313,
"calib/mu_c": 0.2171653543307087,
"calib/mu_w": 0.17717500000000003,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.046963562753036446,
"calib/std_conf": 0.29979166784326466,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.2636060606060606,
"calib/step_q_c_n": 990.0,
"calib/step_q_gap": 0.08017633632899271,
"calib/step_q_w": 0.1834297242770679,
"calib/step_q_w_n": 1487.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2800.0,
"completions/max_terminated_length": 2800.0,
"completions/mean_length": 613.80078125,
"completions/mean_terminated_length": 621.0791015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 157.0,
"epoch": 0.12586666666666665,
"grad_norm": 0.009130639024078846,
"learning_rate": 2.277777777777778e-06,
"loss": 0.1473,
"num_tokens": 25636104.0,
"reward": 1.0064516067504883,
"reward_std": 0.21105365455150604,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/final_brier_reward_step": 0.555871844291687,
"rewards/format_reward_step": 0.9609375,
"step": 118
},
{
"aux_distill/final_loss": 0.000528632641362492,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06704626954160631,
"aux_distill/mean_u": 0.19759312360890224,
"aux_distill/n_active_final_tok": 30.625,
"aux_distill/n_active_tok": 214.75,
"aux_distill/step_loss": 0.6699340520426631,
"calib/answer_extract_rate": 0.95703125,
"calib/auroc": 0.5888421474358975,
"calib/avg_num_step_conf": 7.40625,
"calib/ece": 0.3749795918367347,
"calib/final_conf_rate": 0.95703125,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 0.044897959183673466,
"calib/gap": 0.06137887286324792,
"calib/mean_conf": 0.15579591836734694,
"calib/mu_c": 0.1878632478632479,
"calib/mu_w": 0.12648437499999998,
"calib/nonempty_final_conf_rate": 0.95703125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.02661224489795918,
"calib/std_conf": 0.24266858054420698,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.23157199471598416,
"calib/step_q_c_n": 757.0,
"calib/step_q_gap": 0.06798990516374537,
"calib/step_q_w": 0.1635820895522388,
"calib/step_q_w_n": 1139.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2858.0,
"completions/max_terminated_length": 2858.0,
"completions/mean_length": 610.8046875,
"completions/mean_terminated_length": 622.9721069335938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 165.0,
"epoch": 0.12693333333333334,
"grad_norm": 0.008736925199627876,
"learning_rate": 2.25e-06,
"loss": 0.1025,
"num_tokens": 25897534.0,
"reward": 0.9991916418075562,
"reward_std": 0.20187215507030487,
"rewards/accuracy_reward_step": 0.45703125,
"rewards/final_brier_reward_step": 0.5882269144058228,
"rewards/format_reward_step": 0.953125,
"step": 119
},
{
"aux_distill/final_loss": 0.0005055578319570486,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.057472184766083956,
"aux_distill/mean_u": 0.15117055273225058,
"aux_distill/n_active_final_tok": 29.375,
"aux_distill/n_active_tok": 252.125,
"aux_distill/step_loss": 0.5742162819951773,
"calib/answer_extract_rate": 0.921875,
"calib/auroc": 0.5447395376060872,
"calib/avg_num_step_conf": 9.30859375,
"calib/ece": 0.48006355932203393,
"calib/final_conf_rate": 0.921875,
"calib/format_rate": 0.91796875,
"calib/frac_conf_gt_0.9": 0.05084745762711865,
"calib/gap": 0.022365378987415863,
"calib/mean_conf": 0.1495127118644068,
"calib/mu_c": 0.15917910447761194,
"calib/mu_w": 0.13681372549019608,
"calib/nonempty_final_conf_rate": 0.921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.030889830508474585,
"calib/std_conf": 0.25454153670119367,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.23720441988950275,
"calib/step_q_c_n": 905.0,
"calib/step_q_gap": 0.11476856062021992,
"calib/step_q_w": 0.12243585926928283,
"calib/step_q_w_n": 1478.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05078125,
"completions/max_length": 3026.0,
"completions/max_terminated_length": 3026.0,
"completions/mean_length": 593.75390625,
"completions/mean_terminated_length": 625.5184936523438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 238.0,
"epoch": 0.128,
"grad_norm": 0.005892501212656498,
"learning_rate": 2.222222222222222e-06,
"loss": 0.0633,
"num_tokens": 26156223.0,
"reward": 0.9611204266548157,
"reward_std": 0.26143747568130493,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/final_brier_reward_step": 0.4808346629142761,
"rewards/format_reward_step": 0.91796875,
"step": 120
},
{
"aux_distill/final_loss": 0.001512952457233041,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06258805538527668,
"aux_distill/mean_u": 0.21936762366366194,
"aux_distill/n_active_final_tok": 29.125,
"aux_distill/n_active_tok": 303.0,
"aux_distill/step_loss": 0.6243675984442234,
"calib/answer_extract_rate": 0.921875,
"calib/auroc": 0.5526315789473685,
"calib/avg_num_step_conf": 10.44921875,
"calib/ece": 0.4203800847457627,
"calib/final_conf_rate": 0.921875,
"calib/format_rate": 0.90625,
"calib/frac_conf_gt_0.9": 0.0635593220338983,
"calib/gap": 0.026903523152142617,
"calib/mean_conf": 0.17241652542372882,
"calib/mu_c": 0.18541229508196722,
"calib/mu_w": 0.1585087719298246,
"calib/nonempty_final_conf_rate": 0.921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.03792372881355932,
"calib/std_conf": 0.26805675424823155,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.24635463071512306,
"calib/step_q_c_n": 853.0,
"calib/step_q_gap": 0.04392323664267517,
"calib/step_q_w": 0.2024313940724479,
"calib/step_q_w_n": 1822.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0390625,
"completions/max_length": 2942.0,
"completions/max_terminated_length": 2942.0,
"completions/mean_length": 690.86328125,
"completions/mean_terminated_length": 718.9471435546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 194.0,
"epoch": 0.12906666666666666,
"grad_norm": 0.005467615555971861,
"learning_rate": 2.1944444444444445e-06,
"loss": 0.1284,
"num_tokens": 26438140.0,
"reward": 0.9531474113464355,
"reward_std": 0.2858285903930664,
"rewards/accuracy_reward_step": 0.4765625,
"rewards/final_brier_reward_step": 0.5234823226928711,
"rewards/format_reward_step": 0.90625,
"step": 121
},
{
"aux_distill/final_loss": 0.0019614799234659586,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06464390846667811,
"aux_distill/mean_u": 0.2041460817987188,
"aux_distill/n_active_final_tok": 29.75,
"aux_distill/n_active_tok": 266.5,
"aux_distill/step_loss": 0.6444775881245732,
"calib/answer_extract_rate": 0.921875,
"calib/auroc": 0.5519595359366157,
"calib/avg_num_step_conf": 10.64453125,
"calib/ece": 0.4018483193277311,
"calib/final_conf_rate": 0.9296875,
"calib/format_rate": 0.91796875,
"calib/frac_conf_gt_0.9": 0.058823529411764705,
"calib/gap": 0.05940233446519527,
"calib/mean_conf": 0.17437016806722688,
"calib/mu_c": 0.20282338709677422,
"calib/mu_w": 0.14342105263157895,
"calib/nonempty_final_conf_rate": 0.9296875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.027605042016806726,
"calib/std_conf": 0.26916321153937484,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.25753532863849765,
"calib/step_q_c_n": 852.0,
"calib/step_q_gap": 0.09895524321404492,
"calib/step_q_w": 0.15858008542445273,
"calib/step_q_w_n": 1873.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05859375,
"completions/max_length": 2978.0,
"completions/max_terminated_length": 2978.0,
"completions/mean_length": 588.94140625,
"completions/mean_terminated_length": 625.5975341796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 266.0,
"epoch": 0.13013333333333332,
"grad_norm": 0.0055676959455013275,
"learning_rate": 2.166666666666667e-06,
"loss": 0.0066,
"num_tokens": 26696253.0,
"reward": 0.9706401824951172,
"reward_std": 0.26492780447006226,
"rewards/accuracy_reward_step": 0.484375,
"rewards/final_brier_reward_step": 0.5389367341995239,
"rewards/format_reward_step": 0.91796875,
"step": 122
},
{
"aux_distill/final_loss": 0.0024005121531445184,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06945573759730905,
"aux_distill/mean_u": 0.2149112364506133,
"aux_distill/n_active_final_tok": 30.125,
"aux_distill/n_active_tok": 231.0,
"aux_distill/step_loss": 0.6921568466350436,
"calib/answer_extract_rate": 0.94140625,
"calib/auroc": 0.4637264957264957,
"calib/avg_num_step_conf": 7.9921875,
"calib/ece": 0.39942148760330587,
"calib/final_conf_rate": 0.9453125,
"calib/format_rate": 0.93359375,
"calib/frac_conf_gt_0.9": 0.06611570247933884,
"calib/gap": -0.017327863247863284,
"calib/mean_conf": 0.20570247933884298,
"calib/mu_c": 0.19675213675213674,
"calib/mu_w": 0.21408000000000002,
"calib/nonempty_final_conf_rate": 0.9453125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.060826446280991736,
"calib/std_conf": 0.2879776347097573,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.24959493670886077,
"calib/step_q_c_n": 790.0,
"calib/step_q_gap": -0.012124808514069135,
"calib/step_q_w": 0.2617197452229299,
"calib/step_q_w_n": 1256.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.046875,
"completions/max_length": 2603.0,
"completions/max_terminated_length": 2603.0,
"completions/mean_length": 612.5390625,
"completions/mean_terminated_length": 642.6638793945312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 273.0,
"epoch": 0.1312,
"grad_norm": 0.005516073666512966,
"learning_rate": 2.138888888888889e-06,
"loss": 0.0124,
"num_tokens": 26958351.0,
"reward": 0.9662868976593018,
"reward_std": 0.23128430545330048,
"rewards/accuracy_reward_step": 0.45703125,
"rewards/final_brier_reward_step": 0.5419487953186035,
"rewards/format_reward_step": 0.93359375,
"step": 123
},
{
"aux_distill/final_loss": 0.0008837626273816568,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06087582663167268,
"aux_distill/mean_u": 0.1862628107519184,
"aux_distill/n_active_final_tok": 29.875,
"aux_distill/n_active_tok": 253.125,
"aux_distill/step_loss": 0.6078744931146502,
"calib/answer_extract_rate": 0.9375,
"calib/auroc": 0.5328686720469552,
"calib/avg_num_step_conf": 8.91015625,
"calib/ece": 0.4679916317991631,
"calib/final_conf_rate": 0.93359375,
"calib/format_rate": 0.93359375,
"calib/frac_conf_gt_0.9": 0.08786610878661087,
"calib/gap": 0.026069699192956597,
"calib/mean_conf": 0.21953974895397488,
"calib/mu_c": 0.2297931034482758,
"calib/mu_w": 0.2037234042553192,
"calib/nonempty_final_conf_rate": 0.93359375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.040418410041841005,
"calib/std_conf": 0.30176918617622306,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.26054347826086954,
"calib/step_q_c_n": 1012.0,
"calib/step_q_gap": 0.05541250899373007,
"calib/step_q_w": 0.20513096926713947,
"calib/step_q_w_n": 1269.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02734375,
"completions/max_length": 2980.0,
"completions/max_terminated_length": 2980.0,
"completions/mean_length": 628.9921875,
"completions/mean_terminated_length": 646.6746826171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 207.0,
"epoch": 0.13226666666666667,
"grad_norm": 0.005148047115653753,
"learning_rate": 2.1111111111111114e-06,
"loss": 0.1321,
"num_tokens": 27226189.0,
"reward": 0.9987428188323975,
"reward_std": 0.2690247893333435,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/final_brier_reward_step": 0.49748554825782776,
"rewards/format_reward_step": 0.93359375,
"step": 124
},
{
"aux_distill/final_loss": 0.0019698337684985745,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.058478535269387066,
"aux_distill/mean_u": 0.1786380818921242,
"aux_distill/n_active_final_tok": 29.5,
"aux_distill/n_active_tok": 246.0,
"aux_distill/step_loss": 0.5828155069611967,
"calib/answer_extract_rate": 0.92578125,
"calib/auroc": 0.5291393559418638,
"calib/avg_num_step_conf": 8.8515625,
"calib/ece": 0.3565527426160337,
"calib/final_conf_rate": 0.92578125,
"calib/format_rate": 0.92578125,
"calib/frac_conf_gt_0.9": 0.08438818565400844,
"calib/gap": 0.03483884297520659,
"calib/mean_conf": 0.22471308016877636,
"calib/mu_c": 0.2425,
"calib/mu_w": 0.2076611570247934,
"calib/nonempty_final_conf_rate": 0.92578125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.0459071729957806,
"calib/std_conf": 0.29503832611974684,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.294019728729963,
"calib/step_q_c_n": 811.0,
"calib/step_q_gap": 0.045377117046114246,
"calib/step_q_w": 0.24864261168384877,
"calib/step_q_w_n": 1455.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03515625,
"completions/max_length": 2669.0,
"completions/max_terminated_length": 2669.0,
"completions/mean_length": 652.390625,
"completions/mean_terminated_length": 676.1619873046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 237.0,
"epoch": 0.13333333333333333,
"grad_norm": 0.005496951751410961,
"learning_rate": 2.0833333333333334e-06,
"loss": 0.1146,
"num_tokens": 27498009.0,
"reward": 0.9719964265823364,
"reward_std": 0.24279111623764038,
"rewards/accuracy_reward_step": 0.453125,
"rewards/final_brier_reward_step": 0.5650866031646729,
"rewards/format_reward_step": 0.92578125,
"step": 125
},
{
"aux_distill/final_loss": 0.0008635271465209371,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06402088113827631,
"aux_distill/mean_u": 0.20865287611760336,
"aux_distill/n_active_final_tok": 29.625,
"aux_distill/n_active_tok": 265.0,
"aux_distill/step_loss": 0.6393452696502209,
"calib/answer_extract_rate": 0.9296875,
"calib/auroc": 0.5258731290092659,
"calib/avg_num_step_conf": 9.03515625,
"calib/ece": 0.3979261603375528,
"calib/final_conf_rate": 0.92578125,
"calib/format_rate": 0.92578125,
"calib/frac_conf_gt_0.9": 0.10970464135021098,
"calib/gap": 0.014444526015680637,
"calib/mean_conf": 0.26207383966244724,
"calib/mu_c": 0.26908278688524584,
"calib/mu_w": 0.2546382608695652,
"calib/nonempty_final_conf_rate": 0.92578125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.07261603375527426,
"calib/std_conf": 0.3239697482738927,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.3166586538461538,
"calib/step_q_c_n": 936.0,
"calib/step_q_gap": -0.021296320736271745,
"calib/step_q_w": 0.33795497458242557,
"calib/step_q_w_n": 1377.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0390625,
"completions/max_length": 2980.0,
"completions/max_terminated_length": 2980.0,
"completions/mean_length": 616.87890625,
"completions/mean_terminated_length": 641.9552612304688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 205.0,
"epoch": 0.1344,
"grad_norm": 0.005289388354867697,
"learning_rate": 2.0555555555555555e-06,
"loss": 0.0427,
"num_tokens": 27761394.0,
"reward": 0.9755932092666626,
"reward_std": 0.2762099504470825,
"rewards/accuracy_reward_step": 0.48046875,
"rewards/final_brier_reward_step": 0.54493647813797,
"rewards/format_reward_step": 0.92578125,
"step": 126
},
{
"aux_distill/final_loss": 0.0005444334367439296,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06492951465770602,
"aux_distill/mean_u": 0.18184597767731114,
"aux_distill/n_active_final_tok": 30.25,
"aux_distill/n_active_tok": 208.25,
"aux_distill/step_loss": 0.6487507000565529,
"calib/answer_extract_rate": 0.9453125,
"calib/auroc": 0.5325573433755564,
"calib/avg_num_step_conf": 7.95703125,
"calib/ece": 0.329253305785124,
"calib/final_conf_rate": 0.9453125,
"calib/format_rate": 0.9453125,
"calib/frac_conf_gt_0.9": 0.07851239669421488,
"calib/gap": 0.03287363916466965,
"calib/mean_conf": 0.25405247933884295,
"calib/mu_c": 0.271304347826087,
"calib/mu_w": 0.23843070866141733,
"calib/nonempty_final_conf_rate": 0.9453125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.054049586776859504,
"calib/std_conf": 0.29892154795647613,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.31726683937823835,
"calib/step_q_c_n": 772.0,
"calib/step_q_gap": 0.0903801200106494,
"calib/step_q_w": 0.22688671936758895,
"calib/step_q_w_n": 1265.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03125,
"completions/max_length": 2823.0,
"completions/max_terminated_length": 2823.0,
"completions/mean_length": 598.4921875,
"completions/mean_terminated_length": 617.79833984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 188.0,
"epoch": 0.13546666666666668,
"grad_norm": 0.005467047914862633,
"learning_rate": 2.027777777777778e-06,
"loss": 0.0484,
"num_tokens": 28018280.0,
"reward": 0.9944472312927246,
"reward_std": 0.24159783124923706,
"rewards/accuracy_reward_step": 0.44921875,
"rewards/final_brier_reward_step": 0.5943632125854492,
"rewards/format_reward_step": 0.9453125,
"step": 127
},
{
"aux_distill/final_loss": 0.0007024324584108399,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06789599265903234,
"aux_distill/mean_u": 0.2013985319393502,
"aux_distill/n_active_final_tok": 29.375,
"aux_distill/n_active_tok": 201.125,
"aux_distill/step_loss": 0.678257486782968,
"calib/answer_extract_rate": 0.91796875,
"calib/auroc": 0.587270341207349,
"calib/avg_num_step_conf": 6.7578125,
"calib/ece": 0.3896170212765958,
"calib/final_conf_rate": 0.91796875,
"calib/format_rate": 0.9140625,
"calib/frac_conf_gt_0.9": 0.10638297872340426,
"calib/gap": 0.05565835520559939,
"calib/mean_conf": 0.266468085106383,
"calib/mu_c": 0.2920472440944883,
"calib/mu_w": 0.2363888888888889,
"calib/nonempty_final_conf_rate": 0.91796875,
"calib/nonempty_reasoning_rate": 0.96484375,
"calib/nonempty_step_conf_rate": 0.96484375,
"calib/pce": 0.05782978723404256,
"calib/std_conf": 0.31932221111810316,
"calib/step_conf_rate": 0.96484375,
"calib/step_q_c": 0.3081908302354399,
"calib/step_q_c_n": 807.0,
"calib/step_q_gap": 0.05836417801442151,
"calib/step_q_w": 0.2498266522210184,
"calib/step_q_w_n": 923.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03515625,
"completions/max_length": 3033.0,
"completions/max_terminated_length": 3033.0,
"completions/mean_length": 604.89453125,
"completions/mean_terminated_length": 626.9352416992188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 209.0,
"epoch": 0.13653333333333334,
"grad_norm": 0.005871245171874762,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0333,
"num_tokens": 28279797.0,
"reward": 0.9795734286308289,
"reward_std": 0.2901492416858673,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/final_brier_reward_step": 0.5489906072616577,
"rewards/format_reward_step": 0.9140625,
"step": 128
},
{
"aux_distill/final_loss": 0.00048586055117993965,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06084221811033785,
"aux_distill/mean_u": 0.18788175666293686,
"aux_distill/n_active_final_tok": 30.875,
"aux_distill/n_active_tok": 211.75,
"aux_distill/step_loss": 0.6079363012686372,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.49360562437227984,
"calib/avg_num_step_conf": 7.0625,
"calib/ece": 0.42291088709677416,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.06451612903225806,
"calib/gap": 0.0068874121191830895,
"calib/mean_conf": 0.24950846774193544,
"calib/mu_c": 0.25236896551724136,
"calib/mu_w": 0.24548155339805827,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.04387096774193548,
"calib/std_conf": 0.29721459264565586,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.28531420824295006,
"calib/step_q_c_n": 922.0,
"calib/step_q_gap": 0.027849422689902648,
"calib/step_q_w": 0.2574647855530474,
"calib/step_q_w_n": 886.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2926.0,
"completions/max_terminated_length": 2926.0,
"completions/mean_length": 561.84765625,
"completions/mean_terminated_length": 566.2716674804688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 223.0,
"epoch": 0.1376,
"grad_norm": 0.005904473830014467,
"learning_rate": 1.9722222222222224e-06,
"loss": 0.1375,
"num_tokens": 28526014.0,
"reward": 1.0348446369171143,
"reward_std": 0.23667702078819275,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/final_brier_reward_step": 0.5384392142295837,
"rewards/format_reward_step": 0.96484375,
"step": 129
},
{
"aux_distill/final_loss": 0.014214775796517642,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06426420679781586,
"aux_distill/mean_u": 0.19287301748504312,
"aux_distill/n_active_final_tok": 30.375,
"aux_distill/n_active_tok": 200.375,
"aux_distill/step_loss": 0.6284272773191333,
"calib/answer_extract_rate": 0.94921875,
"calib/auroc": 0.4567132867132867,
"calib/avg_num_step_conf": 6.5390625,
"calib/ece": 0.47259259259259256,
"calib/final_conf_rate": 0.94921875,
"calib/format_rate": 0.9453125,
"calib/frac_conf_gt_0.9": 0.09465020576131687,
"calib/gap": -0.03296993006993004,
"calib/mean_conf": 0.25349794238683127,
"calib/mu_c": 0.23993006993006993,
"calib/mu_w": 0.2729,
"calib/nonempty_final_conf_rate": 0.94921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0688065843621399,
"calib/std_conf": 0.3175233813948065,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.30043927648578816,
"calib/step_q_c_n": 774.0,
"calib/step_q_gap": 0.04557260981912148,
"calib/step_q_w": 0.2548666666666667,
"calib/step_q_w_n": 900.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02734375,
"completions/max_length": 2974.0,
"completions/max_terminated_length": 2974.0,
"completions/mean_length": 542.94921875,
"completions/mean_terminated_length": 558.2128295898438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 200.0,
"epoch": 0.13866666666666666,
"grad_norm": 0.005810092203319073,
"learning_rate": 1.944444444444445e-06,
"loss": 0.0745,
"num_tokens": 28770297.0,
"reward": 1.001173973083496,
"reward_std": 0.24746762216091156,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/final_brier_reward_step": 0.49844178557395935,
"rewards/format_reward_step": 0.9453125,
"step": 130
},
{
"aux_distill/final_loss": 0.016351521228671118,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06701393506955355,
"aux_distill/mean_u": 0.15899207917983457,
"aux_distill/n_active_final_tok": 31.0,
"aux_distill/n_active_tok": 188.0,
"aux_distill/step_loss": 0.6537878112867475,
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.49050849780701755,
"calib/avg_num_step_conf": 6.28125,
"calib/ece": 0.30870967741935484,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.07661290322580645,
"calib/gap": 0.003969298245614011,
"calib/mean_conf": 0.21548387096774194,
"calib/mu_c": 0.21791666666666668,
"calib/mu_w": 0.21394736842105266,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.06854838709677419,
"calib/std_conf": 0.2861559884745632,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.2709739130434783,
"calib/step_q_c_n": 575.0,
"calib/step_q_gap": 0.04122560713834761,
"calib/step_q_w": 0.22974830590513068,
"calib/step_q_w_n": 1033.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2437.0,
"completions/max_terminated_length": 2437.0,
"completions/mean_length": 562.078125,
"completions/mean_terminated_length": 571.0000610351562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 256.0,
"epoch": 0.13973333333333332,
"grad_norm": 0.006515398155897856,
"learning_rate": 1.916666666666667e-06,
"loss": 0.0634,
"num_tokens": 29020397.0,
"reward": 0.9808332324028015,
"reward_std": 0.2079819142818451,
"rewards/accuracy_reward_step": 0.375,
"rewards/final_brier_reward_step": 0.6257289052009583,
"rewards/format_reward_step": 0.9609375,
"step": 131
},
{
"aux_distill/final_loss": 0.00035071817535481387,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06427617382723838,
"aux_distill/mean_u": 0.220794219787013,
"aux_distill/n_active_final_tok": 31.0,
"aux_distill/n_active_tok": 181.125,
"aux_distill/step_loss": 0.6424110066145658,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.516559829059829,
"calib/avg_num_step_conf": 6.02734375,
"calib/ece": 0.4125000000000001,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.09274193548387097,
"calib/gap": 0.03446047008547004,
"calib/mean_conf": 0.23145161290322577,
"calib/mu_c": 0.24590277777777775,
"calib/mu_w": 0.2114423076923077,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.03165322580645162,
"calib/std_conf": 0.30537046128666534,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.2805757196495619,
"calib/step_q_c_n": 799.0,
"calib/step_q_gap": 0.034769268036658696,
"calib/step_q_w": 0.2458064516129032,
"calib/step_q_w_n": 744.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02734375,
"completions/max_length": 2455.0,
"completions/max_terminated_length": 2455.0,
"completions/mean_length": 536.46875,
"completions/mean_terminated_length": 551.5501708984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 191.0,
"epoch": 0.1408,
"grad_norm": 0.006517018191516399,
"learning_rate": 1.888888888888889e-06,
"loss": 0.0563,
"num_tokens": 29263325.0,
"reward": 1.0359539985656738,
"reward_std": 0.19730764627456665,
"rewards/accuracy_reward_step": 0.5625,
"rewards/final_brier_reward_step": 0.5406578183174133,
"rewards/format_reward_step": 0.96875,
"step": 132
},
{
"aux_distill/final_loss": 0.0002883227602978877,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0747068403288722,
"aux_distill/mean_u": 0.22356829570609857,
"aux_distill/n_active_final_tok": 31.125,
"aux_distill/n_active_tok": 194.25,
"aux_distill/step_loss": 0.7467800760641694,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5050087108013938,
"calib/avg_num_step_conf": 6.1640625,
"calib/ece": 0.30756479999999997,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.072,
"calib/gap": -0.041652264808362316,
"calib/mean_conf": 0.20579519999999998,
"calib/mu_c": 0.1778048780487805,
"calib/mu_w": 0.21945714285714282,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.09268,
"calib/std_conf": 0.28980485402587725,
"calib/step_conf_rate": 0.98046875,
"calib/step_q_c": 0.22513432835820893,
"calib/step_q_c_n": 469.0,
"calib/step_q_gap": -0.058855662624658495,
"calib/step_q_w": 0.2839899909828674,
"calib/step_q_w_n": 1109.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2769.0,
"completions/max_terminated_length": 2769.0,
"completions/mean_length": 654.12890625,
"completions/mean_terminated_length": 656.6941528320312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 243.0,
"epoch": 0.14186666666666667,
"grad_norm": 0.005605577025562525,
"learning_rate": 1.8611111111111113e-06,
"loss": 0.2037,
"num_tokens": 29537126.0,
"reward": 0.969298779964447,
"reward_std": 0.2005949318408966,
"rewards/accuracy_reward_step": 0.3203125,
"rewards/final_brier_reward_step": 0.645628809928894,
"rewards/format_reward_step": 0.97265625,
"step": 133
},
{
"aux_distill/final_loss": 0.0003131369194306899,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06677349738311023,
"aux_distill/mean_u": 0.20721821418510009,
"aux_distill/n_active_final_tok": 30.0,
"aux_distill/n_active_tok": 173.875,
"aux_distill/step_loss": 0.6674218215048313,
"calib/answer_extract_rate": 0.9453125,
"calib/auroc": 0.5016752136752136,
"calib/avg_num_step_conf": 6.07421875,
"calib/ece": 0.423140082644628,
"calib/final_conf_rate": 0.9453125,
"calib/format_rate": 0.9375,
"calib/frac_conf_gt_0.9": 0.06198347107438017,
"calib/gap": -0.010135268376068401,
"calib/mean_conf": 0.18314090909090908,
"calib/mu_c": 0.1782408,
"calib/mu_w": 0.1883760683760684,
"calib/nonempty_final_conf_rate": 0.9453125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.04487603305785123,
"calib/std_conf": 0.27150315132183267,
"calib/step_conf_rate": 0.98046875,
"calib/step_q_c": 0.23020534591194966,
"calib/step_q_c_n": 636.0,
"calib/step_q_gap": 0.04792025342011069,
"calib/step_q_w": 0.18228509249183897,
"calib/step_q_w_n": 919.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03125,
"completions/max_length": 3042.0,
"completions/max_terminated_length": 3042.0,
"completions/mean_length": 621.16796875,
"completions/mean_terminated_length": 641.2056274414062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 189.0,
"epoch": 0.14293333333333333,
"grad_norm": 0.005336909554898739,
"learning_rate": 1.8333333333333333e-06,
"loss": 0.0784,
"num_tokens": 29805097.0,
"reward": 0.9763469099998474,
"reward_std": 0.22861750423908234,
"rewards/accuracy_reward_step": 0.48828125,
"rewards/final_brier_reward_step": 0.5269124507904053,
"rewards/format_reward_step": 0.9375,
"step": 134
},
{
"aux_distill/final_loss": 0.0003893321477335121,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07088062487309799,
"aux_distill/mean_u": 0.19776091224026507,
"aux_distill/n_active_final_tok": 30.5,
"aux_distill/n_active_tok": 182.625,
"aux_distill/step_loss": 0.7084169033914804,
"calib/answer_extract_rate": 0.953125,
"calib/auroc": 0.48846875838026277,
"calib/avg_num_step_conf": 5.80078125,
"calib/ece": 0.40654489795918375,
"calib/final_conf_rate": 0.95703125,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 0.053061224489795916,
"calib/gap": -0.034601903995709254,
"calib/mean_conf": 0.15843469387755105,
"calib/mu_c": 0.1397920353982301,
"calib/mu_w": 0.17439393939393935,
"calib/nonempty_final_conf_rate": 0.95703125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.05187755102040817,
"calib/std_conf": 0.25144952798298403,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.1987861952861953,
"calib/step_q_c_n": 594.0,
"calib/step_q_gap": -0.020565319865319842,
"calib/step_q_w": 0.21935151515151513,
"calib/step_q_w_n": 891.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 2518.0,
"completions/max_terminated_length": 2518.0,
"completions/mean_length": 609.62890625,
"completions/mean_terminated_length": 624.260009765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 168.0,
"epoch": 0.144,
"grad_norm": 0.006061874330043793,
"learning_rate": 1.8055555555555557e-06,
"loss": 0.1013,
"num_tokens": 30067042.0,
"reward": 0.9705733060836792,
"reward_std": 0.2059599757194519,
"rewards/accuracy_reward_step": 0.44140625,
"rewards/final_brier_reward_step": 0.5505216121673584,
"rewards/format_reward_step": 0.94921875,
"step": 135
},
{
"aux_distill/final_loss": 0.0003441147242142506,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06527584453579038,
"aux_distill/mean_u": 0.16068252651141998,
"aux_distill/n_active_final_tok": 31.125,
"aux_distill/n_active_tok": 191.625,
"aux_distill/step_loss": 0.6524143265560269,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.492408226023448,
"calib/avg_num_step_conf": 6.11328125,
"calib/ece": 0.41375999999999996,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.052,
"calib/gap": -0.02164136075341147,
"calib/mean_conf": 0.16720000000000002,
"calib/mu_c": 0.1560330578512397,
"calib/mu_w": 0.17767441860465116,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.048479999999999995,
"calib/std_conf": 0.2597524975818327,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.22399700598802394,
"calib/step_q_c_n": 668.0,
"calib/step_q_gap": -0.004259404268386308,
"calib/step_q_w": 0.22825641025641025,
"calib/step_q_w_n": 897.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2802.0,
"completions/max_terminated_length": 2802.0,
"completions/mean_length": 577.91015625,
"completions/mean_terminated_length": 584.7628784179688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 205.0,
"epoch": 0.14506666666666668,
"grad_norm": 0.005741880275309086,
"learning_rate": 1.777777777777778e-06,
"loss": 0.0699,
"num_tokens": 30323475.0,
"reward": 1.001725196838379,
"reward_std": 0.17823567986488342,
"rewards/accuracy_reward_step": 0.47265625,
"rewards/final_brier_reward_step": 0.5581378936767578,
"rewards/format_reward_step": 0.97265625,
"step": 136
},
{
"aux_distill/final_loss": 0.0002509895234652504,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07148401136510074,
"aux_distill/mean_u": 0.2109595339307451,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 185.75,
"aux_distill/step_loss": 0.7145891096442938,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5308645899554991,
"calib/avg_num_step_conf": 5.8203125,
"calib/ece": 0.3838804780876494,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.05179282868525897,
"calib/gap": 0.05437571519389703,
"calib/mean_conf": 0.15659760956175298,
"calib/mu_c": 0.1847603305785124,
"calib/mu_w": 0.13038461538461538,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.029203187250996014,
"calib/std_conf": 0.2571923982457517,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.239253893129771,
"calib/step_q_c_n": 655.0,
"calib/step_q_gap": 0.03216527037528,
"calib/step_q_w": 0.207088622754491,
"calib/step_q_w_n": 835.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2390.0,
"completions/max_terminated_length": 2390.0,
"completions/mean_length": 574.58984375,
"completions/mean_terminated_length": 581.4031982421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 256.0,
"epoch": 0.14613333333333334,
"grad_norm": 0.006100764032453299,
"learning_rate": 1.75e-06,
"loss": 0.0623,
"num_tokens": 30577554.0,
"reward": 1.0213937759399414,
"reward_std": 0.1669759452342987,
"rewards/accuracy_reward_step": 0.47265625,
"rewards/final_brier_reward_step": 0.5935689210891724,
"rewards/format_reward_step": 0.9765625,
"step": 137
},
{
"aux_distill/final_loss": 0.0010311530596709417,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06165759603027254,
"aux_distill/mean_u": 0.1574107536924648,
"aux_distill/n_active_final_tok": 31.0,
"aux_distill/n_active_tok": 175.75,
"aux_distill/step_loss": 0.6155447992496192,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5907687991021325,
"calib/avg_num_step_conf": 5.5859375,
"calib/ece": 0.5249600000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.088,
"calib/gap": 0.037878787878787845,
"calib/mean_conf": 0.19,
"calib/mu_c": 0.2033333333333333,
"calib/mu_w": 0.16545454545454547,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.03348,
"calib/std_conf": 0.2955848439957638,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.2696705882352941,
"calib/step_q_c_n": 850.0,
"calib/step_q_gap": 0.023498174442190656,
"calib/step_q_w": 0.24617241379310345,
"calib/step_q_w_n": 580.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 3025.0,
"completions/max_terminated_length": 3025.0,
"completions/mean_length": 560.2109375,
"completions/mean_terminated_length": 566.853759765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 148.0,
"epoch": 0.1472,
"grad_norm": 0.006789561361074448,
"learning_rate": 1.7222222222222224e-06,
"loss": 0.1078,
"num_tokens": 30825304.0,
"reward": 1.0351853370666504,
"reward_std": 0.20294833183288574,
"rewards/accuracy_reward_step": 0.6328125,
"rewards/final_brier_reward_step": 0.47271445393562317,
"rewards/format_reward_step": 0.96484375,
"step": 138
},
{
"aux_distill/final_loss": 0.0005163222972441872,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06195448955986649,
"aux_distill/mean_u": 0.14828940232721982,
"aux_distill/n_active_final_tok": 31.5,
"aux_distill/n_active_tok": 169.125,
"aux_distill/step_loss": 0.6190285570919514,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.42939988271323387,
"calib/avg_num_step_conf": 5.3046875,
"calib/ece": 0.4920634920634919,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.07539682539682539,
"calib/gap": -0.05801133772072714,
"calib/mean_conf": 0.2123015873015873,
"calib/mu_c": 0.18859060402684566,
"calib/mu_w": 0.2466019417475728,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.05654761904761904,
"calib/std_conf": 0.29581515758628385,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.25153333333333333,
"calib/step_q_c_n": 750.0,
"calib/step_q_gap": -0.047710087719298244,
"calib/step_q_w": 0.2992434210526316,
"calib/step_q_w_n": 608.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2708.0,
"completions/max_terminated_length": 2708.0,
"completions/mean_length": 526.41015625,
"completions/mean_terminated_length": 530.5551147460938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 158.0,
"epoch": 0.14826666666666666,
"grad_norm": 0.0061951130628585815,
"learning_rate": 1.6944444444444446e-06,
"loss": 0.0623,
"num_tokens": 31063161.0,
"reward": 1.028887152671814,
"reward_std": 0.17283034324645996,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/final_brier_reward_step": 0.49136799573898315,
"rewards/format_reward_step": 0.984375,
"step": 139
},
{
"aux_distill/final_loss": 0.0002839611361196148,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06980497867334634,
"aux_distill/mean_u": 0.18654986838888957,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 174.875,
"aux_distill/step_loss": 0.6977658206596971,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5171219187208528,
"calib/avg_num_step_conf": 5.63671875,
"calib/ece": 0.4897628458498023,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.07509881422924901,
"calib/gap": 0.015161892071952038,
"calib/mean_conf": 0.20620553359683796,
"calib/mu_c": 0.21189873417721522,
"calib/mu_w": 0.19673684210526318,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.035731225296442695,
"calib/std_conf": 0.29114573529225096,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.29182142857142856,
"calib/step_q_c_n": 840.0,
"calib/step_q_gap": 0.044039670694148325,
"calib/step_q_w": 0.24778175787728024,
"calib/step_q_w_n": 603.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2131.0,
"completions/max_terminated_length": 2131.0,
"completions/mean_length": 536.96484375,
"completions/mean_terminated_length": 541.1929321289062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 188.0,
"epoch": 0.14933333333333335,
"grad_norm": 0.006557528395205736,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.0498,
"num_tokens": 31305640.0,
"reward": 1.052268385887146,
"reward_std": 0.185662180185318,
"rewards/accuracy_reward_step": 0.6171875,
"rewards/final_brier_reward_step": 0.5029742121696472,
"rewards/format_reward_step": 0.984375,
"step": 140
},
{
"aux_distill/final_loss": 0.0001985268124826689,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06410034786676988,
"aux_distill/mean_u": 0.1784564167208868,
"aux_distill/n_active_final_tok": 31.75,
"aux_distill/n_active_tok": 160.625,
"aux_distill/step_loss": 0.6408049371093512,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5053041018387552,
"calib/avg_num_step_conf": 5.02734375,
"calib/ece": 0.45168627450980386,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.09411764705882353,
"calib/gap": 0.01754854056834257,
"calib/mean_conf": 0.2396078431372549,
"calib/mu_c": 0.24655844155844156,
"calib/mu_w": 0.229009900990099,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.04368627450980392,
"calib/std_conf": 0.32268336197478165,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.29642659279778394,
"calib/step_q_c_n": 722.0,
"calib/step_q_gap": 0.04403721226681051,
"calib/step_q_w": 0.25238938053097343,
"calib/step_q_w_n": 565.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1320.0,
"completions/max_terminated_length": 1320.0,
"completions/mean_length": 540.13671875,
"completions/mean_terminated_length": 542.2549438476562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 219.0,
"epoch": 0.1504,
"grad_norm": 0.006194172892719507,
"learning_rate": 1.638888888888889e-06,
"loss": 0.0842,
"num_tokens": 31551011.0,
"reward": 1.0600550174713135,
"reward_std": 0.19128084182739258,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/final_brier_reward_step": 0.5263601541519165,
"rewards/format_reward_step": 0.9921875,
"step": 141
},
{
"aux_distill/final_loss": 0.00028447176589452283,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07187848904868588,
"aux_distill/mean_u": 0.21142897708295655,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 178.25,
"aux_distill/step_loss": 0.7185004046186805,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5234375,
"calib/avg_num_step_conf": 5.5703125,
"calib/ece": 0.38613557312252966,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.09486166007905138,
"calib/gap": 0.03321953124999996,
"calib/mean_conf": 0.22200671936758895,
"calib/mu_c": 0.23841953124999996,
"calib/mu_w": 0.2052,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.05110671936758893,
"calib/std_conf": 0.30402223549959345,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.28538184663536775,
"calib/step_q_c_n": 639.0,
"calib/step_q_gap": 0.02116329517412252,
"calib/step_q_w": 0.26421855146124523,
"calib/step_q_w_n": 787.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1899.0,
"completions/max_terminated_length": 1899.0,
"completions/mean_length": 543.82421875,
"completions/mean_terminated_length": 545.9569091796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 177.0,
"epoch": 0.15146666666666667,
"grad_norm": 0.006887676659971476,
"learning_rate": 1.6111111111111113e-06,
"loss": 0.0971,
"num_tokens": 31795390.0,
"reward": 1.0296494960784912,
"reward_std": 0.18625324964523315,
"rewards/accuracy_reward_step": 0.50390625,
"rewards/final_brier_reward_step": 0.5788301229476929,
"rewards/format_reward_step": 0.9765625,
"step": 142
},
{
"aux_distill/final_loss": 0.0002852490885061343,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0663798114983365,
"aux_distill/mean_u": 0.18406928543141352,
"aux_distill/n_active_final_tok": 30.875,
"aux_distill/n_active_tok": 183.625,
"aux_distill/step_loss": 0.6635128539055586,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.45027642276422775,
"calib/avg_num_step_conf": 6.20703125,
"calib/ece": 0.4281854838709678,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.07661290322580645,
"calib/gap": -0.04782504065040649,
"calib/mean_conf": 0.18004032258064515,
"calib/mu_c": 0.15593495934959353,
"calib/mu_w": 0.20376000000000002,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.05612903225806452,
"calib/std_conf": 0.27363517327183806,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.20450949367088608,
"calib/step_q_c_n": 632.0,
"calib/step_q_gap": -0.08326480099996028,
"calib/step_q_w": 0.28777429467084636,
"calib/step_q_w_n": 957.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 3045.0,
"completions/max_terminated_length": 3045.0,
"completions/mean_length": 576.08984375,
"completions/mean_terminated_length": 585.234130859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 226.0,
"epoch": 0.15253333333333333,
"grad_norm": 0.006010540761053562,
"learning_rate": 1.5833333333333333e-06,
"loss": 0.0568,
"num_tokens": 32050205.0,
"reward": 0.9878466725349426,
"reward_std": 0.1761135756969452,
"rewards/accuracy_reward_step": 0.48046875,
"rewards/final_brier_reward_step": 0.5303808450698853,
"rewards/format_reward_step": 0.96484375,
"step": 143
},
{
"aux_distill/final_loss": 0.00022862903210807417,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07271759235300124,
"aux_distill/mean_u": 0.18098461256417697,
"aux_distill/n_active_final_tok": 31.25,
"aux_distill/n_active_tok": 162.25,
"aux_distill/step_loss": 0.7269472843036056,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5503400013203934,
"calib/avg_num_step_conf": 5.0703125,
"calib/ece": 0.4511416666666668,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.06746031746031746,
"calib/gap": 0.05464961378490785,
"calib/mean_conf": 0.21560436507936506,
"calib/mu_c": 0.23707385620915034,
"calib/mu_w": 0.1824242424242425,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.029801587301587294,
"calib/std_conf": 0.28706309967537064,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.3026603888213852,
"calib/step_q_c_n": 823.0,
"calib/step_q_gap": 0.045081441452964166,
"calib/step_q_w": 0.257578947368421,
"calib/step_q_w_n": 475.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2901.0,
"completions/max_terminated_length": 2901.0,
"completions/mean_length": 541.40234375,
"completions/mean_terminated_length": 543.5255126953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 206.0,
"epoch": 0.1536,
"grad_norm": 0.006602726876735687,
"learning_rate": 1.5555555555555558e-06,
"loss": 0.105,
"num_tokens": 32292932.0,
"reward": 1.0560318231582642,
"reward_std": 0.1847558617591858,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/final_brier_reward_step": 0.5378449559211731,
"rewards/format_reward_step": 0.9765625,
"step": 144
},
{
"aux_distill/final_loss": 0.00019312693302708794,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06420326471561566,
"aux_distill/mean_u": 0.17593388530767018,
"aux_distill/n_active_final_tok": 31.5,
"aux_distill/n_active_tok": 180.375,
"aux_distill/step_loss": 0.6418395061045885,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5214498510427011,
"calib/avg_num_step_conf": 5.75,
"calib/ece": 0.45962992125984253,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.06299212598425197,
"calib/gap": 0.03884342932803714,
"calib/mean_conf": 0.22115748031496063,
"calib/mu_c": 0.23568553459119498,
"calib/mu_w": 0.19684210526315785,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.027401574803149607,
"calib/std_conf": 0.28398247492775347,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.28976551724137933,
"calib/step_q_c_n": 870.0,
"calib/step_q_gap": 0.04845322488257528,
"calib/step_q_w": 0.24131229235880405,
"calib/step_q_w_n": 602.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2605.0,
"completions/max_terminated_length": 2605.0,
"completions/mean_length": 518.765625,
"completions/mean_terminated_length": 520.800048828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 191.0,
"epoch": 0.15466666666666667,
"grad_norm": 0.007395788095891476,
"learning_rate": 1.527777777777778e-06,
"loss": 0.0727,
"num_tokens": 32528440.0,
"reward": 1.0674223899841309,
"reward_std": 0.18406549096107483,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/final_brier_reward_step": 0.5293761491775513,
"rewards/format_reward_step": 0.984375,
"step": 145
},
{
"aux_distill/final_loss": 0.008665551134754423,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07391537714283913,
"aux_distill/mean_u": 0.16548633324082324,
"aux_distill/n_active_final_tok": 31.0,
"aux_distill/n_active_tok": 161.375,
"aux_distill/step_loss": 0.7304882053285837,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5897919978802332,
"calib/avg_num_step_conf": 5.1484375,
"calib/ece": 0.284732,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.072,
"calib/gap": 0.09475397456279805,
"calib/mean_conf": 0.20478799999999997,
"calib/mu_c": 0.26088235294117645,
"calib/mu_w": 0.1661283783783784,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.04075999999999999,
"calib/std_conf": 0.2854098650292242,
"calib/step_conf_rate": 0.98046875,
"calib/step_q_c": 0.27421766990291263,
"calib/step_q_c_n": 515.0,
"calib/step_q_gap": 0.07378803104861625,
"calib/step_q_w": 0.20042963885429638,
"calib/step_q_w_n": 803.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2811.0,
"completions/max_terminated_length": 2811.0,
"completions/mean_length": 539.015625,
"completions/mean_terminated_length": 543.2598266601562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 174.0,
"epoch": 0.15573333333333333,
"grad_norm": 0.006177644710987806,
"learning_rate": 1.5e-06,
"loss": 0.1055,
"num_tokens": 32773644.0,
"reward": 1.012442946434021,
"reward_std": 0.18265041708946228,
"rewards/accuracy_reward_step": 0.3984375,
"rewards/final_brier_reward_step": 0.6576983332633972,
"rewards/format_reward_step": 0.96875,
"step": 146
},
{
"aux_distill/final_loss": 0.0001339657533208083,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06743633723817766,
"aux_distill/mean_u": 0.17703730969472667,
"aux_distill/n_active_final_tok": 31.125,
"aux_distill/n_active_tok": 162.625,
"aux_distill/step_loss": 0.6742293937131763,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.536065786065786,
"calib/avg_num_step_conf": 5.08203125,
"calib/ece": 0.37140562248995973,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.09236947791164658,
"calib/gap": 0.018074980574980498,
"calib/mean_conf": 0.24947791164658634,
"calib/mu_c": 0.259059829059829,
"calib/mu_w": 0.2409848484848485,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.07550200803212852,
"calib/std_conf": 0.31741951595719226,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.2965528455284553,
"calib/step_q_c_n": 615.0,
"calib/step_q_gap": -0.009103131147929544,
"calib/step_q_w": 0.30565597667638483,
"calib/step_q_w_n": 686.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 3039.0,
"completions/max_terminated_length": 3039.0,
"completions/mean_length": 532.34765625,
"completions/mean_terminated_length": 538.6600952148438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 171.0,
"epoch": 0.1568,
"grad_norm": 0.006623013410717249,
"learning_rate": 1.4722222222222225e-06,
"loss": 0.0796,
"num_tokens": 33013605.0,
"reward": 1.0117859840393066,
"reward_std": 0.19896234571933746,
"rewards/accuracy_reward_step": 0.45703125,
"rewards/final_brier_reward_step": 0.5938844084739685,
"rewards/format_reward_step": 0.97265625,
"step": 147
},
{
"aux_distill/final_loss": 0.00013361726644234295,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0728413398610428,
"aux_distill/mean_u": 0.21564378818813051,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 165.5,
"aux_distill/step_loss": 0.728279777802527,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5446356275303643,
"calib/avg_num_step_conf": 5.21875,
"calib/ece": 0.45294820717131484,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.05976095617529881,
"calib/gap": 0.056703778677462846,
"calib/mean_conf": 0.21629482071713146,
"calib/mu_c": 0.23775641025641028,
"calib/mu_w": 0.18105263157894744,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.023864541832669325,
"calib/std_conf": 0.2871709388857196,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2900251256281407,
"calib/step_q_c_n": 796.0,
"calib/step_q_gap": 0.03487697747999258,
"calib/step_q_w": 0.2551481481481481,
"calib/step_q_w_n": 540.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2441.0,
"completions/max_terminated_length": 2441.0,
"completions/mean_length": 496.66796875,
"completions/mean_terminated_length": 502.5573425292969,
"completions/min_length": 0.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.15786666666666666,
"grad_norm": 0.0063850851729512215,
"learning_rate": 1.4444444444444445e-06,
"loss": 0.084,
"num_tokens": 33245864.0,
"reward": 1.0619884729385376,
"reward_std": 0.1776794195175171,
"rewards/accuracy_reward_step": 0.609375,
"rewards/final_brier_reward_step": 0.5341331958770752,
"rewards/format_reward_step": 0.98046875,
"step": 148
},
{
"aux_distill/final_loss": 0.00015465507885892293,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07203872106038034,
"aux_distill/mean_u": 0.18082896538288876,
"aux_distill/n_active_final_tok": 31.25,
"aux_distill/n_active_tok": 169.5,
"aux_distill/step_loss": 0.7202325398102403,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.48552766393442626,
"calib/avg_num_step_conf": 5.3046875,
"calib/ece": 0.4096,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.088,
"calib/gap": -0.012695952868852445,
"calib/mean_conf": 0.23232,
"calib/mu_c": 0.22581967213114754,
"calib/mu_w": 0.23851562499999998,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.07696000000000001,
"calib/std_conf": 0.31102981464804946,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.2796116504854369,
"calib/step_q_c_n": 618.0,
"calib/step_q_gap": -0.025774836001049573,
"calib/step_q_w": 0.3053864864864865,
"calib/step_q_w_n": 740.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2807.0,
"completions/max_terminated_length": 2807.0,
"completions/mean_length": 536.171875,
"completions/mean_terminated_length": 542.5296630859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 121.0,
"epoch": 0.15893333333333334,
"grad_norm": 0.005627046804875135,
"learning_rate": 1.4166666666666667e-06,
"loss": 0.0962,
"num_tokens": 33487580.0,
"reward": 1.0105897188186646,
"reward_std": 0.21034207940101624,
"rewards/accuracy_reward_step": 0.4765625,
"rewards/final_brier_reward_step": 0.5680545568466187,
"rewards/format_reward_step": 0.9765625,
"step": 149
},
{
"aux_distill/final_loss": 0.00021909270560627192,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07247080851811916,
"aux_distill/mean_u": 0.2084789292080666,
"aux_distill/n_active_final_tok": 31.0,
"aux_distill/n_active_tok": 158.875,
"aux_distill/step_loss": 0.7244889652356505,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5141650422352176,
"calib/avg_num_step_conf": 5.03125,
"calib/ece": 0.4372690763052209,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.07228915662650602,
"calib/gap": -0.008651072124756326,
"calib/mean_conf": 0.19759036144578312,
"calib/mu_c": 0.19362962962962962,
"calib/mu_w": 0.20228070175438595,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.046345381526104415,
"calib/std_conf": 0.28539941867528507,
"calib/step_conf_rate": 0.98046875,
"calib/step_q_c": 0.2518639053254438,
"calib/step_q_c_n": 676.0,
"calib/step_q_gap": 0.00945197722086863,
"calib/step_q_w": 0.24241192810457515,
"calib/step_q_w_n": 612.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 3066.0,
"completions/max_terminated_length": 3066.0,
"completions/mean_length": 484.25390625,
"completions/mean_terminated_length": 488.0669250488281,
"completions/min_length": 0.0,
"completions/min_terminated_length": 141.0,
"epoch": 0.16,
"grad_norm": 0.006086606997996569,
"learning_rate": 1.3888888888888892e-06,
"loss": 0.1095,
"num_tokens": 33716509.0,
"reward": 1.0141351222991943,
"reward_std": 0.1790996640920639,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/final_brier_reward_step": 0.5321765542030334,
"rewards/format_reward_step": 0.96875,
"step": 150
},
{
"aux_distill/final_loss": 0.000140511143627009,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07645413489080966,
"aux_distill/mean_u": 0.22830244143183132,
"aux_distill/n_active_final_tok": 30.875,
"aux_distill/n_active_tok": 152.75,
"aux_distill/step_loss": 0.7644008286297321,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5012140131807146,
"calib/avg_num_step_conf": 4.828125,
"calib/ece": 0.3078584677419355,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.04838709677419355,
"calib/gap": -0.007641505376344143,
"calib/mean_conf": 0.2039157258064516,
"calib/mu_c": 0.19913978494623655,
"calib/mu_w": 0.2067812903225807,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.06838709677419355,
"calib/std_conf": 0.2666086343424099,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.25983981693363845,
"calib/step_q_c_n": 437.0,
"calib/step_q_gap": 0.008340317559420662,
"calib/step_q_w": 0.2514994993742178,
"calib/step_q_w_n": 799.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2820.0,
"completions/max_terminated_length": 2820.0,
"completions/mean_length": 553.6640625,
"completions/mean_terminated_length": 558.0236206054688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 192.0,
"epoch": 0.16106666666666666,
"grad_norm": 0.006590255536139011,
"learning_rate": 1.3611111111111112e-06,
"loss": 0.0612,
"num_tokens": 33965271.0,
"reward": 0.9828296303749084,
"reward_std": 0.17969170212745667,
"rewards/accuracy_reward_step": 0.36328125,
"rewards/final_brier_reward_step": 0.6375342607498169,
"rewards/format_reward_step": 0.96484375,
"step": 151
},
{
"aux_distill/final_loss": 0.010517073303617508,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.08004386606626213,
"aux_distill/mean_u": 0.1953560004997596,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 181.375,
"aux_distill/step_loss": 0.7899215742945671,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5205649000317358,
"calib/avg_num_step_conf": 5.76953125,
"calib/ece": 0.3487797619047619,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.06746031746031746,
"calib/gap": 0.02912456363059346,
"calib/mean_conf": 0.20860119047619047,
"calib/mu_c": 0.22443478260869565,
"calib/mu_w": 0.1953102189781022,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.050515873015873014,
"calib/std_conf": 0.28373842308837555,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.3100977653631285,
"calib/step_q_c_n": 716.0,
"calib/step_q_gap": 0.028001181920290108,
"calib/step_q_w": 0.2820965834428384,
"calib/step_q_w_n": 761.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2882.0,
"completions/max_terminated_length": 2882.0,
"completions/mean_length": 510.4609375,
"completions/mean_terminated_length": 514.4802856445312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 178.0,
"epoch": 0.16213333333333332,
"grad_norm": 0.006801180075854063,
"learning_rate": 1.3333333333333334e-06,
"loss": 0.0875,
"num_tokens": 34201341.0,
"reward": 1.012510061264038,
"reward_std": 0.20159053802490234,
"rewards/accuracy_reward_step": 0.44921875,
"rewards/final_brier_reward_step": 0.6031451225280762,
"rewards/format_reward_step": 0.97265625,
"step": 152
},
{
"aux_distill/final_loss": 0.0005106443512659098,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0817980298306793,
"aux_distill/mean_u": 0.26963061827811996,
"aux_distill/n_active_final_tok": 31.0,
"aux_distill/n_active_tok": 157.125,
"aux_distill/step_loss": 0.8174696424975991,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5102924890886587,
"calib/avg_num_step_conf": 5.2578125,
"calib/ece": 0.414717741935484,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.07661290322580645,
"calib/gap": 0.009611751677415153,
"calib/mean_conf": 0.20600806451612905,
"calib/mu_c": 0.21062015503875967,
"calib/mu_w": 0.2010084033613445,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.05028225806451612,
"calib/std_conf": 0.297691033704186,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.273472,
"calib/step_q_c_n": 625.0,
"calib/step_q_gap": 0.02140542579750343,
"calib/step_q_w": 0.25206657420249656,
"calib/step_q_w_n": 721.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2564.0,
"completions/max_terminated_length": 2564.0,
"completions/mean_length": 496.21875,
"completions/mean_terminated_length": 506.1036071777344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 172.0,
"epoch": 0.1632,
"grad_norm": 0.007670701947063208,
"learning_rate": 1.3055555555555556e-06,
"loss": 0.0914,
"num_tokens": 34435693.0,
"reward": 1.0133540630340576,
"reward_std": 0.19515293836593628,
"rewards/accuracy_reward_step": 0.5078125,
"rewards/final_brier_reward_step": 0.55014568567276,
"rewards/format_reward_step": 0.96875,
"step": 153
},
{
"aux_distill/final_loss": 0.00012748148060381936,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07031724287662655,
"aux_distill/mean_u": 0.1923050521109305,
"aux_distill/n_active_final_tok": 31.125,
"aux_distill/n_active_tok": 159.875,
"aux_distill/step_loss": 0.7030449416488409,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.4036673553719008,
"calib/avg_num_step_conf": 4.99609375,
"calib/ece": 0.4186345381526104,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.10843373493975904,
"calib/gap": -0.07913094008264462,
"calib/mean_conf": 0.245140562248996,
"calib/mu_c": 0.20446280991735535,
"calib/mu_w": 0.28359375,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.08891566265060241,
"calib/std_conf": 0.3174093177922958,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.2714490161001789,
"calib/step_q_c_n": 559.0,
"calib/step_q_gap": -0.036828761677598854,
"calib/step_q_w": 0.30827777777777776,
"calib/step_q_w_n": 720.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2829.0,
"completions/max_terminated_length": 2829.0,
"completions/mean_length": 495.26953125,
"completions/mean_terminated_length": 501.1423034667969,
"completions/min_length": 0.0,
"completions/min_terminated_length": 191.0,
"epoch": 0.16426666666666667,
"grad_norm": 0.0063225575722754,
"learning_rate": 1.2777777777777779e-06,
"loss": 0.0669,
"num_tokens": 34666922.0,
"reward": 0.9910746812820435,
"reward_std": 0.2115459442138672,
"rewards/accuracy_reward_step": 0.47265625,
"rewards/final_brier_reward_step": 0.5368367433547974,
"rewards/format_reward_step": 0.97265625,
"step": 154
},
{
"aux_distill/final_loss": 0.00013211382838562713,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07154768169857562,
"aux_distill/mean_u": 0.19281167140165867,
"aux_distill/n_active_final_tok": 31.75,
"aux_distill/n_active_tok": 172.75,
"aux_distill/step_loss": 0.715344687923789,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5510559006211181,
"calib/avg_num_step_conf": 5.3984375,
"calib/ece": 0.33588235294117647,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.03529411764705882,
"calib/gap": 0.051487577639751536,
"calib/mean_conf": 0.19086274509803922,
"calib/mu_c": 0.21913043478260869,
"calib/mu_w": 0.16764285714285715,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.037882352941176464,
"calib/std_conf": 0.2686554042991339,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.2655172413793103,
"calib/step_q_c_n": 609.0,
"calib/step_q_gap": 0.05232189855912919,
"calib/step_q_w": 0.21319534282018113,
"calib/step_q_w_n": 773.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1192.0,
"completions/max_terminated_length": 1192.0,
"completions/mean_length": 466.47265625,
"completions/mean_terminated_length": 468.302001953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 181.0,
"epoch": 0.16533333333333333,
"grad_norm": 0.007305369712412357,
"learning_rate": 1.25e-06,
"loss": 0.0993,
"num_tokens": 34893555.0,
"reward": 1.0365357398986816,
"reward_std": 0.1448049545288086,
"rewards/accuracy_reward_step": 0.44921875,
"rewards/final_brier_reward_step": 0.6316652297973633,
"rewards/format_reward_step": 0.9921875,
"step": 155
},
{
"aux_distill/final_loss": 0.00015378354362383106,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07364484970457852,
"aux_distill/mean_u": 0.19237760412891566,
"aux_distill/n_active_final_tok": 31.5,
"aux_distill/n_active_tok": 172.375,
"aux_distill/step_loss": 0.7362947026267648,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4962834645669292,
"calib/avg_num_step_conf": 5.45703125,
"calib/ece": 0.4131349206349206,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.05952380952380952,
"calib/gap": -0.019175433070866194,
"calib/mean_conf": 0.22297619047619047,
"calib/mu_c": 0.21346456692913385,
"calib/mu_w": 0.23264000000000004,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.06607142857142856,
"calib/std_conf": 0.27947845316287434,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.27607246376811595,
"calib/step_q_c_n": 690.0,
"calib/step_q_gap": -0.01774677244121925,
"calib/step_q_w": 0.2938192362093352,
"calib/step_q_w_n": 707.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2371.0,
"completions/max_terminated_length": 2371.0,
"completions/mean_length": 489.56640625,
"completions/mean_terminated_length": 493.4212646484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 153.0,
"epoch": 0.1664,
"grad_norm": 0.005915569607168436,
"learning_rate": 1.2222222222222223e-06,
"loss": 0.0569,
"num_tokens": 35123644.0,
"reward": 1.0273587703704834,
"reward_std": 0.17374806106090546,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/final_brier_reward_step": 0.5742487907409668,
"rewards/format_reward_step": 0.984375,
"step": 156
},
{
"aux_distill/final_loss": 0.00880513777633496,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07628553418908268,
"aux_distill/mean_u": 0.2348693963470195,
"aux_distill/n_active_final_tok": 31.875,
"aux_distill/n_active_tok": 182.75,
"aux_distill/step_loss": 0.7540501952171326,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4681208053691275,
"calib/avg_num_step_conf": 5.85546875,
"calib/ece": 0.42094117647058826,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.047058823529411764,
"calib/gap": -0.002948588071419511,
"calib/mean_conf": 0.22384313725490196,
"calib/mu_c": 0.22261744966442953,
"calib/mu_w": 0.22556603773584905,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.03023529411764707,
"calib/std_conf": 0.27768958353860507,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3245949074074074,
"calib/step_q_c_n": 864.0,
"calib/step_q_gap": 0.08065789953339167,
"calib/step_q_w": 0.2439370078740157,
"calib/step_q_w_n": 635.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1144.0,
"completions/max_terminated_length": 1144.0,
"completions/mean_length": 468.7109375,
"completions/mean_terminated_length": 470.5490417480469,
"completions/min_length": 0.0,
"completions/min_terminated_length": 159.0,
"epoch": 0.16746666666666668,
"grad_norm": 0.006663069594651461,
"learning_rate": 1.1944444444444446e-06,
"loss": 0.0444,
"num_tokens": 35347362.0,
"reward": 1.062303900718689,
"reward_std": 0.16383245587348938,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/final_brier_reward_step": 0.5464828014373779,
"rewards/format_reward_step": 0.99609375,
"step": 157
},
{
"aux_distill/final_loss": 0.005067012140671068,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07419109647162259,
"aux_distill/mean_u": 0.20995247473327852,
"aux_distill/n_active_final_tok": 31.5,
"aux_distill/n_active_tok": 169.25,
"aux_distill/step_loss": 0.7368439408019185,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4461863604819885,
"calib/avg_num_step_conf": 5.2890625,
"calib/ece": 0.42595238095238097,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.07142857142857142,
"calib/gap": -0.05374550501545641,
"calib/mean_conf": 0.21412698412698414,
"calib/mu_c": 0.1883206106870229,
"calib/mu_w": 0.24206611570247932,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.060119047619047614,
"calib/std_conf": 0.29138086634481836,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.25353722627737224,
"calib/step_q_c_n": 685.0,
"calib/step_q_gap": -0.06499790077793416,
"calib/step_q_w": 0.3185351270553064,
"calib/step_q_w_n": 669.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1386.0,
"completions/max_terminated_length": 1386.0,
"completions/mean_length": 460.38671875,
"completions/mean_terminated_length": 464.0118103027344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 129.0,
"epoch": 0.16853333333333334,
"grad_norm": 0.006863833405077457,
"learning_rate": 1.1666666666666668e-06,
"loss": 0.0773,
"num_tokens": 35570461.0,
"reward": 1.0202934741973877,
"reward_std": 0.174790158867836,
"rewards/accuracy_reward_step": 0.51953125,
"rewards/final_brier_reward_step": 0.5366804599761963,
"rewards/format_reward_step": 0.984375,
"step": 158
},
{
"aux_distill/final_loss": 7.75144445128717e-05,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07439770107157528,
"aux_distill/mean_u": 0.197982069789905,
"aux_distill/n_active_final_tok": 31.0,
"aux_distill/n_active_tok": 153.875,
"aux_distill/step_loss": 0.7438994897529483,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5158219623131903,
"calib/avg_num_step_conf": 4.81640625,
"calib/ece": 0.3983132530120482,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.06827309236947791,
"calib/gap": 0.048635477582845976,
"calib/mean_conf": 0.19847389558232933,
"calib/mu_c": 0.2207407407407407,
"calib/mu_w": 0.17210526315789473,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.02730923694779116,
"calib/std_conf": 0.27711088410534096,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.32632558139534884,
"calib/step_q_c_n": 645.0,
"calib/step_q_gap": 0.0776861256130359,
"calib/step_q_w": 0.24863945578231295,
"calib/step_q_w_n": 588.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1435.0,
"completions/max_terminated_length": 1435.0,
"completions/mean_length": 436.8515625,
"completions/mean_terminated_length": 445.5538024902344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 161.0,
"epoch": 0.1696,
"grad_norm": 0.006662644911557436,
"learning_rate": 1.138888888888889e-06,
"loss": 0.0472,
"num_tokens": 35787079.0,
"reward": 1.028663158416748,
"reward_std": 0.1973605453968048,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/final_brier_reward_step": 0.5612324476242065,
"rewards/format_reward_step": 0.96875,
"step": 159
},
{
"aux_distill/final_loss": 0.0059038225367658015,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07914891897235066,
"aux_distill/mean_u": 0.1979178449540299,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 157.25,
"aux_distill/step_loss": 0.7855853512883186,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5008529727680546,
"calib/avg_num_step_conf": 5.14453125,
"calib/ece": 0.37162698412698403,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.07142857142857142,
"calib/gap": 0.0014285714285714457,
"calib/mean_conf": 0.23496031746031745,
"calib/mu_c": 0.23571428571428574,
"calib/mu_w": 0.2342857142857143,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.06718253968253968,
"calib/std_conf": 0.2983173685750631,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.309696394686907,
"calib/step_q_c_n": 527.0,
"calib/step_q_gap": 0.03393310354766649,
"calib/step_q_w": 0.27576329113924053,
"calib/step_q_w_n": 790.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2871.0,
"completions/max_terminated_length": 2871.0,
"completions/mean_length": 462.08984375,
"completions/mean_terminated_length": 465.72833251953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 178.0,
"epoch": 0.17066666666666666,
"grad_norm": 0.006196652539074421,
"learning_rate": 1.111111111111111e-06,
"loss": 0.0816,
"num_tokens": 36010214.0,
"reward": 1.0190675258636475,
"reward_std": 0.20500755310058594,
"rewards/accuracy_reward_step": 0.46484375,
"rewards/final_brier_reward_step": 0.5928226709365845,
"rewards/format_reward_step": 0.98046875,
"step": 160
},
{
"aux_distill/final_loss": 0.00010458140604896471,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06749991769902408,
"aux_distill/mean_u": 0.17813880146622404,
"aux_distill/n_active_final_tok": 31.875,
"aux_distill/n_active_tok": 156.625,
"aux_distill/step_loss": 0.6748945843428373,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5069444444444444,
"calib/avg_num_step_conf": 4.89453125,
"calib/ece": 0.48333333333333345,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.058823529411764705,
"calib/gap": 0.002220853858784927,
"calib/mean_conf": 0.22364705882352942,
"calib/mu_c": 0.2244047619047619,
"calib/mu_w": 0.22218390804597699,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.024078431372549027,
"calib/std_conf": 0.2661556984636677,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2867741935483871,
"calib/step_q_c_n": 775.0,
"calib/step_q_gap": -0.028267647455796963,
"calib/step_q_w": 0.3150418410041841,
"calib/step_q_w_n": 478.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2907.0,
"completions/max_terminated_length": 2907.0,
"completions/mean_length": 461.68359375,
"completions/mean_terminated_length": 461.68359375,
"completions/min_length": 114.0,
"completions/min_terminated_length": 114.0,
"epoch": 0.17173333333333332,
"grad_norm": 0.007173808291554451,
"learning_rate": 1.0833333333333335e-06,
"loss": 0.1101,
"num_tokens": 36232325.0,
"reward": 1.0831670761108398,
"reward_std": 0.15231254696846008,
"rewards/accuracy_reward_step": 0.65625,
"rewards/final_brier_reward_step": 0.5139902234077454,
"rewards/format_reward_step": 0.99609375,
"step": 161
},
{
"aux_distill/final_loss": 0.0001530974079742009,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06709114729892462,
"aux_distill/mean_u": 0.18809368684457695,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 164.25,
"aux_distill/step_loss": 0.6707583647221327,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5302566440854611,
"calib/avg_num_step_conf": 5.1328125,
"calib/ece": 0.4387632411067194,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.05533596837944664,
"calib/gap": 0.03453030224075038,
"calib/mean_conf": 0.21483359683794467,
"calib/mu_c": 0.22861842105263158,
"calib/mu_w": 0.1940881188118812,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.026403162055335966,
"calib/std_conf": 0.2695710060266586,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.29939058171745153,
"calib/step_q_c_n": 722.0,
"calib/step_q_gap": 0.024668960095829917,
"calib/step_q_w": 0.2747216216216216,
"calib/step_q_w_n": 592.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2698.0,
"completions/max_terminated_length": 2698.0,
"completions/mean_length": 460.08203125,
"completions/mean_terminated_length": 461.88629150390625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 167.0,
"epoch": 0.1728,
"grad_norm": 0.006731119938194752,
"learning_rate": 1.0555555555555557e-06,
"loss": 0.1222,
"num_tokens": 36454250.0,
"reward": 1.061426043510437,
"reward_std": 0.1838788390159607,
"rewards/accuracy_reward_step": 0.59375,
"rewards/final_brier_reward_step": 0.544727087020874,
"rewards/format_reward_step": 0.984375,
"step": 162
},
{
"aux_distill/final_loss": 0.0001396006812228734,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07542417175136507,
"aux_distill/mean_u": 0.22200943521140018,
"aux_distill/n_active_final_tok": 31.25,
"aux_distill/n_active_tok": 164.0,
"aux_distill/step_loss": 0.7541021099314094,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5280830280830281,
"calib/avg_num_step_conf": 5.23046875,
"calib/ece": 0.4085516,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.064,
"calib/gap": 0.03042482488271958,
"calib/mean_conf": 0.20064839999999998,
"calib/mu_c": 0.21488721804511277,
"calib/mu_w": 0.18446239316239318,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.038599999999999995,
"calib/std_conf": 0.2834424195801327,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.2612359550561798,
"calib/step_q_c_n": 623.0,
"calib/step_q_gap": 0.005341541648358539,
"calib/step_q_w": 0.25589441340782126,
"calib/step_q_w_n": 716.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2888.0,
"completions/max_terminated_length": 2888.0,
"completions/mean_length": 472.203125,
"completions/mean_terminated_length": 481.6095886230469,
"completions/min_length": 0.0,
"completions/min_terminated_length": 159.0,
"epoch": 0.17386666666666667,
"grad_norm": 0.006165460217744112,
"learning_rate": 1.0277777777777777e-06,
"loss": 0.0517,
"num_tokens": 36679966.0,
"reward": 1.0293166637420654,
"reward_std": 0.18782052397727966,
"rewards/accuracy_reward_step": 0.51953125,
"rewards/final_brier_reward_step": 0.5625396966934204,
"rewards/format_reward_step": 0.9765625,
"step": 163
},
{
"aux_distill/final_loss": 0.00014845711132238648,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0759313030866906,
"aux_distill/mean_u": 0.18177047881912312,
"aux_distill/n_active_final_tok": 31.125,
"aux_distill/n_active_tok": 154.875,
"aux_distill/step_loss": 0.7591645568609238,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.48111811631497686,
"calib/avg_num_step_conf": 4.84765625,
"calib/ece": 0.4277200000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.084,
"calib/gap": -0.015193000514668015,
"calib/mean_conf": 0.22996,
"calib/mu_c": 0.22291044776119404,
"calib/mu_w": 0.23810344827586205,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.060840000000000005,
"calib/std_conf": 0.3054629247551984,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.2821812596006144,
"calib/step_q_c_n": 651.0,
"calib/step_q_gap": -0.024954842094300822,
"calib/step_q_w": 0.30713610169491523,
"calib/step_q_w_n": 590.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3021.0,
"completions/max_terminated_length": 3021.0,
"completions/mean_length": 516.4765625,
"completions/mean_terminated_length": 518.5020141601562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 174.0,
"epoch": 0.17493333333333333,
"grad_norm": 0.0059613315388560295,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.1579,
"num_tokens": 36918320.0,
"reward": 1.0218608379364014,
"reward_std": 0.19381961226463318,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/final_brier_reward_step": 0.5437214374542236,
"rewards/format_reward_step": 0.9765625,
"step": 164
},
{
"aux_distill/final_loss": 0.00011706302370839694,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.081173887825571,
"aux_distill/mean_u": 0.2349289558838646,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 158.125,
"aux_distill/step_loss": 0.8116217972710729,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4514854614412136,
"calib/avg_num_step_conf": 4.94140625,
"calib/ece": 0.34383399209486165,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.04743083003952569,
"calib/gap": -0.007164348925410846,
"calib/mean_conf": 0.1923715415019763,
"calib/mu_c": 0.18840707964601772,
"calib/mu_w": 0.19557142857142856,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.04478260869565218,
"calib/std_conf": 0.25780448113727916,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.23833005893909626,
"calib/step_q_c_n": 509.0,
"calib/step_q_gap": 0.0017824398914772221,
"calib/step_q_w": 0.23654761904761903,
"calib/step_q_w_n": 756.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1477.0,
"completions/max_terminated_length": 1477.0,
"completions/mean_length": 480.9453125,
"completions/mean_terminated_length": 484.7322692871094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 169.0,
"epoch": 0.176,
"grad_norm": 0.007403014227747917,
"learning_rate": 9.722222222222224e-07,
"loss": 0.0772,
"num_tokens": 37147018.0,
"reward": 1.0222697257995605,
"reward_std": 0.14842969179153442,
"rewards/accuracy_reward_step": 0.4453125,
"rewards/final_brier_reward_step": 0.6109457015991211,
"rewards/format_reward_step": 0.98828125,
"step": 165
},
{
"aux_distill/final_loss": 0.00015963665589424636,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07058893702924252,
"aux_distill/mean_u": 0.21124927093897258,
"aux_distill/n_active_final_tok": 31.25,
"aux_distill/n_active_tok": 154.875,
"aux_distill/step_loss": 0.7057297229766846,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.48443930041152267,
"calib/avg_num_step_conf": 4.84375,
"calib/ece": 0.4321428571428571,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.051587301587301584,
"calib/gap": 0.004398148148148179,
"calib/mean_conf": 0.21547619047619046,
"calib/mu_c": 0.21736111111111114,
"calib/mu_w": 0.21296296296296297,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.0380952380952381,
"calib/std_conf": 0.2738586912377256,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.26814132104454685,
"calib/step_q_c_n": 651.0,
"calib/step_q_gap": -0.014829816476675584,
"calib/step_q_w": 0.28297113752122244,
"calib/step_q_w_n": 589.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2441.0,
"completions/max_terminated_length": 2441.0,
"completions/mean_length": 530.421875,
"completions/mean_terminated_length": 532.5020141601562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 224.0,
"epoch": 0.17706666666666668,
"grad_norm": 0.0061018322594463825,
"learning_rate": 9.444444444444445e-07,
"loss": 0.0774,
"num_tokens": 37388990.0,
"reward": 1.0370354652404785,
"reward_std": 0.20210300385951996,
"rewards/accuracy_reward_step": 0.5625,
"rewards/final_brier_reward_step": 0.5389144420623779,
"rewards/format_reward_step": 0.97265625,
"step": 166
},
{
"aux_distill/final_loss": 0.00012661207426845067,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07728934357874095,
"aux_distill/mean_u": 0.26433023776783365,
"aux_distill/n_active_final_tok": 31.25,
"aux_distill/n_active_tok": 151.375,
"aux_distill/step_loss": 0.7727668080478907,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5267295597484277,
"calib/avg_num_step_conf": 4.73046875,
"calib/ece": 0.510395256916996,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.039525691699604744,
"calib/gap": -0.006399705606851325,
"calib/mean_conf": 0.16778656126482214,
"calib/mu_c": 0.16540880503144656,
"calib/mu_w": 0.17180851063829788,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.024861660079051384,
"calib/std_conf": 0.24258457206098058,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.23089552238805972,
"calib/step_q_c_n": 737.0,
"calib/step_q_gap": 0.004608602556836094,
"calib/step_q_w": 0.22628691983122362,
"calib/step_q_w_n": 474.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2733.0,
"completions/max_terminated_length": 2733.0,
"completions/mean_length": 477.91015625,
"completions/mean_terminated_length": 481.6732177734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 174.0,
"epoch": 0.17813333333333334,
"grad_norm": 0.006885558366775513,
"learning_rate": 9.166666666666666e-07,
"loss": 0.0723,
"num_tokens": 37616943.0,
"reward": 1.0405546426773071,
"reward_std": 0.15715563297271729,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/final_brier_reward_step": 0.4834531545639038,
"rewards/format_reward_step": 0.9765625,
"step": 167
},
{
"aux_distill/final_loss": 0.013254495221190155,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07616680616047233,
"aux_distill/mean_u": 0.25441551732470813,
"aux_distill/n_active_final_tok": 31.875,
"aux_distill/n_active_tok": 149.875,
"aux_distill/step_loss": 0.7484135506674647,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5167754247722236,
"calib/avg_num_step_conf": 4.68359375,
"calib/ece": 0.39211764705882346,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.07058823529411765,
"calib/gap": 0.004357916769268638,
"calib/mean_conf": 0.20074509803921567,
"calib/mu_c": 0.20298387096774193,
"calib/mu_w": 0.1986259541984733,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.053294117647058825,
"calib/std_conf": 0.2808834691830128,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.22377016129032257,
"calib/step_q_c_n": 496.0,
"calib/step_q_gap": -0.02783723558023221,
"calib/step_q_w": 0.2516073968705548,
"calib/step_q_w_n": 703.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2260.0,
"completions/max_terminated_length": 2260.0,
"completions/mean_length": 536.64453125,
"completions/mean_terminated_length": 536.64453125,
"completions/min_length": 169.0,
"completions/min_terminated_length": 169.0,
"epoch": 0.1792,
"grad_norm": 0.005739326123148203,
"learning_rate": 8.88888888888889e-07,
"loss": 0.1019,
"num_tokens": 37858996.0,
"reward": 1.0350497961044312,
"reward_std": 0.155503511428833,
"rewards/accuracy_reward_step": 0.484375,
"rewards/final_brier_reward_step": 0.5896308422088623,
"rewards/format_reward_step": 0.99609375,
"step": 168
},
{
"aux_distill/final_loss": 0.00016585832759119512,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07224566210061312,
"aux_distill/mean_u": 0.1620567446785026,
"aux_distill/n_active_final_tok": 31.25,
"aux_distill/n_active_tok": 137.75,
"aux_distill/step_loss": 0.7222907477989793,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.47317228226319136,
"calib/avg_num_step_conf": 4.3046875,
"calib/ece": 0.39422310756972107,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.06772908366533864,
"calib/gap": 0.009647806738715847,
"calib/mean_conf": 0.21119521912350597,
"calib/mu_c": 0.21584615384615385,
"calib/mu_w": 0.206198347107438,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.04374501992031873,
"calib/std_conf": 0.2894605604789032,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.30069444444444443,
"calib/step_q_c_n": 576.0,
"calib/step_q_gap": 0.029686839881706784,
"calib/step_q_w": 0.27100760456273765,
"calib/step_q_w_n": 526.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2795.0,
"completions/max_terminated_length": 2795.0,
"completions/mean_length": 488.0703125,
"completions/mean_terminated_length": 491.91339111328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 71.0,
"epoch": 0.18026666666666666,
"grad_norm": 0.006307313684374094,
"learning_rate": 8.611111111111112e-07,
"loss": 0.1095,
"num_tokens": 38088126.0,
"reward": 1.0232303142547607,
"reward_std": 0.18713515996932983,
"rewards/accuracy_reward_step": 0.5078125,
"rewards/final_brier_reward_step": 0.5620855093002319,
"rewards/format_reward_step": 0.9765625,
"step": 169
},
{
"aux_distill/final_loss": 0.0001271916736413914,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07005494262557477,
"aux_distill/mean_u": 0.1803852533446794,
"aux_distill/n_active_final_tok": 31.5,
"aux_distill/n_active_tok": 146.875,
"aux_distill/step_loss": 0.7004222283139825,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4994199162101193,
"calib/avg_num_step_conf": 4.58984375,
"calib/ece": 0.4657142857142857,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.047619047619047616,
"calib/gap": -0.02286045762165645,
"calib/mean_conf": 0.19460317460317464,
"calib/mu_c": 0.18489655172413794,
"calib/mu_w": 0.20775700934579439,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.042460317460317455,
"calib/std_conf": 0.2587658462095125,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.25309985096870347,
"calib/step_q_c_n": 671.0,
"calib/step_q_gap": -0.04344776807891554,
"calib/step_q_w": 0.296547619047619,
"calib/step_q_w_n": 504.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2506.0,
"completions/max_terminated_length": 2506.0,
"completions/mean_length": 522.46484375,
"completions/mean_terminated_length": 524.5137329101562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 159.0,
"epoch": 0.18133333333333335,
"grad_norm": 0.005817190278321505,
"learning_rate": 8.333333333333333e-07,
"loss": 0.0844,
"num_tokens": 38326029.0,
"reward": 1.0375053882598877,
"reward_std": 0.16187521815299988,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/final_brier_reward_step": 0.5242296457290649,
"rewards/format_reward_step": 0.984375,
"step": 170
},
{
"aux_distill/final_loss": 0.00012730757214285404,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0673176534473896,
"aux_distill/mean_u": 0.1913334194705908,
"aux_distill/n_active_final_tok": 31.5,
"aux_distill/n_active_tok": 144.5,
"aux_distill/step_loss": 0.6730492170900106,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5233848720087253,
"calib/avg_num_step_conf": 4.6640625,
"calib/ece": 0.3166269841269841,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.06349206349206349,
"calib/gap": 0.024855969718355048,
"calib/mean_conf": 0.21543650793650795,
"calib/mu_c": 0.22954128440366972,
"calib/mu_w": 0.20468531468531467,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.04976190476190476,
"calib/std_conf": 0.2760337854876874,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.2694093686354379,
"calib/step_q_c_n": 491.0,
"calib/step_q_gap": 0.01905374985876651,
"calib/step_q_w": 0.2503556187766714,
"calib/step_q_w_n": 703.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 1202.0,
"completions/max_terminated_length": 1202.0,
"completions/mean_length": 474.3046875,
"completions/mean_terminated_length": 479.9288635253906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 160.0,
"epoch": 0.1824,
"grad_norm": 0.007274095434695482,
"learning_rate": 8.055555555555557e-07,
"loss": 0.026,
"num_tokens": 38554347.0,
"reward": 1.0217634439468384,
"reward_std": 0.17246311902999878,
"rewards/accuracy_reward_step": 0.42578125,
"rewards/final_brier_reward_step": 0.6333706974983215,
"rewards/format_reward_step": 0.984375,
"step": 171
},
{
"aux_distill/final_loss": 0.00013447724222714896,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06745812157168984,
"aux_distill/mean_u": 0.16089452147621752,
"aux_distill/n_active_final_tok": 31.875,
"aux_distill/n_active_tok": 137.375,
"aux_distill/step_loss": 0.6744467271491885,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.494122965641953,
"calib/avg_num_step_conf": 4.29296875,
"calib/ece": 0.48296875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0703125,
"calib/gap": 0.023867217773185234,
"calib/mean_conf": 0.205546875,
"calib/mu_c": 0.21468354430379752,
"calib/mu_w": 0.19081632653061228,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0356640625,
"calib/std_conf": 0.28871024778787185,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.2499405646359584,
"calib/step_q_c_n": 673.0,
"calib/step_q_gap": 0.00264478998807105,
"calib/step_q_w": 0.24729577464788735,
"calib/step_q_w_n": 426.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1298.0,
"completions/max_terminated_length": 1298.0,
"completions/mean_length": 457.23828125,
"completions/mean_terminated_length": 459.0314025878906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 180.0,
"epoch": 0.18346666666666667,
"grad_norm": 0.00690126558765769,
"learning_rate": 7.777777777777779e-07,
"loss": 0.057,
"num_tokens": 38774752.0,
"reward": 1.0621392726898193,
"reward_std": 0.1700059324502945,
"rewards/accuracy_reward_step": 0.6171875,
"rewards/final_brier_reward_step": 0.5149035453796387,
"rewards/format_reward_step": 0.9921875,
"step": 172
},
{
"aux_distill/final_loss": 0.0001626137621997259,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06854678073432297,
"aux_distill/mean_u": 0.18265357143386787,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 163.25,
"aux_distill/step_loss": 0.6853051725775003,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5511278664007976,
"calib/avg_num_step_conf": 5.125,
"calib/ece": 0.3881889763779527,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.04330708661417323,
"calib/gap": 0.05203888334995019,
"calib/mean_conf": 0.19023622047244093,
"calib/mu_c": 0.21441176470588239,
"calib/mu_w": 0.1623728813559322,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.02149606299212599,
"calib/std_conf": 0.25156506826260105,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.295595567867036,
"calib/step_q_c_n": 722.0,
"calib/step_q_gap": 0.05186675430771395,
"calib/step_q_w": 0.24372881355932205,
"calib/step_q_w_n": 590.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1568.0,
"completions/max_terminated_length": 1568.0,
"completions/mean_length": 511.26171875,
"completions/mean_terminated_length": 513.2667236328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 153.0,
"epoch": 0.18453333333333333,
"grad_norm": 0.006458665709942579,
"learning_rate": 7.5e-07,
"loss": 0.0733,
"num_tokens": 39008795.0,
"reward": 1.0473742485046387,
"reward_std": 0.16939207911491394,
"rewards/accuracy_reward_step": 0.53125,
"rewards/final_brier_reward_step": 0.5830296277999878,
"rewards/format_reward_step": 0.98046875,
"step": 173
},
{
"aux_distill/final_loss": 0.0005857845847003773,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.072768610320054,
"aux_distill/mean_u": 0.20042516857950027,
"aux_distill/n_active_final_tok": 30.875,
"aux_distill/n_active_tok": 157.25,
"aux_distill/step_loss": 0.7271003052592278,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.4796011768551814,
"calib/avg_num_step_conf": 4.953125,
"calib/ece": 0.4326032258064516,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 0.08870967741935484,
"calib/gap": -0.0583984831644328,
"calib/mean_conf": 0.22457419354838706,
"calib/mu_c": 0.19325565217391305,
"calib/mu_w": 0.25165413533834585,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.09673387096774194,
"calib/std_conf": 0.312205231267513,
"calib/step_conf_rate": 0.98046875,
"calib/step_q_c": 0.2402541889483066,
"calib/step_q_c_n": 561.0,
"calib/step_q_gap": -0.05041082521599935,
"calib/step_q_w": 0.29066501416430596,
"calib/step_q_w_n": 706.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2481.0,
"completions/max_terminated_length": 2481.0,
"completions/mean_length": 533.84375,
"completions/mean_terminated_length": 544.4780883789062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 169.0,
"epoch": 0.1856,
"grad_norm": 0.005857736337929964,
"learning_rate": 7.222222222222222e-07,
"loss": 0.0468,
"num_tokens": 39249691.0,
"reward": 0.9747790098190308,
"reward_std": 0.22227507829666138,
"rewards/accuracy_reward_step": 0.44921875,
"rewards/final_brier_reward_step": 0.5433081388473511,
"rewards/format_reward_step": 0.95703125,
"step": 174
},
{
"aux_distill/final_loss": 0.00022364286769516184,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07627740735188127,
"aux_distill/mean_u": 0.21568897692772337,
"aux_distill/n_active_final_tok": 31.25,
"aux_distill/n_active_tok": 146.125,
"aux_distill/step_loss": 0.762550413608551,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5145747934153165,
"calib/avg_num_step_conf": 4.57421875,
"calib/ece": 0.331,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.076,
"calib/gap": 0.03670700761272691,
"calib/mean_conf": 0.2026,
"calib/mu_c": 0.223302752293578,
"calib/mu_w": 0.18659574468085108,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.04879999999999999,
"calib/std_conf": 0.2865994417300913,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.29833333333333334,
"calib/step_q_c_n": 498.0,
"calib/step_q_gap": 0.06271669143140171,
"calib/step_q_w": 0.23561664190193163,
"calib/step_q_w_n": 673.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2508.0,
"completions/max_terminated_length": 2508.0,
"completions/mean_length": 492.49609375,
"completions/mean_terminated_length": 500.3135070800781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 147.0,
"epoch": 0.18666666666666668,
"grad_norm": 0.007421276066452265,
"learning_rate": 6.944444444444446e-07,
"loss": 0.0739,
"num_tokens": 39481594.0,
"reward": 1.0114911794662476,
"reward_std": 0.19522380828857422,
"rewards/accuracy_reward_step": 0.42578125,
"rewards/final_brier_reward_step": 0.6206386685371399,
"rewards/format_reward_step": 0.9765625,
"step": 175
},
{
"aux_distill/final_loss": 0.0001829939112667489,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07527686783578247,
"aux_distill/mean_u": 0.23696593019136702,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 150.0,
"aux_distill/step_loss": 0.7525856709107757,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.529479302426612,
"calib/avg_num_step_conf": 4.703125,
"calib/ece": 0.42625984251968496,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.08661417322834646,
"calib/gap": 0.01572705269037422,
"calib/mean_conf": 0.1993307086614173,
"calib/mu_c": 0.20694656488549618,
"calib/mu_w": 0.19121951219512195,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.054921259842519686,
"calib/std_conf": 0.2925787209532093,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2625811965811966,
"calib/step_q_c_n": 585.0,
"calib/step_q_gap": 0.016991535838062538,
"calib/step_q_w": 0.24558966074313407,
"calib/step_q_w_n": 619.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1906.0,
"completions/max_terminated_length": 1906.0,
"completions/mean_length": 498.39453125,
"completions/mean_terminated_length": 500.34906005859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 198.0,
"epoch": 0.18773333333333334,
"grad_norm": 0.006700032390654087,
"learning_rate": 6.666666666666667e-07,
"loss": 0.0805,
"num_tokens": 39713247.0,
"reward": 1.0359079837799072,
"reward_std": 0.16493433713912964,
"rewards/accuracy_reward_step": 0.51171875,
"rewards/final_brier_reward_step": 0.5679097175598145,
"rewards/format_reward_step": 0.9921875,
"step": 176
},
{
"aux_distill/final_loss": 0.015147438318081186,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07469213579315692,
"aux_distill/mean_u": 0.1922451938702458,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 146.375,
"aux_distill/step_loss": 0.7317739073187113,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4699219733199094,
"calib/avg_num_step_conf": 4.57421875,
"calib/ece": 0.4300395256916996,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.05928853754940711,
"calib/gap": -0.0006972061414548258,
"calib/mean_conf": 0.18893280632411066,
"calib/mu_c": 0.18861313868613142,
"calib/mu_w": 0.18931034482758624,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.03873517786561265,
"calib/std_conf": 0.27523049526630516,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.2358448275862069,
"calib/step_q_c_n": 580.0,
"calib/step_q_gap": 0.017384590699574043,
"calib/step_q_w": 0.21846023688663285,
"calib/step_q_w_n": 591.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2900.0,
"completions/max_terminated_length": 2900.0,
"completions/mean_length": 503.703125,
"completions/mean_terminated_length": 507.6692810058594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 184.0,
"epoch": 0.1888,
"grad_norm": 0.006349343340843916,
"learning_rate": 6.388888888888889e-07,
"loss": 0.1273,
"num_tokens": 39946027.0,
"reward": 1.026729702949524,
"reward_std": 0.17228364944458008,
"rewards/accuracy_reward_step": 0.53515625,
"rewards/final_brier_reward_step": 0.5378344058990479,
"rewards/format_reward_step": 0.98046875,
"step": 177
},
{
"aux_distill/final_loss": 0.00010571018469818227,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07261781021952629,
"aux_distill/mean_u": 0.19784925852864274,
"aux_distill/n_active_final_tok": 31.75,
"aux_distill/n_active_tok": 159.5,
"aux_distill/step_loss": 0.7260723812505603,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5133538461538462,
"calib/avg_num_step_conf": 4.9921875,
"calib/ece": 0.39415686274509804,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.07058823529411765,
"calib/gap": 0.007978461538461518,
"calib/mean_conf": 0.20662745098039217,
"calib/mu_c": 0.2105384615384615,
"calib/mu_w": 0.20256,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.045490196078431376,
"calib/std_conf": 0.2869631363074095,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.27271159874608153,
"calib/step_q_c_n": 638.0,
"calib/step_q_gap": 0.010164723746081517,
"calib/step_q_w": 0.262546875,
"calib/step_q_w_n": 640.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 955.0,
"completions/max_terminated_length": 955.0,
"completions/mean_length": 461.99609375,
"completions/mean_terminated_length": 463.807861328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 121.0,
"epoch": 0.18986666666666666,
"grad_norm": 0.006635536905378103,
"learning_rate": 6.111111111111112e-07,
"loss": 0.0964,
"num_tokens": 40170370.0,
"reward": 1.0407307147979736,
"reward_std": 0.16870535910129547,
"rewards/accuracy_reward_step": 0.5078125,
"rewards/final_brier_reward_step": 0.5775550603866577,
"rewards/format_reward_step": 0.99609375,
"step": 178
},
{
"aux_distill/final_loss": 9.58011798957159e-05,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07280069135595113,
"aux_distill/mean_u": 0.20255241438903543,
"aux_distill/n_active_final_tok": 30.75,
"aux_distill/n_active_tok": 150.5,
"aux_distill/step_loss": 0.7279111016541719,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.5251181049970058,
"calib/avg_num_step_conf": 4.8046875,
"calib/ece": 0.4478861788617886,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.044715447154471545,
"calib/gap": 0.004235810765852666,
"calib/mean_conf": 0.15512195121951222,
"calib/mu_c": 0.15706766917293233,
"calib/mu_w": 0.15283185840707966,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 0.98046875,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.031178861788617875,
"calib/std_conf": 0.2406997449972085,
"calib/step_conf_rate": 0.98046875,
"calib/step_q_c": 0.22209523809523812,
"calib/step_q_c_n": 630.0,
"calib/step_q_gap": -0.05324309523809523,
"calib/step_q_w": 0.27533833333333335,
"calib/step_q_w_n": 600.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2846.0,
"completions/max_terminated_length": 2846.0,
"completions/mean_length": 475.2734375,
"completions/mean_terminated_length": 480.90911865234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 214.0,
"epoch": 0.19093333333333334,
"grad_norm": 0.007111471612006426,
"learning_rate": 5.833333333333334e-07,
"loss": 0.0896,
"num_tokens": 40398304.0,
"reward": 1.003140926361084,
"reward_std": 0.1745980978012085,
"rewards/accuracy_reward_step": 0.51953125,
"rewards/final_brier_reward_step": 0.5258132815361023,
"rewards/format_reward_step": 0.9609375,
"step": 179
},
{
"aux_distill/final_loss": 9.564100832903932e-05,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07134161016438156,
"aux_distill/mean_u": 0.2064909479776691,
"aux_distill/n_active_final_tok": 31.75,
"aux_distill/n_active_tok": 155.75,
"aux_distill/step_loss": 0.7133204592391849,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4595897897511734,
"calib/avg_num_step_conf": 4.8671875,
"calib/ece": 0.4600393700787401,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.08267716535433071,
"calib/gap": -0.022557062946055445,
"calib/mean_conf": 0.22484251968503935,
"calib/mu_c": 0.21569536423841054,
"calib/mu_w": 0.238252427184466,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.04519685039370079,
"calib/std_conf": 0.29550932743574304,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.30337893296853624,
"calib/step_q_c_n": 731.0,
"calib/step_q_gap": 0.0012041756869828335,
"calib/step_q_w": 0.3021747572815534,
"calib/step_q_w_n": 515.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2400.0,
"completions/max_terminated_length": 2400.0,
"completions/mean_length": 525.796875,
"completions/mean_terminated_length": 525.796875,
"completions/min_length": 170.0,
"completions/min_terminated_length": 170.0,
"epoch": 0.192,
"grad_norm": 0.006270106416195631,
"learning_rate": 5.555555555555555e-07,
"loss": 0.0957,
"num_tokens": 40636764.0,
"reward": 1.0510127544403076,
"reward_std": 0.1763603389263153,
"rewards/accuracy_reward_step": 0.58984375,
"rewards/final_brier_reward_step": 0.5199941396713257,
"rewards/format_reward_step": 0.9921875,
"step": 180
},
{
"aux_distill/final_loss": 0.0001657123884797329,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07269645959604532,
"aux_distill/mean_u": 0.16722964074674762,
"aux_distill/n_active_final_tok": 31.5,
"aux_distill/n_active_tok": 143.375,
"aux_distill/step_loss": 0.7267988743260503,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5390842398178598,
"calib/avg_num_step_conf": 4.48046875,
"calib/ece": 0.3808730158730158,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.047619047619047616,
"calib/gap": 0.019853275992916808,
"calib/mean_conf": 0.16571428571428573,
"calib/mu_c": 0.176271186440678,
"calib/mu_w": 0.15641791044776118,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.03916666666666667,
"calib/std_conf": 0.24985211045237474,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2612891344383057,
"calib/step_q_c_n": 543.0,
"calib/step_q_gap": 0.06155403510055735,
"calib/step_q_w": 0.19973509933774836,
"calib/step_q_w_n": 604.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2047.0,
"completions/max_terminated_length": 2047.0,
"completions/mean_length": 449.953125,
"completions/mean_terminated_length": 453.4960632324219,
"completions/min_length": 0.0,
"completions/min_terminated_length": 192.0,
"epoch": 0.19306666666666666,
"grad_norm": 0.007513204589486122,
"learning_rate": 5.277777777777779e-07,
"loss": 0.0637,
"num_tokens": 40858216.0,
"reward": 1.02138352394104,
"reward_std": 0.15062540769577026,
"rewards/accuracy_reward_step": 0.4609375,
"rewards/final_brier_reward_step": 0.5974546670913696,
"rewards/format_reward_step": 0.984375,
"step": 181
},
{
"aux_distill/final_loss": 0.00011876001019572868,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0664075068780221,
"aux_distill/mean_u": 0.18295012680177067,
"aux_distill/n_active_final_tok": 31.5,
"aux_distill/n_active_tok": 156.625,
"aux_distill/step_loss": 0.6639563012868166,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5261798469387755,
"calib/avg_num_step_conf": 4.89453125,
"calib/ece": 0.41361111111111115,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.09126984126984126,
"calib/gap": 0.03301785714285713,
"calib/mean_conf": 0.23718253968253966,
"calib/mu_c": 0.25185714285714284,
"calib/mu_w": 0.2188392857142857,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0476190476190476,
"calib/std_conf": 0.30725163930216465,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.2820192307692308,
"calib/step_q_c_n": 728.0,
"calib/step_q_gap": 0.02697161172161172,
"calib/step_q_w": 0.2550476190476191,
"calib/step_q_w_n": 525.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2480.0,
"completions/max_terminated_length": 2480.0,
"completions/mean_length": 516.96875,
"completions/mean_terminated_length": 516.96875,
"completions/min_length": 158.0,
"completions/min_terminated_length": 158.0,
"epoch": 0.19413333333333332,
"grad_norm": 0.006183520890772343,
"learning_rate": 5.000000000000001e-07,
"loss": 0.1237,
"num_tokens": 41096720.0,
"reward": 1.049910068511963,
"reward_std": 0.1884315013885498,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/final_brier_reward_step": 0.5646636486053467,
"rewards/format_reward_step": 0.984375,
"step": 182
},
{
"aux_distill/final_loss": 0.00017398772183696565,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07154302636627108,
"aux_distill/mean_u": 0.17953013342888505,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 138.25,
"aux_distill/step_loss": 0.7152562653645873,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5100299962504686,
"calib/avg_num_step_conf": 4.34375,
"calib/ece": 0.4064229249011858,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.05533596837944664,
"calib/gap": -0.011224221972253479,
"calib/mean_conf": 0.18515810276679842,
"calib/mu_c": 0.17952380952380953,
"calib/mu_w": 0.190748031496063,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.046778656126482215,
"calib/std_conf": 0.2659719566474682,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.23727272727272727,
"calib/step_q_c_n": 550.0,
"calib/step_q_gap": -0.0048482691685538815,
"calib/step_q_w": 0.24212099644128116,
"calib/step_q_w_n": 562.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2037.0,
"completions/max_terminated_length": 2037.0,
"completions/mean_length": 502.71875,
"completions/mean_terminated_length": 504.6902160644531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 207.0,
"epoch": 0.1952,
"grad_norm": 0.006408975459635258,
"learning_rate": 4.7222222222222226e-07,
"loss": 0.0843,
"num_tokens": 41332096.0,
"reward": 1.0169315338134766,
"reward_std": 0.15201400220394135,
"rewards/accuracy_reward_step": 0.4921875,
"rewards/final_brier_reward_step": 0.5612065196037292,
"rewards/format_reward_step": 0.98046875,
"step": 183
},
{
"aux_distill/final_loss": 0.00013644496129927575,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.0730898812180385,
"aux_distill/mean_u": 0.20595037076727493,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 149.75,
"aux_distill/step_loss": 0.7307623568922281,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5064565770105715,
"calib/avg_num_step_conf": 4.6796875,
"calib/ece": 0.47584980237154145,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.08695652173913043,
"calib/gap": 0.03185133145992239,
"calib/mean_conf": 0.2167193675889328,
"calib/mu_c": 0.22855345911949684,
"calib/mu_w": 0.19670212765957445,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.03205533596837946,
"calib/std_conf": 0.29063768702452825,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.29242465753424657,
"calib/step_q_c_n": 730.0,
"calib/step_q_gap": 0.05242465753424652,
"calib/step_q_w": 0.24000000000000005,
"calib/step_q_w_n": 468.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2746.0,
"completions/max_terminated_length": 2746.0,
"completions/mean_length": 499.58984375,
"completions/mean_terminated_length": 499.58984375,
"completions/min_length": 187.0,
"completions/min_terminated_length": 187.0,
"epoch": 0.19626666666666667,
"grad_norm": 0.006794987712055445,
"learning_rate": 4.444444444444445e-07,
"loss": 0.1262,
"num_tokens": 41565271.0,
"reward": 1.0633325576782227,
"reward_std": 0.1867324709892273,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/final_brier_reward_step": 0.5211964845657349,
"rewards/format_reward_step": 0.984375,
"step": 184
},
{
"aux_distill/final_loss": 0.00027000475074601127,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07329281861893833,
"aux_distill/mean_u": 0.20704989977177685,
"aux_distill/n_active_final_tok": 31.25,
"aux_distill/n_active_tok": 156.125,
"aux_distill/step_loss": 0.7326581748202443,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5283043032786885,
"calib/avg_num_step_conf": 5.00390625,
"calib/ece": 0.3753924,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.084,
"calib/gap": 0.038675512295081926,
"calib/mean_conf": 0.2294076,
"calib/mu_c": 0.24828124999999995,
"calib/mu_w": 0.20960573770491803,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0464,
"calib/std_conf": 0.30866020070984207,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.28440901213171577,
"calib/step_q_c_n": 577.0,
"calib/step_q_gap": 0.04926142690444307,
"calib/step_q_w": 0.2351475852272727,
"calib/step_q_w_n": 704.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1447.0,
"completions/max_terminated_length": 1447.0,
"completions/mean_length": 492.12890625,
"completions/mean_terminated_length": 501.9322814941406,
"completions/min_length": 0.0,
"completions/min_terminated_length": 160.0,
"epoch": 0.19733333333333333,
"grad_norm": 0.006193746812641621,
"learning_rate": 4.1666666666666667e-07,
"loss": 0.0648,
"num_tokens": 41798176.0,
"reward": 1.030439853668213,
"reward_std": 0.19291558861732483,
"rewards/accuracy_reward_step": 0.50390625,
"rewards/final_brier_reward_step": 0.5804111957550049,
"rewards/format_reward_step": 0.9765625,
"step": 185
},
{
"aux_distill/final_loss": 0.00014655543202479748,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07584030227735639,
"aux_distill/mean_u": 0.20989452434011863,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 160.875,
"aux_distill/step_loss": 0.7582564577460289,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5442945795339412,
"calib/avg_num_step_conf": 5.02734375,
"calib/ece": 0.42108300395256903,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.11462450592885376,
"calib/gap": 0.036618161094224916,
"calib/mean_conf": 0.2497470355731225,
"calib/mu_c": 0.26595744680851063,
"calib/mu_w": 0.22933928571428572,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.05675889328063241,
"calib/std_conf": 0.3251910949917958,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.33207868383404865,
"calib/step_q_c_n": 699.0,
"calib/step_q_gap": 0.07436950016057925,
"calib/step_q_w": 0.2577091836734694,
"calib/step_q_w_n": 588.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1359.0,
"completions/max_terminated_length": 1359.0,
"completions/mean_length": 476.3984375,
"completions/mean_terminated_length": 480.14959716796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 152.0,
"epoch": 0.1984,
"grad_norm": 0.006201084237545729,
"learning_rate": 3.8888888888888895e-07,
"loss": 0.0511,
"num_tokens": 42025174.0,
"reward": 1.0477831363677979,
"reward_std": 0.2021675556898117,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/final_brier_reward_step": 0.5604099035263062,
"rewards/format_reward_step": 0.984375,
"step": 186
},
{
"aux_distill/final_loss": 0.004851514590882289,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06904444005340338,
"aux_distill/mean_u": 0.19692998760247768,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 197.125,
"aux_distill/step_loss": 0.6855928674340248,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.45261437908496727,
"calib/avg_num_step_conf": 6.296875,
"calib/ece": 0.4537549407114624,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.06324110671936758,
"calib/gap": -0.04977438411261942,
"calib/mean_conf": 0.19794466403162056,
"calib/mu_c": 0.1749264705882353,
"calib/mu_w": 0.2247008547008547,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.057075098814229244,
"calib/std_conf": 0.2702533668863807,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.23106534090909092,
"calib/step_q_c_n": 704.0,
"calib/step_q_gap": 0.0018582924509411292,
"calib/step_q_w": 0.2292070484581498,
"calib/step_q_w_n": 908.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2587.0,
"completions/max_terminated_length": 2587.0,
"completions/mean_length": 542.12109375,
"completions/mean_terminated_length": 548.5494384765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 187.0,
"epoch": 0.19946666666666665,
"grad_norm": 0.005682837218046188,
"learning_rate": 3.611111111111111e-07,
"loss": 0.04,
"num_tokens": 42265501.0,
"reward": 1.025758981704712,
"reward_std": 0.1700371503829956,
"rewards/accuracy_reward_step": 0.53125,
"rewards/final_brier_reward_step": 0.5319867134094238,
"rewards/format_reward_step": 0.98828125,
"step": 187
},
{
"aux_distill/final_loss": 0.00015512108848270145,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06917913688812405,
"aux_distill/mean_u": 0.21871284821246945,
"aux_distill/n_active_final_tok": 31.25,
"aux_distill/n_active_tok": 149.625,
"aux_distill/step_loss": 0.6916362354531884,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.47536195072390147,
"calib/avg_num_step_conf": 5.2265625,
"calib/ece": 0.36473505976095627,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.09163346613545817,
"calib/gap": 0.011983140716281454,
"calib/mean_conf": 0.23072310756972114,
"calib/mu_c": 0.23678629032258064,
"calib/mu_w": 0.22480314960629919,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.05071713147410359,
"calib/std_conf": 0.3014775764588048,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.2872699300699301,
"calib/step_q_c_n": 572.0,
"calib/step_q_gap": 0.051799956179590684,
"calib/step_q_w": 0.23546997389033944,
"calib/step_q_w_n": 766.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2798.0,
"completions/max_terminated_length": 2798.0,
"completions/mean_length": 512.953125,
"completions/mean_terminated_length": 521.0952758789062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 142.0,
"epoch": 0.20053333333333334,
"grad_norm": 0.00596034899353981,
"learning_rate": 3.3333333333333335e-07,
"loss": 0.074,
"num_tokens": 42500889.0,
"reward": 1.0225554704666138,
"reward_std": 0.19933326542377472,
"rewards/accuracy_reward_step": 0.484375,
"rewards/final_brier_reward_step": 0.5841735005378723,
"rewards/format_reward_step": 0.9765625,
"step": 188
},
{
"aux_distill/final_loss": 0.0001890165859776971,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07034963823389262,
"aux_distill/mean_u": 0.22055540871474788,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 150.0,
"aux_distill/step_loss": 0.7033073594793677,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5282632010598701,
"calib/avg_num_step_conf": 4.6875,
"calib/ece": 0.3914682539682539,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.06746031746031746,
"calib/gap": 0.02654659012049712,
"calib/mean_conf": 0.21503968253968253,
"calib/mu_c": 0.2277862595419847,
"calib/mu_w": 0.20123966942148758,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.04333333333333332,
"calib/std_conf": 0.2851767733811604,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.27550335570469797,
"calib/step_q_c_n": 596.0,
"calib/step_q_gap": 0.048821236499399956,
"calib/step_q_w": 0.22668211920529802,
"calib/step_q_w_n": 604.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2934.0,
"completions/max_terminated_length": 2934.0,
"completions/mean_length": 498.8203125,
"completions/mean_terminated_length": 500.7764892578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 166.0,
"epoch": 0.2016,
"grad_norm": 0.007148802746087313,
"learning_rate": 3.055555555555556e-07,
"loss": 0.0749,
"num_tokens": 42736355.0,
"reward": 1.0361970663070679,
"reward_std": 0.17948536574840546,
"rewards/accuracy_reward_step": 0.51171875,
"rewards/final_brier_reward_step": 0.5802066326141357,
"rewards/format_reward_step": 0.98046875,
"step": 189
},
{
"aux_distill/final_loss": 0.0004349350431880339,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06765694764908403,
"aux_distill/mean_u": 0.20189786745076008,
"aux_distill/n_active_final_tok": 31.75,
"aux_distill/n_active_tok": 149.125,
"aux_distill/step_loss": 0.676134523935616,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5152868217054263,
"calib/avg_num_step_conf": 4.66015625,
"calib/ece": 0.3947244094488189,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.06299212598425197,
"calib/gap": 0.03580279069767445,
"calib/mean_conf": 0.19354330708661416,
"calib/mu_c": 0.21116279069767444,
"calib/mu_w": 0.17536,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.040196850393700786,
"calib/std_conf": 0.27659352922286673,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2808823529411765,
"calib/step_q_c_n": 578.0,
"calib/step_q_gap": 0.041440076518412305,
"calib/step_q_w": 0.23944227642276422,
"calib/step_q_w_n": 615.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1766.0,
"completions/max_terminated_length": 1766.0,
"completions/mean_length": 532.890625,
"completions/mean_terminated_length": 537.0866088867188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 192.0,
"epoch": 0.20266666666666666,
"grad_norm": 0.006504186429083347,
"learning_rate": 2.7777777777777776e-07,
"loss": 0.0497,
"num_tokens": 42978383.0,
"reward": 1.0420573949813843,
"reward_std": 0.16218328475952148,
"rewards/accuracy_reward_step": 0.50390625,
"rewards/final_brier_reward_step": 0.5880210995674133,
"rewards/format_reward_step": 0.9921875,
"step": 190
},
{
"aux_distill/final_loss": 0.00032513609193074444,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07050644035916775,
"aux_distill/mean_u": 0.16254164632472845,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 172.75,
"aux_distill/step_loss": 0.7047392604872584,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5509948692278813,
"calib/avg_num_step_conf": 5.56640625,
"calib/ece": 0.3572446640316206,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.07905138339920949,
"calib/gap": 0.05162393943186086,
"calib/mean_conf": 0.23769604743083003,
"calib/mu_c": 0.2644262295081967,
"calib/mu_w": 0.21280229007633586,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.056363636363636366,
"calib/std_conf": 0.3083635465198859,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2993606557377049,
"calib/step_q_c_n": 610.0,
"calib/step_q_gap": 0.05421611586040426,
"calib/step_q_w": 0.24514453987730062,
"calib/step_q_w_n": 815.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 1881.0,
"completions/max_terminated_length": 1881.0,
"completions/mean_length": 477.61328125,
"completions/mean_terminated_length": 483.2767028808594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 161.0,
"epoch": 0.20373333333333332,
"grad_norm": 0.0066607119515538216,
"learning_rate": 2.5000000000000004e-07,
"loss": 0.0038,
"num_tokens": 43204820.0,
"reward": 1.039391279220581,
"reward_std": 0.18578588962554932,
"rewards/accuracy_reward_step": 0.4765625,
"rewards/final_brier_reward_step": 0.6139389276504517,
"rewards/format_reward_step": 0.98828125,
"step": 191
},
{
"aux_distill/final_loss": 0.0001435406411474105,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07215216173790395,
"aux_distill/mean_u": 0.20260344511299375,
"aux_distill/n_active_final_tok": 31.5,
"aux_distill/n_active_tok": 152.5,
"aux_distill/step_loss": 0.7213780581951141,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5733936250948647,
"calib/avg_num_step_conf": 4.765625,
"calib/ece": 0.370952380952381,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0873015873015873,
"calib/gap": 0.07278522641032123,
"calib/mean_conf": 0.22412698412698415,
"calib/mu_c": 0.25820895522388054,
"calib/mu_w": 0.1854237288135593,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.03166666666666667,
"calib/std_conf": 0.2952649330693171,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.292048,
"calib/step_q_c_n": 625.0,
"calib/step_q_gap": 0.05181270588235293,
"calib/step_q_w": 0.24023529411764705,
"calib/step_q_w_n": 595.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2600.0,
"completions/max_terminated_length": 2600.0,
"completions/mean_length": 538.3828125,
"completions/mean_terminated_length": 538.3828125,
"completions/min_length": 157.0,
"completions/min_terminated_length": 157.0,
"epoch": 0.2048,
"grad_norm": 0.005982318893074989,
"learning_rate": 2.2222222222222224e-07,
"loss": 0.1014,
"num_tokens": 43447622.0,
"reward": 1.048103928565979,
"reward_std": 0.18802429735660553,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/final_brier_reward_step": 0.5923015475273132,
"rewards/format_reward_step": 0.98046875,
"step": 192
},
{
"aux_distill/final_loss": 0.0001167211955817038,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07536566106136888,
"aux_distill/mean_u": 0.1909164239972604,
"aux_distill/n_active_final_tok": 31.25,
"aux_distill/n_active_tok": 154.0,
"aux_distill/step_loss": 0.7535398826003075,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.511112538540596,
"calib/avg_num_step_conf": 4.8125,
"calib/ece": 0.4472509960159363,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.08366533864541832,
"calib/gap": -0.002033016443987651,
"calib/mean_conf": 0.2137848605577689,
"calib/mu_c": 0.2128776978417266,
"calib/mu_w": 0.21491071428571426,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.053625498007968134,
"calib/std_conf": 0.3043947975226336,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.26211312803889786,
"calib/step_q_c_n": 617.0,
"calib/step_q_gap": -0.01899256301801272,
"calib/step_q_w": 0.2811056910569106,
"calib/step_q_w_n": 615.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1865.0,
"completions/max_terminated_length": 1865.0,
"completions/mean_length": 512.19921875,
"completions/mean_terminated_length": 516.2322998046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.20586666666666667,
"grad_norm": 0.006591039709746838,
"learning_rate": 1.9444444444444447e-07,
"loss": 0.0923,
"num_tokens": 43684457.0,
"reward": 1.0221588611602783,
"reward_std": 0.19100113213062286,
"rewards/accuracy_reward_step": 0.54296875,
"rewards/final_brier_reward_step": 0.5286929607391357,
"rewards/format_reward_step": 0.97265625,
"step": 193
},
{
"aux_distill/final_loss": 0.00011849602537949977,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06704711669590324,
"aux_distill/mean_u": 0.18057837039754573,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 140.625,
"aux_distill/step_loss": 0.6703526610508561,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4985137869972173,
"calib/avg_num_step_conf": 4.39453125,
"calib/ece": 0.4415079365079365,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0873015873015873,
"calib/gap": -0.014037439919048822,
"calib/mean_conf": 0.22634920634920636,
"calib/mu_c": 0.21977611940298508,
"calib/mu_w": 0.2338135593220339,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.06805555555555554,
"calib/std_conf": 0.30648960513356027,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.274263698630137,
"calib/step_q_c_n": 584.0,
"calib/step_q_gap": 0.011620445395386536,
"calib/step_q_w": 0.26264325323475046,
"calib/step_q_w_n": 541.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2536.0,
"completions/max_terminated_length": 2536.0,
"completions/mean_length": 475.30078125,
"completions/mean_terminated_length": 477.16473388671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 198.0,
"epoch": 0.20693333333333333,
"grad_norm": 0.006791603751480579,
"learning_rate": 1.6666666666666668e-07,
"loss": 0.1045,
"num_tokens": 43912078.0,
"reward": 1.0240570306777954,
"reward_std": 0.20932304859161377,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/final_brier_reward_step": 0.5442078113555908,
"rewards/format_reward_step": 0.98046875,
"step": 194
},
{
"aux_distill/final_loss": 0.00012114963419662672,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07237316464306787,
"aux_distill/mean_u": 0.228926683168374,
"aux_distill/n_active_final_tok": 31.5,
"aux_distill/n_active_tok": 136.75,
"aux_distill/step_loss": 0.723610489629209,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5112593828190158,
"calib/avg_num_step_conf": 4.30078125,
"calib/ece": 0.42269841269841274,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.10317460317460317,
"calib/gap": -0.004797587733367525,
"calib/mean_conf": 0.2603968253968254,
"calib/mu_c": 0.25832167832167835,
"calib/mu_w": 0.2631192660550459,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.05781746031746032,
"calib/std_conf": 0.3235576070335652,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.3183720930232558,
"calib/step_q_c_n": 602.0,
"calib/step_q_gap": -0.001267185533858406,
"calib/step_q_w": 0.3196392785571142,
"calib/step_q_w_n": 499.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2981.0,
"completions/max_terminated_length": 2981.0,
"completions/mean_length": 460.64453125,
"completions/mean_terminated_length": 464.2716369628906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 101.0,
"epoch": 0.208,
"grad_norm": 0.006784157827496529,
"learning_rate": 1.3888888888888888e-07,
"loss": 0.0426,
"num_tokens": 44135987.0,
"reward": 1.043771505355835,
"reward_std": 0.18392643332481384,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/final_brier_reward_step": 0.5445742011070251,
"rewards/format_reward_step": 0.984375,
"step": 195
},
{
"aux_distill/final_loss": 0.0080426042968611,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06883227336220443,
"aux_distill/mean_u": 0.1723530723501916,
"aux_distill/n_active_final_tok": 31.75,
"aux_distill/n_active_tok": 155.25,
"aux_distill/step_loss": 0.680280108936131,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5325003116041381,
"calib/avg_num_step_conf": 4.86328125,
"calib/ece": 0.3845882352941177,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.07450980392156863,
"calib/gap": 0.0532500311604138,
"calib/mean_conf": 0.24098039215686273,
"calib/mu_c": 0.2645774647887324,
"calib/mu_w": 0.2113274336283186,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.03435294117647059,
"calib/std_conf": 0.3023441416791292,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.3001815431164902,
"calib/step_q_c_n": 661.0,
"calib/step_q_gap": 0.02321236503429841,
"calib/step_q_w": 0.2769691780821918,
"calib/step_q_w_n": 584.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 964.0,
"completions/max_terminated_length": 964.0,
"completions/mean_length": 427.60546875,
"completions/mean_terminated_length": 429.2823791503906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 182.0,
"epoch": 0.20906666666666668,
"grad_norm": 0.00739358039572835,
"learning_rate": 1.1111111111111112e-07,
"loss": 0.0675,
"num_tokens": 44347998.0,
"reward": 1.064500331878662,
"reward_std": 0.18100447952747345,
"rewards/accuracy_reward_step": 0.5546875,
"rewards/final_brier_reward_step": 0.5821257829666138,
"rewards/format_reward_step": 0.9921875,
"step": 196
},
{
"aux_distill/final_loss": 0.00013514872648556775,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07078921876382083,
"aux_distill/mean_u": 0.22375121075837906,
"aux_distill/n_active_final_tok": 31.625,
"aux_distill/n_active_tok": 147.625,
"aux_distill/step_loss": 0.707757031545043,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4975877192982456,
"calib/avg_num_step_conf": 4.61328125,
"calib/ece": 0.3769565217391305,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.06719367588932806,
"calib/gap": -0.008139724310776947,
"calib/mean_conf": 0.19869565217391305,
"calib/mu_c": 0.19441666666666665,
"calib/mu_w": 0.2025563909774436,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.050671936758893296,
"calib/std_conf": 0.2776658039383831,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.23487940630797774,
"calib/step_q_c_n": 539.0,
"calib/step_q_gap": -0.03834489275744288,
"calib/step_q_w": 0.2732242990654206,
"calib/step_q_w_n": 642.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2541.0,
"completions/max_terminated_length": 2541.0,
"completions/mean_length": 506.9375,
"completions/mean_terminated_length": 506.9375,
"completions/min_length": 156.0,
"completions/min_terminated_length": 156.0,
"epoch": 0.21013333333333334,
"grad_norm": 0.006808362435549498,
"learning_rate": 8.333333333333334e-08,
"loss": 0.1027,
"num_tokens": 44582830.0,
"reward": 1.0218079090118408,
"reward_std": 0.16181005537509918,
"rewards/accuracy_reward_step": 0.46875,
"rewards/final_brier_reward_step": 0.5865848064422607,
"rewards/format_reward_step": 0.98828125,
"step": 197
},
{
"aux_distill/final_loss": 0.00012877855374426872,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07026364665944129,
"aux_distill/mean_u": 0.18823215760758855,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 161.125,
"aux_distill/step_loss": 0.7025076858699322,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4268308323714249,
"calib/avg_num_step_conf": 5.0546875,
"calib/ece": 0.46108764940239044,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0796812749003984,
"calib/gap": -0.06961645504681288,
"calib/mean_conf": 0.21508764940239045,
"calib/mu_c": 0.1837463768115942,
"calib/mu_w": 0.2533628318584071,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.06318725099601592,
"calib/std_conf": 0.29121036106102993,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.2709907932011332,
"calib/step_q_c_n": 706.0,
"calib/step_q_gap": -0.0014921999961457488,
"calib/step_q_w": 0.2724829931972789,
"calib/step_q_w_n": 588.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1387.0,
"completions/max_terminated_length": 1387.0,
"completions/mean_length": 459.11328125,
"completions/mean_terminated_length": 468.25897216796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 176.0,
"epoch": 0.2112,
"grad_norm": 0.006058075465261936,
"learning_rate": 5.555555555555556e-08,
"loss": 0.0379,
"num_tokens": 44805747.0,
"reward": 1.0152664184570312,
"reward_std": 0.1935424953699112,
"rewards/accuracy_reward_step": 0.5390625,
"rewards/final_brier_reward_step": 0.5110015273094177,
"rewards/format_reward_step": 0.98046875,
"step": 198
},
{
"aux_distill/final_loss": 0.00014436346458523985,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.07156735879834741,
"aux_distill/mean_u": 0.20905314065619962,
"aux_distill/n_active_final_tok": 31.375,
"aux_distill/n_active_tok": 157.875,
"aux_distill/step_loss": 0.7155292062088847,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4812260536398467,
"calib/avg_num_step_conf": 5.01171875,
"calib/ece": 0.44147410358565725,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.08764940239043825,
"calib/gap": -0.041166028097062546,
"calib/mean_conf": 0.24561752988047805,
"calib/mu_c": 0.22659259259259257,
"calib/mu_w": 0.2677586206896551,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0746215139442231,
"calib/std_conf": 0.3130697469664095,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.2637047756874096,
"calib/step_q_c_n": 691.0,
"calib/step_q_gap": -0.03202157566394176,
"calib/step_q_w": 0.29572635135135134,
"calib/step_q_w_n": 592.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1851.0,
"completions/max_terminated_length": 1851.0,
"completions/mean_length": 522.2890625,
"completions/mean_terminated_length": 530.5794067382812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 177.0,
"epoch": 0.21226666666666666,
"grad_norm": 0.005946105346083641,
"learning_rate": 2.777777777777778e-08,
"loss": 0.0273,
"num_tokens": 45043653.0,
"reward": 1.022146463394165,
"reward_std": 0.1993604302406311,
"rewards/accuracy_reward_step": 0.53125,
"rewards/final_brier_reward_step": 0.5364804267883301,
"rewards/format_reward_step": 0.9765625,
"step": 199
},
{
"aux_distill/final_loss": 0.00012137726037053653,
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/lambda_final": 0.10000000000000005,
"aux_distill/loss": 0.06986102659720927,
"aux_distill/mean_u": 0.19741112501193644,
"aux_distill/n_active_final_tok": 31.125,
"aux_distill/n_active_tok": 146.625,
"aux_distill/step_loss": 0.6984888771548867,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5459529505582137,
"calib/avg_num_step_conf": 4.68359375,
"calib/ece": 0.4782868525896413,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.09561752988047809,
"calib/gap": 0.0029173312068048463,
"calib/mean_conf": 0.22792828685258967,
"calib/mu_c": 0.22907894736842105,
"calib/mu_w": 0.2261616161616162,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.0503187250996016,
"calib/std_conf": 0.313415338074085,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.27278184523809523,
"calib/step_q_c_n": 672.0,
"calib/step_q_gap": 0.0038021488434083484,
"calib/step_q_w": 0.2689796963946869,
"calib/step_q_w_n": 527.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2644.0,
"completions/max_terminated_length": 2644.0,
"completions/mean_length": 506.9921875,
"completions/mean_terminated_length": 513.0039672851562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.21333333333333335,
"grad_norm": 0.006176741328090429,
"learning_rate": 0.0,
"loss": 0.0705,
"num_tokens": 45281491.0,
"reward": 1.036815881729126,
"reward_std": 0.21423842012882233,
"rewards/accuracy_reward_step": 0.59375,
"rewards/final_brier_reward_step": 0.5072253942489624,
"rewards/format_reward_step": 0.97265625,
"step": 200
},
{
"epoch": 0.21333333333333335,
"step": 200,
"total_flos": 0.0,
"train_loss": 0.11054307345300912,
"train_runtime": 17801.0374,
"train_samples_per_second": 2.876,
"train_steps_per_second": 0.011
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 45281491,
"num_train_epochs": 1,
"save_steps": 20,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}