12043 lines
495 KiB
JSON
12043 lines
495 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.21333333333333335,
|
|
"eval_steps": 500,
|
|
"global_step": 200,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"adv/mean_abs_final_conf": 0.773959219455719,
|
|
"adv/mean_abs_reasoning": 0.47714588046073914,
|
|
"adv/mean_abs_step_conf": 0.7498364448547363,
|
|
"adv/ratio_final_to_reasoning": 1.622059942565935,
|
|
"adv/ratio_step_to_reasoning": 1.5715035496705603,
|
|
"adv/std_final_conf": 0.9294352531433105,
|
|
"adv/std_reasoning": 0.7393431663513184,
|
|
"adv/std_step_conf": 0.9352971315383911,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.38076182006817844,
|
|
"calib/avg_num_step_conf": 5.23046875,
|
|
"calib/ece": 0.2003187250996017,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.2948207171314741,
|
|
"calib/gap": -0.026059730250481805,
|
|
"calib/mean_conf": 0.8737051792828686,
|
|
"calib/mu_c": 0.865606936416185,
|
|
"calib/mu_w": 0.8916666666666668,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.19239043824701207,
|
|
"calib/std_conf": 0.09027744273295583,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.7959393232205367,
|
|
"calib/step_q_c_n": 857.0,
|
|
"calib/step_q_gap": -0.006446568895645877,
|
|
"calib/step_q_w": 0.8023858921161826,
|
|
"calib/step_q_w_n": 482.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2492.0,
|
|
"completions/max_terminated_length": 2492.0,
|
|
"completions/mean_length": 474.94921875,
|
|
"completions/mean_terminated_length": 478.68896484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 138.0,
|
|
"epoch": 0.0010666666666666667,
|
|
"grad_norm": 0.04299506917595863,
|
|
"learning_rate": 2.5000000000000004e-07,
|
|
"loss": -0.0136,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03466901555657387,
|
|
"mask/share_reasoning": 0.8340686559677124,
|
|
"mask/share_step_conf": 0.12344987690448761,
|
|
"num_tokens": 229171.0,
|
|
"reward": 1.0788748264312744,
|
|
"reward_std": 0.22853493690490723,
|
|
"rewards/accuracy_reward_step": 0.67578125,
|
|
"rewards/final_brier_reward_step": 0.7142800688743591,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.7420004606246948,
|
|
"step": 1
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7672724723815918,
|
|
"adv/mean_abs_reasoning": 0.5104547739028931,
|
|
"adv/mean_abs_step_conf": 0.770571768283844,
|
|
"adv/ratio_final_to_reasoning": 1.503115479781084,
|
|
"adv/ratio_step_to_reasoning": 1.509578923891962,
|
|
"adv/std_final_conf": 0.9330522418022156,
|
|
"adv/std_reasoning": 0.7575037479400635,
|
|
"adv/std_step_conf": 0.9354329705238342,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.44343065693430656,
|
|
"calib/avg_num_step_conf": 5.05859375,
|
|
"calib/ece": 0.3349411764705883,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.2823529411764706,
|
|
"calib/gap": 0.002352468143016151,
|
|
"calib/mean_conf": 0.8721960784313726,
|
|
"calib/mu_c": 0.8732846715328467,
|
|
"calib/mu_w": 0.8709322033898306,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3349411764705883,
|
|
"calib/std_conf": 0.07627016470309335,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.7954391371340525,
|
|
"calib/step_q_c_n": 649.0,
|
|
"calib/step_q_gap": 0.011011892552009073,
|
|
"calib/step_q_w": 0.7844272445820434,
|
|
"calib/step_q_w_n": 646.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1966.0,
|
|
"completions/max_terminated_length": 1966.0,
|
|
"completions/mean_length": 492.9765625,
|
|
"completions/mean_terminated_length": 494.9098205566406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 161.0,
|
|
"epoch": 0.0021333333333333334,
|
|
"grad_norm": 0.040479063987731934,
|
|
"learning_rate": 5.000000000000001e-07,
|
|
"loss": -0.0158,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.03364308178424835,
|
|
"mask/share_reasoning": 0.8523939251899719,
|
|
"mask/share_step_conf": 0.11005672812461853,
|
|
"num_tokens": 458661.0,
|
|
"reward": 1.016056776046753,
|
|
"reward_std": 0.2184845209121704,
|
|
"rewards/accuracy_reward_step": 0.53515625,
|
|
"rewards/final_brier_reward_step": 0.6320762038230896,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/step_l2_reward": 0.7291916012763977,
|
|
"step": 2
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7734627723693848,
|
|
"adv/mean_abs_reasoning": 0.40483397245407104,
|
|
"adv/mean_abs_step_conf": 0.7342471480369568,
|
|
"adv/ratio_final_to_reasoning": 1.9105678500268037,
|
|
"adv/ratio_step_to_reasoning": 1.8136994372927981,
|
|
"adv/std_final_conf": 0.9286358952522278,
|
|
"adv/std_reasoning": 0.681647002696991,
|
|
"adv/std_step_conf": 0.9334685206413269,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.45913971367974554,
|
|
"calib/avg_num_step_conf": 4.94140625,
|
|
"calib/ece": 0.2468359375000001,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.32421875,
|
|
"calib/gap": -0.006550901378579055,
|
|
"calib/mean_conf": 0.8855859375,
|
|
"calib/mu_c": 0.8832317073170731,
|
|
"calib/mu_w": 0.8897826086956522,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.24589843750000012,
|
|
"calib/std_conf": 0.041828897633646694,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.7931343283582089,
|
|
"calib/step_q_c_n": 737.0,
|
|
"calib/step_q_gap": 0.01995251017639066,
|
|
"calib/step_q_w": 0.7731818181818182,
|
|
"calib/step_q_w_n": 528.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1161.0,
|
|
"completions/max_terminated_length": 1161.0,
|
|
"completions/mean_length": 485.28515625,
|
|
"completions/mean_terminated_length": 487.1882629394531,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 191.0,
|
|
"epoch": 0.0032,
|
|
"grad_norm": 0.06393119692802429,
|
|
"learning_rate": 7.5e-07,
|
|
"loss": 0.0896,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.03299188241362572,
|
|
"mask/share_reasoning": 0.8538076281547546,
|
|
"mask/share_step_conf": 0.10929422080516815,
|
|
"num_tokens": 688150.0,
|
|
"reward": 1.0805888175964355,
|
|
"reward_std": 0.19701236486434937,
|
|
"rewards/accuracy_reward_step": 0.640625,
|
|
"rewards/final_brier_reward_step": 0.702303946018219,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.7553948760032654,
|
|
"step": 3
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.75523841381073,
|
|
"adv/mean_abs_reasoning": 0.4327036142349243,
|
|
"adv/mean_abs_step_conf": 0.7525294423103333,
|
|
"adv/ratio_final_to_reasoning": 1.7453942813629804,
|
|
"adv/ratio_step_to_reasoning": 1.7391337108216722,
|
|
"adv/std_final_conf": 0.9310765862464905,
|
|
"adv/std_reasoning": 0.7205736637115479,
|
|
"adv/std_step_conf": 0.9354071617126465,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.47407503908285564,
|
|
"calib/avg_num_step_conf": 5.203125,
|
|
"calib/ece": 0.2733992094861661,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.22924901185770752,
|
|
"calib/gap": 0.0018649035956228577,
|
|
"calib/mean_conf": 0.8741897233201581,
|
|
"calib/mu_c": 0.8749342105263158,
|
|
"calib/mu_w": 0.873069306930693,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.2733992094861661,
|
|
"calib/std_conf": 0.05110474763031544,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.7992339832869081,
|
|
"calib/step_q_c_n": 718.0,
|
|
"calib/step_q_gap": 0.0020678595084063778,
|
|
"calib/step_q_w": 0.7971661237785017,
|
|
"calib/step_q_w_n": 614.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2940.0,
|
|
"completions/max_terminated_length": 2940.0,
|
|
"completions/mean_length": 523.75390625,
|
|
"completions/mean_terminated_length": 525.807861328125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 168.0,
|
|
"epoch": 0.004266666666666667,
|
|
"grad_norm": 0.03903208300471306,
|
|
"learning_rate": 1.0000000000000002e-06,
|
|
"loss": 0.0801,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.032517582178115845,
|
|
"mask/share_reasoning": 0.8548072576522827,
|
|
"mask/share_step_conf": 0.10876892507076263,
|
|
"num_tokens": 928399.0,
|
|
"reward": 1.0342053174972534,
|
|
"reward_std": 0.22188444435596466,
|
|
"rewards/accuracy_reward_step": 0.59375,
|
|
"rewards/final_brier_reward_step": 0.6746652722358704,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/step_l2_reward": 0.7187469601631165,
|
|
"step": 4
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7322856187820435,
|
|
"adv/mean_abs_reasoning": 0.45622092485427856,
|
|
"adv/mean_abs_step_conf": 0.7435009479522705,
|
|
"adv/ratio_final_to_reasoning": 1.605111863327932,
|
|
"adv/ratio_step_to_reasoning": 1.6296949733065225,
|
|
"adv/std_final_conf": 0.9311108589172363,
|
|
"adv/std_reasoning": 0.7392563819885254,
|
|
"adv/std_step_conf": 0.9351912140846252,
|
|
"calib/answer_extract_rate": 0.953125,
|
|
"calib/auroc": 0.4667686034658512,
|
|
"calib/avg_num_step_conf": 5.0078125,
|
|
"calib/ece": 0.32454918032786884,
|
|
"calib/final_conf_rate": 0.953125,
|
|
"calib/format_rate": 0.9375,
|
|
"calib/frac_conf_gt_0.9": 0.2786885245901639,
|
|
"calib/gap": -0.0030968399592251616,
|
|
"calib/mean_conf": 0.877827868852459,
|
|
"calib/mu_c": 0.8764444444444446,
|
|
"calib/mu_w": 0.8795412844036697,
|
|
"calib/nonempty_final_conf_rate": 0.953125,
|
|
"calib/nonempty_reasoning_rate": 0.98046875,
|
|
"calib/nonempty_step_conf_rate": 0.96484375,
|
|
"calib/pce": 0.32454918032786884,
|
|
"calib/std_conf": 0.04913801116610084,
|
|
"calib/step_conf_rate": 0.96484375,
|
|
"calib/step_q_c": 0.8004434907010014,
|
|
"calib/step_q_c_n": 699.0,
|
|
"calib/step_q_gap": 0.00407985433736513,
|
|
"calib/step_q_w": 0.7963636363636363,
|
|
"calib/step_q_w_n": 583.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2773.0,
|
|
"completions/max_terminated_length": 2773.0,
|
|
"completions/mean_length": 533.38671875,
|
|
"completions/mean_terminated_length": 535.4784545898438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 170.0,
|
|
"epoch": 0.005333333333333333,
|
|
"grad_norm": 0.040636006742715836,
|
|
"learning_rate": 1.25e-06,
|
|
"loss": -0.0229,
|
|
"mask/has_final_conf_rate": 0.953125,
|
|
"mask/share_final_conf": 0.03362197056412697,
|
|
"mask/share_reasoning": 0.8532348871231079,
|
|
"mask/share_step_conf": 0.10923691093921661,
|
|
"num_tokens": 1171634.0,
|
|
"reward": 0.9456138610839844,
|
|
"reward_std": 0.22638621926307678,
|
|
"rewards/accuracy_reward_step": 0.52734375,
|
|
"rewards/final_brier_reward_step": 0.6081289052963257,
|
|
"rewards/format_reward_step": 0.9375,
|
|
"rewards/step_l2_reward": 0.6600866317749023,
|
|
"step": 5
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7784135937690735,
|
|
"adv/mean_abs_reasoning": 0.4036681056022644,
|
|
"adv/mean_abs_step_conf": 0.7451872229576111,
|
|
"adv/ratio_final_to_reasoning": 1.9283505012309472,
|
|
"adv/ratio_step_to_reasoning": 1.8460393888335747,
|
|
"adv/std_final_conf": 0.93086838722229,
|
|
"adv/std_reasoning": 0.6816917061805725,
|
|
"adv/std_step_conf": 0.9351860880851746,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.5571309156378601,
|
|
"calib/avg_num_step_conf": 5.0625,
|
|
"calib/ece": 0.3103571428571429,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.29365079365079366,
|
|
"calib/gap": 0.0076620370370369395,
|
|
"calib/mean_conf": 0.8817857142857142,
|
|
"calib/mu_c": 0.8850694444444444,
|
|
"calib/mu_w": 0.8774074074074074,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.3103571428571429,
|
|
"calib/std_conf": 0.03887793415515202,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.8020992907801419,
|
|
"calib/step_q_c_n": 705.0,
|
|
"calib/step_q_gap": 0.0034190877344566495,
|
|
"calib/step_q_w": 0.7986802030456852,
|
|
"calib/step_q_w_n": 591.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2291.0,
|
|
"completions/max_terminated_length": 2291.0,
|
|
"completions/mean_length": 455.55078125,
|
|
"completions/mean_terminated_length": 455.55078125,
|
|
"completions/min_length": 158.0,
|
|
"completions/min_terminated_length": 158.0,
|
|
"epoch": 0.0064,
|
|
"grad_norm": 0.038733117282390594,
|
|
"learning_rate": 1.5e-06,
|
|
"loss": -0.0042,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.03740730136632919,
|
|
"mask/share_reasoning": 0.8363658785820007,
|
|
"mask/share_step_conf": 0.12622681260108948,
|
|
"num_tokens": 1394207.0,
|
|
"reward": 1.0009714365005493,
|
|
"reward_std": 0.2220253348350525,
|
|
"rewards/accuracy_reward_step": 0.5625,
|
|
"rewards/final_brier_reward_step": 0.6465281248092651,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.6984013915061951,
|
|
"step": 6
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7462236881256104,
|
|
"adv/mean_abs_reasoning": 0.5316657423973083,
|
|
"adv/mean_abs_step_conf": 0.7468922138214111,
|
|
"adv/ratio_final_to_reasoning": 1.4035579662530995,
|
|
"adv/ratio_step_to_reasoning": 1.4048153835408606,
|
|
"adv/std_final_conf": 0.9319174289703369,
|
|
"adv/std_reasoning": 0.7927316427230835,
|
|
"adv/std_step_conf": 0.9351266026496887,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.45222758990874934,
|
|
"calib/avg_num_step_conf": 4.90234375,
|
|
"calib/ece": 0.24657480314960623,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.29133858267716534,
|
|
"calib/gap": -0.008946591519055636,
|
|
"calib/mean_conf": 0.8811417322834646,
|
|
"calib/mu_c": 0.877901234567901,
|
|
"calib/mu_w": 0.8868478260869567,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.24496062992125978,
|
|
"calib/std_conf": 0.045009574556127536,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.7882035928143714,
|
|
"calib/step_q_c_n": 835.0,
|
|
"calib/step_q_gap": -0.01015355004277152,
|
|
"calib/step_q_w": 0.7983571428571429,
|
|
"calib/step_q_w_n": 420.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1375.0,
|
|
"completions/max_terminated_length": 1375.0,
|
|
"completions/mean_length": 519.80078125,
|
|
"completions/mean_terminated_length": 521.8392333984375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 170.0,
|
|
"epoch": 0.007466666666666667,
|
|
"grad_norm": 0.04315668344497681,
|
|
"learning_rate": 1.75e-06,
|
|
"loss": 0.0193,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.030725453048944473,
|
|
"mask/share_reasoning": 0.8632899522781372,
|
|
"mask/share_step_conf": 0.10207832604646683,
|
|
"num_tokens": 1634700.0,
|
|
"reward": 1.0672800540924072,
|
|
"reward_std": 0.2497740387916565,
|
|
"rewards/accuracy_reward_step": 0.6328125,
|
|
"rewards/final_brier_reward_step": 0.6942952871322632,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.7440304756164551,
|
|
"step": 7
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7801766395568848,
|
|
"adv/mean_abs_reasoning": 0.42296385765075684,
|
|
"adv/mean_abs_step_conf": 0.7665424346923828,
|
|
"adv/ratio_final_to_reasoning": 1.844546822251371,
|
|
"adv/ratio_step_to_reasoning": 1.8123119052061425,
|
|
"adv/std_final_conf": 0.9308924674987793,
|
|
"adv/std_reasoning": 0.681679368019104,
|
|
"adv/std_step_conf": 0.9358082413673401,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.469505104018607,
|
|
"calib/avg_num_step_conf": 4.7890625,
|
|
"calib/ece": 0.309402390438247,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.30278884462151395,
|
|
"calib/gap": 0.0029222121721154126,
|
|
"calib/mean_conf": 0.8751394422310756,
|
|
"calib/mu_c": 0.8764084507042255,
|
|
"calib/mu_w": 0.87348623853211,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.309402390438247,
|
|
"calib/std_conf": 0.056018996476398596,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.796939393939394,
|
|
"calib/step_q_c_n": 660.0,
|
|
"calib/step_q_gap": 0.031232680158475246,
|
|
"calib/step_q_w": 0.7657067137809187,
|
|
"calib/step_q_w_n": 566.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2637.0,
|
|
"completions/max_terminated_length": 2637.0,
|
|
"completions/mean_length": 528.79296875,
|
|
"completions/mean_terminated_length": 530.86669921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 161.0,
|
|
"epoch": 0.008533333333333334,
|
|
"grad_norm": 0.04369485378265381,
|
|
"learning_rate": 2.0000000000000003e-06,
|
|
"loss": -0.0333,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.033193521201610565,
|
|
"mask/share_reasoning": 0.8561908006668091,
|
|
"mask/share_step_conf": 0.10670942813158035,
|
|
"num_tokens": 1876583.0,
|
|
"reward": 1.025631308555603,
|
|
"reward_std": 0.22201970219612122,
|
|
"rewards/accuracy_reward_step": 0.5546875,
|
|
"rewards/final_brier_reward_step": 0.6433879137039185,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.7344164848327637,
|
|
"step": 8
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7931197285652161,
|
|
"adv/mean_abs_reasoning": 0.478039026260376,
|
|
"adv/mean_abs_step_conf": 0.7831205129623413,
|
|
"adv/ratio_final_to_reasoning": 1.6591108361375153,
|
|
"adv/ratio_step_to_reasoning": 1.638193682822446,
|
|
"adv/std_final_conf": 0.9304501414299011,
|
|
"adv/std_reasoning": 0.7207533717155457,
|
|
"adv/std_step_conf": 0.9356123805046082,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.4702091767881242,
|
|
"calib/avg_num_step_conf": 4.8203125,
|
|
"calib/ece": 0.26191235059760953,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 0.30677290836653387,
|
|
"calib/gap": -0.005671390013495148,
|
|
"calib/mean_conf": 0.8801593625498008,
|
|
"calib/mu_c": 0.8780128205128207,
|
|
"calib/mu_w": 0.8836842105263158,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.97265625,
|
|
"calib/pce": 0.26027888446215136,
|
|
"calib/std_conf": 0.04849091694212926,
|
|
"calib/step_conf_rate": 0.97265625,
|
|
"calib/step_q_c": 0.7883587786259543,
|
|
"calib/step_q_c_n": 786.0,
|
|
"calib/step_q_gap": 0.011104314340239951,
|
|
"calib/step_q_w": 0.7772544642857143,
|
|
"calib/step_q_w_n": 448.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2683.0,
|
|
"completions/max_terminated_length": 2683.0,
|
|
"completions/mean_length": 502.4453125,
|
|
"completions/mean_terminated_length": 506.4015808105469,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 182.0,
|
|
"epoch": 0.0096,
|
|
"grad_norm": 0.04787834361195564,
|
|
"learning_rate": 2.25e-06,
|
|
"loss": -0.0005,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.033189356327056885,
|
|
"mask/share_reasoning": 0.8479753732681274,
|
|
"mask/share_step_conf": 0.11102279275655746,
|
|
"num_tokens": 2112745.0,
|
|
"reward": 1.0018759965896606,
|
|
"reward_std": 0.24265122413635254,
|
|
"rewards/accuracy_reward_step": 0.609375,
|
|
"rewards/final_brier_reward_step": 0.6702331900596619,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"rewards/step_l2_reward": 0.6796374320983887,
|
|
"step": 9
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7296777963638306,
|
|
"adv/mean_abs_reasoning": 0.4447851777076721,
|
|
"adv/mean_abs_step_conf": 0.7466875910758972,
|
|
"adv/ratio_final_to_reasoning": 1.640517339459094,
|
|
"adv/ratio_step_to_reasoning": 1.678760058786504,
|
|
"adv/std_final_conf": 0.9287781715393066,
|
|
"adv/std_reasoning": 0.739201545715332,
|
|
"adv/std_step_conf": 0.934630274772644,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.5115869358991734,
|
|
"calib/avg_num_step_conf": 5.203125,
|
|
"calib/ece": 0.2650996015936255,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.4063745019920319,
|
|
"calib/gap": 0.002316031982653066,
|
|
"calib/mean_conf": 0.890597609561753,
|
|
"calib/mu_c": 0.8914649681528661,
|
|
"calib/mu_w": 0.8891489361702131,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.2650996015936255,
|
|
"calib/std_conf": 0.044979426840582565,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.7772214606741573,
|
|
"calib/step_q_c_n": 801.0,
|
|
"calib/step_q_gap": -0.009859518610211837,
|
|
"calib/step_q_w": 0.7870809792843692,
|
|
"calib/step_q_w_n": 531.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2480.0,
|
|
"completions/max_terminated_length": 2480.0,
|
|
"completions/mean_length": 511.8984375,
|
|
"completions/mean_terminated_length": 515.9291381835938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 153.0,
|
|
"epoch": 0.010666666666666666,
|
|
"grad_norm": 0.056707367300987244,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": 0.0109,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.032274842262268066,
|
|
"mask/share_reasoning": 0.8470996618270874,
|
|
"mask/share_step_conf": 0.11281301081180573,
|
|
"num_tokens": 2350591.0,
|
|
"reward": 1.042642593383789,
|
|
"reward_std": 0.20550422370433807,
|
|
"rewards/accuracy_reward_step": 0.61328125,
|
|
"rewards/final_brier_reward_step": 0.6809687614440918,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/step_l2_reward": 0.7237110137939453,
|
|
"step": 10
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7741892337799072,
|
|
"adv/mean_abs_reasoning": 0.49845805764198303,
|
|
"adv/mean_abs_step_conf": 0.7594119310379028,
|
|
"adv/ratio_final_to_reasoning": 1.553168259416458,
|
|
"adv/ratio_step_to_reasoning": 1.5235222289923331,
|
|
"adv/std_final_conf": 0.9311660528182983,
|
|
"adv/std_reasoning": 0.7575029730796814,
|
|
"adv/std_step_conf": 0.9356948733329773,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.40005658953722334,
|
|
"calib/avg_num_step_conf": 5.23828125,
|
|
"calib/ece": 0.3330314960629921,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.37401574803149606,
|
|
"calib/gap": -0.020489185110664,
|
|
"calib/mean_conf": 0.8845275590551179,
|
|
"calib/mu_c": 0.8754929577464788,
|
|
"calib/mu_w": 0.8959821428571428,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.32925196850393695,
|
|
"calib/std_conf": 0.054472896789072724,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.7695857142857143,
|
|
"calib/step_q_c_n": 700.0,
|
|
"calib/step_q_gap": -0.020308201470915943,
|
|
"calib/step_q_w": 0.7898939157566303,
|
|
"calib/step_q_w_n": 641.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1888.0,
|
|
"completions/max_terminated_length": 1888.0,
|
|
"completions/mean_length": 531.6328125,
|
|
"completions/mean_terminated_length": 531.6328125,
|
|
"completions/min_length": 159.0,
|
|
"completions/min_terminated_length": 159.0,
|
|
"epoch": 0.011733333333333333,
|
|
"grad_norm": 0.03830113634467125,
|
|
"learning_rate": 2.7500000000000004e-06,
|
|
"loss": 0.0432,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.031960394233465195,
|
|
"mask/share_reasoning": 0.8549282550811768,
|
|
"mask/share_step_conf": 0.11311139166355133,
|
|
"num_tokens": 2591169.0,
|
|
"reward": 0.9934152364730835,
|
|
"reward_std": 0.22627121210098267,
|
|
"rewards/accuracy_reward_step": 0.5546875,
|
|
"rewards/final_brier_reward_step": 0.6214656233787537,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/step_l2_reward": 0.7055557370185852,
|
|
"step": 11
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7641464471817017,
|
|
"adv/mean_abs_reasoning": 0.4781668484210968,
|
|
"adv/mean_abs_step_conf": 0.7605932950973511,
|
|
"adv/ratio_final_to_reasoning": 1.5980749182108867,
|
|
"adv/ratio_step_to_reasoning": 1.5906441394020188,
|
|
"adv/std_final_conf": 0.9308655858039856,
|
|
"adv/std_reasoning": 0.7394251823425293,
|
|
"adv/std_step_conf": 0.9351321458816528,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.47628443782576324,
|
|
"calib/avg_num_step_conf": 5.640625,
|
|
"calib/ece": 0.20963855421686747,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 0.3534136546184739,
|
|
"calib/gap": -0.004539836187639645,
|
|
"calib/mean_conf": 0.8853815261044177,
|
|
"calib/mu_c": 0.8839411764705881,
|
|
"calib/mu_w": 0.8884810126582278,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.206144578313253,
|
|
"calib/std_conf": 0.05083198385278664,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.7810807860262009,
|
|
"calib/step_q_c_n": 916.0,
|
|
"calib/step_q_gap": 0.029660331480746316,
|
|
"calib/step_q_w": 0.7514204545454546,
|
|
"calib/step_q_w_n": 528.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2679.0,
|
|
"completions/max_terminated_length": 2679.0,
|
|
"completions/mean_length": 486.28515625,
|
|
"completions/mean_terminated_length": 490.1141662597656,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 147.0,
|
|
"epoch": 0.0128,
|
|
"grad_norm": 0.057817842811346054,
|
|
"learning_rate": 3e-06,
|
|
"loss": 0.0705,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.035377874970436096,
|
|
"mask/share_reasoning": 0.8293168544769287,
|
|
"mask/share_step_conf": 0.127492755651474,
|
|
"num_tokens": 2819834.0,
|
|
"reward": 1.0887587070465088,
|
|
"reward_std": 0.23510727286338806,
|
|
"rewards/accuracy_reward_step": 0.6640625,
|
|
"rewards/final_brier_reward_step": 0.709521472454071,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"rewards/step_l2_reward": 0.7619972229003906,
|
|
"step": 12
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7575913667678833,
|
|
"adv/mean_abs_reasoning": 0.5171928405761719,
|
|
"adv/mean_abs_step_conf": 0.7762430906295776,
|
|
"adv/ratio_final_to_reasoning": 1.4648141028477861,
|
|
"adv/ratio_step_to_reasoning": 1.5008774865576526,
|
|
"adv/std_final_conf": 0.9276416897773743,
|
|
"adv/std_reasoning": 0.7753491401672363,
|
|
"adv/std_step_conf": 0.9352768659591675,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5217447916666667,
|
|
"calib/avg_num_step_conf": 5.04296875,
|
|
"calib/ece": 0.2686328125000001,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.453125,
|
|
"calib/gap": 0.009312500000000057,
|
|
"calib/mean_conf": 0.8936328124999999,
|
|
"calib/mu_c": 0.897125,
|
|
"calib/mu_w": 0.8878124999999999,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.2686328125000001,
|
|
"calib/std_conf": 0.05293427101925409,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.7646658259773014,
|
|
"calib/step_q_c_n": 793.0,
|
|
"calib/step_q_gap": 0.006272251680112695,
|
|
"calib/step_q_w": 0.7583935742971887,
|
|
"calib/step_q_w_n": 498.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1421.0,
|
|
"completions/max_terminated_length": 1421.0,
|
|
"completions/mean_length": 484.59765625,
|
|
"completions/mean_terminated_length": 486.4980773925781,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 133.0,
|
|
"epoch": 0.013866666666666666,
|
|
"grad_norm": 0.03953487053513527,
|
|
"learning_rate": 3.2500000000000002e-06,
|
|
"loss": 0.0582,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.03431730717420578,
|
|
"mask/share_reasoning": 0.8443996906280518,
|
|
"mask/share_step_conf": 0.11737672984600067,
|
|
"num_tokens": 3048483.0,
|
|
"reward": 1.0782309770584106,
|
|
"reward_std": 0.23127850890159607,
|
|
"rewards/accuracy_reward_step": 0.625,
|
|
"rewards/final_brier_reward_step": 0.6862156391143799,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/step_l2_reward": 0.7655808329582214,
|
|
"step": 13
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7818053960800171,
|
|
"adv/mean_abs_reasoning": 0.595024585723877,
|
|
"adv/mean_abs_step_conf": 0.7662662267684937,
|
|
"adv/ratio_final_to_reasoning": 1.313904357630722,
|
|
"adv/ratio_step_to_reasoning": 1.2877891857801014,
|
|
"adv/std_final_conf": 0.9330616593360901,
|
|
"adv/std_reasoning": 0.826580286026001,
|
|
"adv/std_step_conf": 0.9357913732528687,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.435423880979864,
|
|
"calib/avg_num_step_conf": 5.234375,
|
|
"calib/ece": 0.3532669322709163,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.5139442231075697,
|
|
"calib/gap": -0.0076189560087213115,
|
|
"calib/mean_conf": 0.9030677290836654,
|
|
"calib/mu_c": 0.8996376811594203,
|
|
"calib/mu_w": 0.9072566371681416,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.3532669322709163,
|
|
"calib/std_conf": 0.043963850081247945,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.7427310924369749,
|
|
"calib/step_q_c_n": 714.0,
|
|
"calib/step_q_gap": 0.004056971031224155,
|
|
"calib/step_q_w": 0.7386741214057507,
|
|
"calib/step_q_w_n": 626.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2627.0,
|
|
"completions/max_terminated_length": 2627.0,
|
|
"completions/mean_length": 542.8515625,
|
|
"completions/mean_terminated_length": 547.1259765625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 135.0,
|
|
"epoch": 0.014933333333333333,
|
|
"grad_norm": 0.046196747571229935,
|
|
"learning_rate": 3.5e-06,
|
|
"loss": -0.0362,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03230297192931175,
|
|
"mask/share_reasoning": 0.8441320657730103,
|
|
"mask/share_step_conf": 0.11575242131948471,
|
|
"num_tokens": 3292853.0,
|
|
"reward": 1.0177867412567139,
|
|
"reward_std": 0.2599422335624695,
|
|
"rewards/accuracy_reward_step": 0.5390625,
|
|
"rewards/final_brier_reward_step": 0.6077121496200562,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/step_l2_reward": 0.75034499168396,
|
|
"step": 14
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7688440084457397,
|
|
"adv/mean_abs_reasoning": 0.39704400300979614,
|
|
"adv/mean_abs_step_conf": 0.7553344964981079,
|
|
"adv/ratio_final_to_reasoning": 1.9364201514630868,
|
|
"adv/ratio_step_to_reasoning": 1.9023949254296426,
|
|
"adv/std_final_conf": 0.9207010269165039,
|
|
"adv/std_reasoning": 0.6612381339073181,
|
|
"adv/std_step_conf": 0.9349432587623596,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.4999685059208869,
|
|
"calib/avg_num_step_conf": 4.98046875,
|
|
"calib/ece": 0.3353725490196079,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.6352941176470588,
|
|
"calib/gap": 0.0038397581254724367,
|
|
"calib/mean_conf": 0.911843137254902,
|
|
"calib/mu_c": 0.9134693877551019,
|
|
"calib/mu_w": 0.9096296296296295,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.3353725490196079,
|
|
"calib/std_conf": 0.04342375447727579,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.711628895184136,
|
|
"calib/step_q_c_n": 706.0,
|
|
"calib/step_q_gap": 8.232224916226993e-05,
|
|
"calib/step_q_w": 0.7115465729349737,
|
|
"calib/step_q_w_n": 569.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2893.0,
|
|
"completions/max_terminated_length": 2893.0,
|
|
"completions/mean_length": 472.23828125,
|
|
"completions/mean_terminated_length": 472.23828125,
|
|
"completions/min_length": 137.0,
|
|
"completions/min_terminated_length": 137.0,
|
|
"epoch": 0.016,
|
|
"grad_norm": 0.035142287611961365,
|
|
"learning_rate": 3.7500000000000005e-06,
|
|
"loss": 0.0654,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.034696608781814575,
|
|
"mask/share_reasoning": 0.8488560914993286,
|
|
"mask/share_step_conf": 0.11644729971885681,
|
|
"num_tokens": 3521626.0,
|
|
"reward": 1.0546379089355469,
|
|
"reward_std": 0.1892717480659485,
|
|
"rewards/accuracy_reward_step": 0.57421875,
|
|
"rewards/final_brier_reward_step": 0.640849232673645,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/step_l2_reward": 0.7695759534835815,
|
|
"step": 15
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7650634050369263,
|
|
"adv/mean_abs_reasoning": 0.4421984553337097,
|
|
"adv/mean_abs_step_conf": 0.764449954032898,
|
|
"adv/ratio_final_to_reasoning": 1.7301358605144,
|
|
"adv/ratio_step_to_reasoning": 1.7287485851934914,
|
|
"adv/std_final_conf": 0.925650417804718,
|
|
"adv/std_reasoning": 0.7014507055282593,
|
|
"adv/std_step_conf": 0.9351567625999451,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.5157855088344062,
|
|
"calib/avg_num_step_conf": 6.7734375,
|
|
"calib/ece": 0.32368421052631585,
|
|
"calib/final_conf_rate": 0.96484375,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 0.7935222672064778,
|
|
"calib/gap": -2.6023832351618204e-05,
|
|
"calib/mean_conf": 0.9269230769230768,
|
|
"calib/mu_c": 0.9269127516778525,
|
|
"calib/mu_w": 0.9269387755102041,
|
|
"calib/nonempty_final_conf_rate": 0.96484375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.32368421052631585,
|
|
"calib/std_conf": 0.03425680218286145,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.669151219512195,
|
|
"calib/step_q_c_n": 1025.0,
|
|
"calib/step_q_gap": 0.024694237847879053,
|
|
"calib/step_q_w": 0.644456981664316,
|
|
"calib/step_q_w_n": 709.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2966.0,
|
|
"completions/max_terminated_length": 2966.0,
|
|
"completions/mean_length": 662.47265625,
|
|
"completions/mean_terminated_length": 667.68896484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 207.0,
|
|
"epoch": 0.017066666666666667,
|
|
"grad_norm": 0.050142817199230194,
|
|
"learning_rate": 4.000000000000001e-06,
|
|
"loss": 0.0649,
|
|
"mask/has_final_conf_rate": 0.96484375,
|
|
"mask/share_final_conf": 0.02496221847832203,
|
|
"mask/share_reasoning": 0.8537300825119019,
|
|
"mask/share_step_conf": 0.11349518597126007,
|
|
"num_tokens": 3800067.0,
|
|
"reward": 1.0430816411972046,
|
|
"reward_std": 0.21906863152980804,
|
|
"rewards/accuracy_reward_step": 0.5859375,
|
|
"rewards/final_brier_reward_step": 0.6314531564712524,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"rewards/step_l2_reward": 0.763556718826294,
|
|
"step": 16
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7514055967330933,
|
|
"adv/mean_abs_reasoning": 0.4066672623157501,
|
|
"adv/mean_abs_step_conf": 0.7520856857299805,
|
|
"adv/ratio_final_to_reasoning": 1.8477159741215576,
|
|
"adv/ratio_step_to_reasoning": 1.8493883216643976,
|
|
"adv/std_final_conf": 0.9193625450134277,
|
|
"adv/std_reasoning": 0.6816253662109375,
|
|
"adv/std_step_conf": 0.9349757432937622,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5193163685227177,
|
|
"calib/avg_num_step_conf": 5.671875,
|
|
"calib/ece": 0.18273809523809512,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.8134920634920635,
|
|
"calib/gap": 0.0010052910052912312,
|
|
"calib/mean_conf": 0.9267857142857142,
|
|
"calib/mu_c": 0.9270370370370371,
|
|
"calib/mu_w": 0.9260317460317459,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.17976190476190462,
|
|
"calib/std_conf": 0.038652728839139124,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.6473840445269017,
|
|
"calib/step_q_c_n": 1078.0,
|
|
"calib/step_q_gap": 0.01521826912583224,
|
|
"calib/step_q_w": 0.6321657754010694,
|
|
"calib/step_q_w_n": 374.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2822.0,
|
|
"completions/max_terminated_length": 2822.0,
|
|
"completions/mean_length": 535.9609375,
|
|
"completions/mean_terminated_length": 538.0628051757812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 139.0,
|
|
"epoch": 0.018133333333333335,
|
|
"grad_norm": 0.074327252805233,
|
|
"learning_rate": 4.25e-06,
|
|
"loss": 0.03,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.031611405313014984,
|
|
"mask/share_reasoning": 0.839647650718689,
|
|
"mask/share_step_conf": 0.12483467906713486,
|
|
"num_tokens": 4040801.0,
|
|
"reward": 1.1753484010696411,
|
|
"reward_std": 0.1905728280544281,
|
|
"rewards/accuracy_reward_step": 0.73828125,
|
|
"rewards/final_brier_reward_step": 0.7602598071098328,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.8316453695297241,
|
|
"step": 17
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7634913921356201,
|
|
"adv/mean_abs_reasoning": 0.39834457635879517,
|
|
"adv/mean_abs_step_conf": 0.7617502212524414,
|
|
"adv/ratio_final_to_reasoning": 1.9166606939011805,
|
|
"adv/ratio_step_to_reasoning": 1.9122896769813709,
|
|
"adv/std_final_conf": 0.9149329662322998,
|
|
"adv/std_reasoning": 0.6816260814666748,
|
|
"adv/std_step_conf": 0.9351475834846497,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.46570048309178746,
|
|
"calib/avg_num_step_conf": 6.01171875,
|
|
"calib/ece": 0.4038399999999998,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.96,
|
|
"calib/gap": -0.0011014492753622651,
|
|
"calib/mean_conf": 0.94384,
|
|
"calib/mu_c": 0.9433333333333332,
|
|
"calib/mu_w": 0.9444347826086955,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.4038399999999998,
|
|
"calib/std_conf": 0.02372455268282205,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.6368571428571428,
|
|
"calib/step_q_c_n": 700.0,
|
|
"calib/step_q_gap": 0.05236369828026555,
|
|
"calib/step_q_w": 0.5844934445768772,
|
|
"calib/step_q_w_n": 839.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2936.0,
|
|
"completions/max_terminated_length": 2936.0,
|
|
"completions/mean_length": 552.50390625,
|
|
"completions/mean_terminated_length": 554.6705932617188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 199.0,
|
|
"epoch": 0.0192,
|
|
"grad_norm": 0.037905480712652206,
|
|
"learning_rate": 4.5e-06,
|
|
"loss": 0.0232,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.030872829258441925,
|
|
"mask/share_reasoning": 0.8516048192977905,
|
|
"mask/share_step_conf": 0.11361609399318695,
|
|
"num_tokens": 4292962.0,
|
|
"reward": 1.0092458724975586,
|
|
"reward_std": 0.20301344990730286,
|
|
"rewards/accuracy_reward_step": 0.52734375,
|
|
"rewards/final_brier_reward_step": 0.5696554780006409,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"rewards/step_l2_reward": 0.7664117813110352,
|
|
"step": 18
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7377002239227295,
|
|
"adv/mean_abs_reasoning": 0.39063119888305664,
|
|
"adv/mean_abs_step_conf": 0.7582512497901917,
|
|
"adv/ratio_final_to_reasoning": 1.8884826046461665,
|
|
"adv/ratio_step_to_reasoning": 1.9410923959946924,
|
|
"adv/std_final_conf": 0.9077786803245544,
|
|
"adv/std_reasoning": 0.661250114440918,
|
|
"adv/std_step_conf": 0.9351308941841125,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.45849313373747375,
|
|
"calib/avg_num_step_conf": 5.27734375,
|
|
"calib/ece": 0.4124313725490197,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.9607843137254902,
|
|
"calib/gap": 0.005094643078065397,
|
|
"calib/mean_conf": 0.949686274509804,
|
|
"calib/mu_c": 0.9520437956204381,
|
|
"calib/mu_w": 0.9469491525423727,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4124313725490197,
|
|
"calib/std_conf": 0.06354170783934227,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.6217280453257791,
|
|
"calib/step_q_c_n": 706.0,
|
|
"calib/step_q_gap": 0.022828820519577464,
|
|
"calib/step_q_w": 0.5988992248062016,
|
|
"calib/step_q_w_n": 645.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2868.0,
|
|
"completions/max_terminated_length": 2868.0,
|
|
"completions/mean_length": 511.37890625,
|
|
"completions/mean_terminated_length": 511.37890625,
|
|
"completions/min_length": 223.0,
|
|
"completions/min_terminated_length": 223.0,
|
|
"epoch": 0.020266666666666665,
|
|
"grad_norm": 0.03694775328040123,
|
|
"learning_rate": 4.75e-06,
|
|
"loss": 0.0088,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.031130220741033554,
|
|
"mask/share_reasoning": 0.8523164987564087,
|
|
"mask/share_step_conf": 0.11655329167842865,
|
|
"num_tokens": 4528635.0,
|
|
"reward": 1.0464892387390137,
|
|
"reward_std": 0.18434491753578186,
|
|
"rewards/accuracy_reward_step": 0.53515625,
|
|
"rewards/final_brier_reward_step": 0.5775191783905029,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/step_l2_reward": 0.8061395287513733,
|
|
"step": 19
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7078642249107361,
|
|
"adv/mean_abs_reasoning": 0.46765512228012085,
|
|
"adv/mean_abs_step_conf": 0.7625718116760254,
|
|
"adv/ratio_final_to_reasoning": 1.5136458282749918,
|
|
"adv/ratio_step_to_reasoning": 1.630628587917513,
|
|
"adv/std_final_conf": 0.8795217871665955,
|
|
"adv/std_reasoning": 0.7392780184745789,
|
|
"adv/std_step_conf": 0.9349386692047119,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.406062424969988,
|
|
"calib/avg_num_step_conf": 6.26171875,
|
|
"calib/ece": 0.35462151394422314,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.9960159362549801,
|
|
"calib/gap": -0.003850873682806344,
|
|
"calib/mean_conf": 0.964183266932271,
|
|
"calib/mu_c": 0.9626797385620917,
|
|
"calib/mu_w": 0.966530612244898,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.35462151394422314,
|
|
"calib/std_conf": 0.014100884338506946,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.5732155074116305,
|
|
"calib/step_q_c_n": 877.0,
|
|
"calib/step_q_gap": -0.00704620057735017,
|
|
"calib/step_q_w": 0.5802617079889807,
|
|
"calib/step_q_w_n": 726.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2558.0,
|
|
"completions/max_terminated_length": 2558.0,
|
|
"completions/mean_length": 539.5625,
|
|
"completions/mean_terminated_length": 539.5625,
|
|
"completions/min_length": 187.0,
|
|
"completions/min_terminated_length": 187.0,
|
|
"epoch": 0.021333333333333333,
|
|
"grad_norm": 0.029928909614682198,
|
|
"learning_rate": 5e-06,
|
|
"loss": 0.0835,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.032758794724941254,
|
|
"mask/share_reasoning": 0.8348656892776489,
|
|
"mask/share_step_conf": 0.13237547874450684,
|
|
"num_tokens": 4771635.0,
|
|
"reward": 1.0676593780517578,
|
|
"reward_std": 0.21638810634613037,
|
|
"rewards/accuracy_reward_step": 0.59765625,
|
|
"rewards/final_brier_reward_step": 0.617925763130188,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.8016993999481201,
|
|
"step": 20
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7431222200393677,
|
|
"adv/mean_abs_reasoning": 0.4694896936416626,
|
|
"adv/mean_abs_step_conf": 0.754639208316803,
|
|
"adv/ratio_final_to_reasoning": 1.5828296767821164,
|
|
"adv/ratio_step_to_reasoning": 1.6073605417476542,
|
|
"adv/std_final_conf": 0.8799741268157959,
|
|
"adv/std_reasoning": 0.7206478118896484,
|
|
"adv/std_step_conf": 0.9352614283561707,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5926800472255017,
|
|
"calib/avg_num_step_conf": 6.60546875,
|
|
"calib/ece": 0.3619762845849802,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.004256854256854292,
|
|
"calib/mean_conf": 0.9706719367588933,
|
|
"calib/mu_c": 0.9723376623376624,
|
|
"calib/mu_w": 0.9680808080808081,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3619762845849802,
|
|
"calib/std_conf": 0.012188533902793975,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.550582627118644,
|
|
"calib/step_q_c_n": 944.0,
|
|
"calib/step_q_gap": -0.03467841705806285,
|
|
"calib/step_q_w": 0.5852610441767069,
|
|
"calib/step_q_w_n": 747.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2314.0,
|
|
"completions/max_terminated_length": 2314.0,
|
|
"completions/mean_length": 516.67578125,
|
|
"completions/mean_terminated_length": 522.8023681640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 175.0,
|
|
"epoch": 0.0224,
|
|
"grad_norm": 0.03543133661150932,
|
|
"learning_rate": 4.9722222222222224e-06,
|
|
"loss": -0.1217,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.031131424009799957,
|
|
"mask/share_reasoning": 0.8254822492599487,
|
|
"mask/share_step_conf": 0.1316676139831543,
|
|
"num_tokens": 5006864.0,
|
|
"reward": 1.0767557621002197,
|
|
"reward_std": 0.21480554342269897,
|
|
"rewards/accuracy_reward_step": 0.6015625,
|
|
"rewards/final_brier_reward_step": 0.6252531409263611,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8068596124649048,
|
|
"step": 21
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6841797828674316,
|
|
"adv/mean_abs_reasoning": 0.41725265979766846,
|
|
"adv/mean_abs_step_conf": 0.7678303122520447,
|
|
"adv/ratio_final_to_reasoning": 1.6397253961165874,
|
|
"adv/ratio_step_to_reasoning": 1.8402047158294357,
|
|
"adv/std_final_conf": 0.8309208154678345,
|
|
"adv/std_reasoning": 0.6815685033798218,
|
|
"adv/std_step_conf": 0.9349187016487122,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.44874088344571345,
|
|
"calib/avg_num_step_conf": 6.37109375,
|
|
"calib/ece": 0.3083921568627451,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -0.002670290353653182,
|
|
"calib/mean_conf": 0.9711372549019608,
|
|
"calib/mu_c": 0.9702366863905327,
|
|
"calib/mu_w": 0.9729069767441859,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3083921568627451,
|
|
"calib/std_conf": 0.011231991438748663,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5689056603773586,
|
|
"calib/step_q_c_n": 1060.0,
|
|
"calib/step_q_gap": 0.04160268314443394,
|
|
"calib/step_q_w": 0.5273029772329246,
|
|
"calib/step_q_w_n": 571.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1293.0,
|
|
"completions/max_terminated_length": 1293.0,
|
|
"completions/mean_length": 513.28125,
|
|
"completions/mean_terminated_length": 515.2941284179688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 207.0,
|
|
"epoch": 0.023466666666666667,
|
|
"grad_norm": 0.045953840017318726,
|
|
"learning_rate": 4.944444444444445e-06,
|
|
"loss": 0.0053,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.030370274558663368,
|
|
"mask/share_reasoning": 0.8338354229927063,
|
|
"mask/share_step_conf": 0.13188806176185608,
|
|
"num_tokens": 5240080.0,
|
|
"reward": 1.1322379112243652,
|
|
"reward_std": 0.18259194493293762,
|
|
"rewards/accuracy_reward_step": 0.66015625,
|
|
"rewards/final_brier_reward_step": 0.6774039268493652,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/step_l2_reward": 0.8372147083282471,
|
|
"step": 22
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7485775351524353,
|
|
"adv/mean_abs_reasoning": 0.4457091689109802,
|
|
"adv/mean_abs_step_conf": 0.7674142718315125,
|
|
"adv/ratio_final_to_reasoning": 1.679520161053599,
|
|
"adv/ratio_step_to_reasoning": 1.7217825554420783,
|
|
"adv/std_final_conf": 0.8745721578598022,
|
|
"adv/std_reasoning": 0.701352059841156,
|
|
"adv/std_step_conf": 0.9352415800094604,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5172980345960692,
|
|
"calib/avg_num_step_conf": 6.4609375,
|
|
"calib/ece": 0.4768897637795275,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0007086614173227312,
|
|
"calib/mean_conf": 0.9768897637795275,
|
|
"calib/mu_c": 0.9772440944881889,
|
|
"calib/mu_w": 0.9765354330708662,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4768897637795275,
|
|
"calib/std_conf": 0.011301681532232766,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5868795180722892,
|
|
"calib/step_q_c_n": 830.0,
|
|
"calib/step_q_gap": 0.007656217101415508,
|
|
"calib/step_q_w": 0.5792233009708737,
|
|
"calib/step_q_w_n": 824.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2576.0,
|
|
"completions/max_terminated_length": 2576.0,
|
|
"completions/mean_length": 563.00390625,
|
|
"completions/mean_terminated_length": 563.00390625,
|
|
"completions/min_length": 166.0,
|
|
"completions/min_terminated_length": 166.0,
|
|
"epoch": 0.024533333333333334,
|
|
"grad_norm": 0.04560421034693718,
|
|
"learning_rate": 4.9166666666666665e-06,
|
|
"loss": 0.0387,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.030303383246064186,
|
|
"mask/share_reasoning": 0.8368549942970276,
|
|
"mask/share_step_conf": 0.13284161686897278,
|
|
"num_tokens": 5488145.0,
|
|
"reward": 1.0002158880233765,
|
|
"reward_std": 0.21627703309059143,
|
|
"rewards/accuracy_reward_step": 0.5,
|
|
"rewards/final_brier_reward_step": 0.5187183618545532,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/step_l2_reward": 0.7888506054878235,
|
|
"step": 23
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7822250127792358,
|
|
"adv/mean_abs_reasoning": 0.5776165127754211,
|
|
"adv/mean_abs_step_conf": 0.7525804042816162,
|
|
"adv/ratio_final_to_reasoning": 1.3542289658941364,
|
|
"adv/ratio_step_to_reasoning": 1.3029066649523253,
|
|
"adv/std_final_conf": 0.8965274691581726,
|
|
"adv/std_reasoning": 0.7929160594940186,
|
|
"adv/std_step_conf": 0.935590386390686,
|
|
"calib/answer_extract_rate": 0.95703125,
|
|
"calib/auroc": 0.545442395081529,
|
|
"calib/avg_num_step_conf": 7.59765625,
|
|
"calib/ece": 0.4527755102040817,
|
|
"calib/final_conf_rate": 0.95703125,
|
|
"calib/format_rate": 0.95703125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0017929697941725387,
|
|
"calib/mean_conf": 0.9793061224489796,
|
|
"calib/mu_c": 0.9801550387596899,
|
|
"calib/mu_w": 0.9783620689655174,
|
|
"calib/nonempty_final_conf_rate": 0.95703125,
|
|
"calib/nonempty_reasoning_rate": 0.9765625,
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
|
"calib/pce": 0.4527755102040817,
|
|
"calib/std_conf": 0.010687020702478803,
|
|
"calib/step_conf_rate": 0.9765625,
|
|
"calib/step_q_c": 0.5750436681222707,
|
|
"calib/step_q_c_n": 916.0,
|
|
"calib/step_q_gap": 0.00674434839437954,
|
|
"calib/step_q_w": 0.5682993197278912,
|
|
"calib/step_q_w_n": 1029.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 2585.0,
|
|
"completions/max_terminated_length": 2585.0,
|
|
"completions/mean_length": 648.9921875,
|
|
"completions/mean_terminated_length": 659.293701171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 169.0,
|
|
"epoch": 0.0256,
|
|
"grad_norm": 0.05360132455825806,
|
|
"learning_rate": 4.888888888888889e-06,
|
|
"loss": -0.0151,
|
|
"mask/has_final_conf_rate": 0.95703125,
|
|
"mask/share_final_conf": 0.026258273050189018,
|
|
"mask/share_reasoning": 0.8305296897888184,
|
|
"mask/share_step_conf": 0.12758705019950867,
|
|
"num_tokens": 5758799.0,
|
|
"reward": 0.9910411238670349,
|
|
"reward_std": 0.25226694345474243,
|
|
"rewards/accuracy_reward_step": 0.50390625,
|
|
"rewards/final_brier_reward_step": 0.5229964852333069,
|
|
"rewards/format_reward_step": 0.95703125,
|
|
"rewards/step_l2_reward": 0.7779321074485779,
|
|
"step": 24
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7140051126480103,
|
|
"adv/mean_abs_reasoning": 0.43053969740867615,
|
|
"adv/mean_abs_step_conf": 0.7535181641578674,
|
|
"adv/ratio_final_to_reasoning": 1.658395536916689,
|
|
"adv/ratio_step_to_reasoning": 1.7501711658486494,
|
|
"adv/std_final_conf": 0.8654311895370483,
|
|
"adv/std_reasoning": 0.7013791799545288,
|
|
"adv/std_step_conf": 0.935165286064148,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.4696669310071372,
|
|
"calib/avg_num_step_conf": 6.87890625,
|
|
"calib/ece": 0.36600790513834003,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -0.0011194818926778538,
|
|
"calib/mean_conf": 0.982608695652174,
|
|
"calib/mu_c": 0.982179487179487,
|
|
"calib/mu_w": 0.9832989690721649,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.36600790513834003,
|
|
"calib/std_conf": 0.00934161941203924,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.5901769911504425,
|
|
"calib/step_q_c_n": 1017.0,
|
|
"calib/step_q_gap": 0.04207215244076512,
|
|
"calib/step_q_w": 0.5481048387096774,
|
|
"calib/step_q_w_n": 744.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2593.0,
|
|
"completions/max_terminated_length": 2593.0,
|
|
"completions/mean_length": 569.87890625,
|
|
"completions/mean_terminated_length": 572.11376953125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 188.0,
|
|
"epoch": 0.02666666666666667,
|
|
"grad_norm": 0.05113760754466057,
|
|
"learning_rate": 4.861111111111111e-06,
|
|
"loss": 0.0074,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.02784719131886959,
|
|
"mask/share_reasoning": 0.837721586227417,
|
|
"mask/share_step_conf": 0.13052499294281006,
|
|
"num_tokens": 6007912.0,
|
|
"reward": 1.0738012790679932,
|
|
"reward_std": 0.20915639400482178,
|
|
"rewards/accuracy_reward_step": 0.609375,
|
|
"rewards/final_brier_reward_step": 0.6216461062431335,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.804283618927002,
|
|
"step": 25
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6176284551620483,
|
|
"adv/mean_abs_reasoning": 0.31737247109413147,
|
|
"adv/mean_abs_step_conf": 0.7418646812438965,
|
|
"adv/ratio_final_to_reasoning": 1.9460681420565367,
|
|
"adv/ratio_step_to_reasoning": 2.337520575386837,
|
|
"adv/std_final_conf": 0.7885181307792664,
|
|
"adv/std_reasoning": 0.5960696339607239,
|
|
"adv/std_step_conf": 0.9348067045211792,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.4531923101652414,
|
|
"calib/avg_num_step_conf": 6.32421875,
|
|
"calib/ece": 0.39157480314960635,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -0.0018864527743842618,
|
|
"calib/mean_conf": 0.9860629921259844,
|
|
"calib/mu_c": 0.985298013245033,
|
|
"calib/mu_w": 0.9871844660194172,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.39157480314960635,
|
|
"calib/std_conf": 0.007116298285162213,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.5744040862656073,
|
|
"calib/step_q_c_n": 881.0,
|
|
"calib/step_q_gap": 0.03607075293227402,
|
|
"calib/step_q_w": 0.5383333333333333,
|
|
"calib/step_q_w_n": 738.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2289.0,
|
|
"completions/max_terminated_length": 2289.0,
|
|
"completions/mean_length": 579.5234375,
|
|
"completions/mean_terminated_length": 579.5234375,
|
|
"completions/min_length": 292.0,
|
|
"completions/min_terminated_length": 292.0,
|
|
"epoch": 0.027733333333333332,
|
|
"grad_norm": 0.0754050686955452,
|
|
"learning_rate": 4.833333333333333e-06,
|
|
"loss": -0.0124,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.02674679085612297,
|
|
"mask/share_reasoning": 0.8562403917312622,
|
|
"mask/share_step_conf": 0.1170128583908081,
|
|
"num_tokens": 6261510.0,
|
|
"reward": 1.070356845855713,
|
|
"reward_std": 0.15499469637870789,
|
|
"rewards/accuracy_reward_step": 0.58984375,
|
|
"rewards/final_brier_reward_step": 0.5999132394790649,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/step_l2_reward": 0.8162626028060913,
|
|
"step": 26
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5428198575973511,
|
|
"adv/mean_abs_reasoning": 0.4752519428730011,
|
|
"adv/mean_abs_step_conf": 0.7459293603897095,
|
|
"adv/ratio_final_to_reasoning": 1.1421728321948297,
|
|
"adv/ratio_step_to_reasoning": 1.569545104603686,
|
|
"adv/std_final_conf": 0.764316976070404,
|
|
"adv/std_reasoning": 0.7206440567970276,
|
|
"adv/std_step_conf": 0.935205340385437,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.4837928464977645,
|
|
"calib/avg_num_step_conf": 7.6328125,
|
|
"calib/ece": 0.46937007874015746,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -0.000398658718331113,
|
|
"calib/mean_conf": 0.9890551181102362,
|
|
"calib/mu_c": 0.9888636363636362,
|
|
"calib/mu_w": 0.9892622950819673,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.46937007874015746,
|
|
"calib/std_conf": 0.0034213481862047958,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5747283702213279,
|
|
"calib/step_q_c_n": 994.0,
|
|
"calib/step_q_gap": 0.017290870221327803,
|
|
"calib/step_q_w": 0.5574375000000001,
|
|
"calib/step_q_w_n": 960.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2426.0,
|
|
"completions/max_terminated_length": 2426.0,
|
|
"completions/mean_length": 578.6015625,
|
|
"completions/mean_terminated_length": 578.6015625,
|
|
"completions/min_length": 258.0,
|
|
"completions/min_terminated_length": 258.0,
|
|
"epoch": 0.0288,
|
|
"grad_norm": 0.040866825729608536,
|
|
"learning_rate": 4.805555555555556e-06,
|
|
"loss": 0.0375,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.02749168500304222,
|
|
"mask/share_reasoning": 0.8314797878265381,
|
|
"mask/share_step_conf": 0.14102855324745178,
|
|
"num_tokens": 6514848.0,
|
|
"reward": 1.0136632919311523,
|
|
"reward_std": 0.22315937280654907,
|
|
"rewards/accuracy_reward_step": 0.515625,
|
|
"rewards/final_brier_reward_step": 0.5257288813591003,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/step_l2_reward": 0.8000233173370361,
|
|
"step": 27
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.45969158411026,
|
|
"adv/mean_abs_reasoning": 0.4384334683418274,
|
|
"adv/mean_abs_step_conf": 0.7739053964614868,
|
|
"adv/ratio_final_to_reasoning": 1.0484865260146121,
|
|
"adv/ratio_step_to_reasoning": 1.765160400250527,
|
|
"adv/std_final_conf": 0.7234077453613281,
|
|
"adv/std_reasoning": 0.7206025719642639,
|
|
"adv/std_step_conf": 0.9350380897521973,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.4940119760479042,
|
|
"calib/avg_num_step_conf": 7.3671875,
|
|
"calib/ece": 0.31653225806451624,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -0.0001197604790422746,
|
|
"calib/mean_conf": 0.9899193548387097,
|
|
"calib/mu_c": 0.9898802395209577,
|
|
"calib/mu_w": 0.99,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.31653225806451624,
|
|
"calib/std_conf": 0.001267438197217881,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.5770805921052632,
|
|
"calib/step_q_c_n": 1216.0,
|
|
"calib/step_q_gap": 0.007408950314218399,
|
|
"calib/step_q_w": 0.5696716417910448,
|
|
"calib/step_q_w_n": 670.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2723.0,
|
|
"completions/max_terminated_length": 2723.0,
|
|
"completions/mean_length": 655.50390625,
|
|
"completions/mean_terminated_length": 655.50390625,
|
|
"completions/min_length": 240.0,
|
|
"completions/min_terminated_length": 240.0,
|
|
"epoch": 0.029866666666666666,
|
|
"grad_norm": 0.03843703120946884,
|
|
"learning_rate": 4.777777777777778e-06,
|
|
"loss": 0.0027,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.02509191818535328,
|
|
"mask/share_reasoning": 0.8488214612007141,
|
|
"mask/share_step_conf": 0.12608662247657776,
|
|
"num_tokens": 6789601.0,
|
|
"reward": 1.0947589874267578,
|
|
"reward_std": 0.22782441973686218,
|
|
"rewards/accuracy_reward_step": 0.65625,
|
|
"rewards/final_brier_reward_step": 0.6585718393325806,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"rewards/step_l2_reward": 0.8039641976356506,
|
|
"step": 28
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4598812460899353,
|
|
"adv/mean_abs_reasoning": 0.4270208179950714,
|
|
"adv/mean_abs_step_conf": 0.737694501876831,
|
|
"adv/ratio_final_to_reasoning": 1.0769527543156998,
|
|
"adv/ratio_step_to_reasoning": 1.7275375597387044,
|
|
"adv/std_final_conf": 0.7559602856636047,
|
|
"adv/std_reasoning": 0.7392783761024475,
|
|
"adv/std_step_conf": 0.9354400038719177,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.4927007299270073,
|
|
"calib/avg_num_step_conf": 8.2421875,
|
|
"calib/ece": 0.4284426229508197,
|
|
"calib/final_conf_rate": 0.953125,
|
|
"calib/format_rate": 0.953125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -0.00014598540145960293,
|
|
"calib/mean_conf": 0.9899180327868853,
|
|
"calib/mu_c": 0.9898540145985403,
|
|
"calib/mu_w": 0.9899999999999999,
|
|
"calib/nonempty_final_conf_rate": 0.953125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4284426229508197,
|
|
"calib/std_conf": 0.0009016393442622958,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5657422222222223,
|
|
"calib/step_q_c_n": 1125.0,
|
|
"calib/step_q_gap": 0.0332549125775522,
|
|
"calib/step_q_w": 0.5324873096446701,
|
|
"calib/step_q_w_n": 985.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01953125,
|
|
"completions/max_length": 2629.0,
|
|
"completions/max_terminated_length": 2629.0,
|
|
"completions/mean_length": 664.14453125,
|
|
"completions/mean_terminated_length": 677.37451171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 212.0,
|
|
"epoch": 0.030933333333333334,
|
|
"grad_norm": 0.04360277205705643,
|
|
"learning_rate": 4.75e-06,
|
|
"loss": -0.1501,
|
|
"mask/has_final_conf_rate": 0.953125,
|
|
"mask/share_final_conf": 0.022655852138996124,
|
|
"mask/share_reasoning": 0.8310916423797607,
|
|
"mask/share_step_conf": 0.12672126293182373,
|
|
"num_tokens": 7066750.0,
|
|
"reward": 1.0225168466567993,
|
|
"reward_std": 0.2292087972164154,
|
|
"rewards/accuracy_reward_step": 0.54296875,
|
|
"rewards/final_brier_reward_step": 0.5434179306030273,
|
|
"rewards/format_reward_step": 0.953125,
|
|
"rewards/step_l2_reward": 0.8015979528427124,
|
|
"step": 29
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.582615077495575,
|
|
"adv/mean_abs_reasoning": 0.5706990957260132,
|
|
"adv/mean_abs_step_conf": 0.7505882382392883,
|
|
"adv/ratio_final_to_reasoning": 1.0208796226571955,
|
|
"adv/ratio_step_to_reasoning": 1.3152083889048916,
|
|
"adv/std_final_conf": 0.8106728792190552,
|
|
"adv/std_reasoning": 0.8098874092102051,
|
|
"adv/std_step_conf": 0.9352176785469055,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.49640287769784175,
|
|
"calib/avg_num_step_conf": 7.48828125,
|
|
"calib/ece": 0.42261224489795923,
|
|
"calib/final_conf_rate": 0.95703125,
|
|
"calib/format_rate": 0.95703125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -7.19424460432494e-05,
|
|
"calib/mean_conf": 0.9899591836734695,
|
|
"calib/mu_c": 0.9899280575539567,
|
|
"calib/mu_w": 0.99,
|
|
"calib/nonempty_final_conf_rate": 0.95703125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.42261224489795923,
|
|
"calib/std_conf": 0.0006375714021148296,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5885585585585585,
|
|
"calib/step_q_c_n": 999.0,
|
|
"calib/step_q_gap": -0.006724665842312905,
|
|
"calib/step_q_w": 0.5952832244008714,
|
|
"calib/step_q_w_n": 918.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2727.0,
|
|
"completions/max_terminated_length": 2727.0,
|
|
"completions/mean_length": 713.55078125,
|
|
"completions/mean_terminated_length": 713.55078125,
|
|
"completions/min_length": 247.0,
|
|
"completions/min_terminated_length": 247.0,
|
|
"epoch": 0.032,
|
|
"grad_norm": 0.04562646895647049,
|
|
"learning_rate": 4.722222222222222e-06,
|
|
"loss": 0.0619,
|
|
"mask/has_final_conf_rate": 0.95703125,
|
|
"mask/share_final_conf": 0.02327953279018402,
|
|
"mask/share_reasoning": 0.8579779863357544,
|
|
"mask/share_step_conf": 0.11874253302812576,
|
|
"num_tokens": 7356403.0,
|
|
"reward": 1.0039558410644531,
|
|
"reward_std": 0.28641945123672485,
|
|
"rewards/accuracy_reward_step": 0.55859375,
|
|
"rewards/final_brier_reward_step": 0.5511531233787537,
|
|
"rewards/format_reward_step": 0.95703125,
|
|
"rewards/step_l2_reward": 0.7690889835357666,
|
|
"step": 30
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.40484005212783813,
|
|
"adv/mean_abs_reasoning": 0.39064812660217285,
|
|
"adv/mean_abs_step_conf": 0.7507187128067017,
|
|
"adv/ratio_final_to_reasoning": 1.036329178509329,
|
|
"adv/ratio_step_to_reasoning": 1.921726130716138,
|
|
"adv/std_final_conf": 0.6624351739883423,
|
|
"adv/std_reasoning": 0.6614300608634949,
|
|
"adv/std_step_conf": 0.9350001215934753,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 8.90625,
|
|
"calib/ece": 0.526,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -1.1102230246251565e-16,
|
|
"calib/mean_conf": 0.9900000000000001,
|
|
"calib/mu_c": 0.9899999999999999,
|
|
"calib/mu_w": 0.99,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.526,
|
|
"calib/std_conf": 1.1102230246251565e-16,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5834707158351409,
|
|
"calib/step_q_c_n": 922.0,
|
|
"calib/step_q_gap": -0.02866477753746588,
|
|
"calib/step_q_w": 0.6121354933726068,
|
|
"calib/step_q_w_n": 1358.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2598.0,
|
|
"completions/max_terminated_length": 2598.0,
|
|
"completions/mean_length": 677.59765625,
|
|
"completions/mean_terminated_length": 685.6324462890625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 228.0,
|
|
"epoch": 0.03306666666666667,
|
|
"grad_norm": 0.0428326353430748,
|
|
"learning_rate": 4.694444444444445e-06,
|
|
"loss": -0.0011,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.024062704294919968,
|
|
"mask/share_reasoning": 0.8310606479644775,
|
|
"mask/share_step_conf": 0.1331578940153122,
|
|
"num_tokens": 7635780.0,
|
|
"reward": 0.9524698853492737,
|
|
"reward_std": 0.22021548449993134,
|
|
"rewards/accuracy_reward_step": 0.453125,
|
|
"rewards/final_brier_reward_step": 0.4634960889816284,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.7703374028205872,
|
|
"step": 31
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4177335500717163,
|
|
"adv/mean_abs_reasoning": 0.3731395900249481,
|
|
"adv/mean_abs_step_conf": 0.7657584547996521,
|
|
"adv/ratio_final_to_reasoning": 1.1195101276811357,
|
|
"adv/ratio_step_to_reasoning": 2.0522037201907564,
|
|
"adv/std_final_conf": 0.6802933216094971,
|
|
"adv/std_reasoning": 0.640331506729126,
|
|
"adv/std_step_conf": 0.934921383857727,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5158730158730158,
|
|
"calib/avg_num_step_conf": 8.03515625,
|
|
"calib/ece": 0.48984126984126997,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0003174603174602719,
|
|
"calib/mean_conf": 0.98984126984127,
|
|
"calib/mu_c": 0.9899999999999999,
|
|
"calib/mu_w": 0.9896825396825396,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.48984126984126997,
|
|
"calib/std_conf": 0.0012498425196844154,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.6162170706006322,
|
|
"calib/step_q_c_n": 949.0,
|
|
"calib/step_q_gap": 0.061623207784747724,
|
|
"calib/step_q_w": 0.5545938628158845,
|
|
"calib/step_q_w_n": 1108.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2830.0,
|
|
"completions/max_terminated_length": 2830.0,
|
|
"completions/mean_length": 643.13671875,
|
|
"completions/mean_terminated_length": 648.2008056640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 222.0,
|
|
"epoch": 0.034133333333333335,
|
|
"grad_norm": 0.05958246812224388,
|
|
"learning_rate": 4.666666666666667e-06,
|
|
"loss": -0.0464,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.024628007784485817,
|
|
"mask/share_reasoning": 0.8358334898948669,
|
|
"mask/share_step_conf": 0.1317259967327118,
|
|
"num_tokens": 7907127.0,
|
|
"reward": 0.9951291680335999,
|
|
"reward_std": 0.19687864184379578,
|
|
"rewards/accuracy_reward_step": 0.4921875,
|
|
"rewards/final_brier_reward_step": 0.5022405982017517,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/step_l2_reward": 0.7951366901397705,
|
|
"step": 32
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.534870982170105,
|
|
"adv/mean_abs_reasoning": 0.5064483880996704,
|
|
"adv/mean_abs_step_conf": 0.7465532422065735,
|
|
"adv/ratio_final_to_reasoning": 1.0561214029668131,
|
|
"adv/ratio_step_to_reasoning": 1.4740954058672011,
|
|
"adv/std_final_conf": 0.773914098739624,
|
|
"adv/std_reasoning": 0.7576087713241577,
|
|
"adv/std_step_conf": 0.9350910782814026,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5148124602670058,
|
|
"calib/avg_num_step_conf": 8.46484375,
|
|
"calib/ece": 0.5076095617529881,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.00029624920533999344,
|
|
"calib/mean_conf": 0.9896812749003985,
|
|
"calib/mu_c": 0.9898347107438017,
|
|
"calib/mu_w": 0.9895384615384617,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.5076095617529881,
|
|
"calib/std_conf": 0.001756606190043715,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5811604439959637,
|
|
"calib/step_q_c_n": 991.0,
|
|
"calib/step_q_gap": -0.0017307124666213758,
|
|
"calib/step_q_w": 0.582891156462585,
|
|
"calib/step_q_w_n": 1176.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2897.0,
|
|
"completions/max_terminated_length": 2897.0,
|
|
"completions/mean_length": 646.796875,
|
|
"completions/mean_terminated_length": 646.796875,
|
|
"completions/min_length": 221.0,
|
|
"completions/min_terminated_length": 221.0,
|
|
"epoch": 0.0352,
|
|
"grad_norm": 0.050102751702070236,
|
|
"learning_rate": 4.638888888888889e-06,
|
|
"loss": 0.0859,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.024720996618270874,
|
|
"mask/share_reasoning": 0.8411494493484497,
|
|
"mask/share_step_conf": 0.13412953913211823,
|
|
"num_tokens": 8179579.0,
|
|
"reward": 0.9854416847229004,
|
|
"reward_std": 0.23391908407211304,
|
|
"rewards/accuracy_reward_step": 0.47265625,
|
|
"rewards/final_brier_reward_step": 0.48317378759384155,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/step_l2_reward": 0.7980563044548035,
|
|
"step": 33
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6293555498123169,
|
|
"adv/mean_abs_reasoning": 0.5206021666526794,
|
|
"adv/mean_abs_step_conf": 0.7541499137878418,
|
|
"adv/ratio_final_to_reasoning": 1.2088992134990335,
|
|
"adv/ratio_step_to_reasoning": 1.4486107859227833,
|
|
"adv/std_final_conf": 0.8442690372467041,
|
|
"adv/std_reasoning": 0.7753821015357971,
|
|
"adv/std_step_conf": 0.9347956776618958,
|
|
"calib/answer_extract_rate": 0.96484375,
|
|
"calib/auroc": 0.5234375,
|
|
"calib/avg_num_step_conf": 8.515625,
|
|
"calib/ece": 0.5046370967741937,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0004687499999997957,
|
|
"calib/mean_conf": 0.9885080645161292,
|
|
"calib/mu_c": 0.98875,
|
|
"calib/mu_w": 0.9882812500000002,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.5046370967741937,
|
|
"calib/std_conf": 0.0035627915109751234,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5425289778714436,
|
|
"calib/step_q_c_n": 949.0,
|
|
"calib/step_q_gap": -0.008348357628150227,
|
|
"calib/step_q_w": 0.5508773354995938,
|
|
"calib/step_q_w_n": 1231.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01953125,
|
|
"completions/max_length": 3043.0,
|
|
"completions/max_terminated_length": 3043.0,
|
|
"completions/mean_length": 579.2734375,
|
|
"completions/mean_terminated_length": 590.812744140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 218.0,
|
|
"epoch": 0.03626666666666667,
|
|
"grad_norm": 0.07354850322008133,
|
|
"learning_rate": 4.611111111111112e-06,
|
|
"loss": -0.0333,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.02619742415845394,
|
|
"mask/share_reasoning": 0.8041331171989441,
|
|
"mask/share_step_conf": 0.15013819932937622,
|
|
"num_tokens": 8432985.0,
|
|
"reward": 0.9838611483573914,
|
|
"reward_std": 0.23865635693073273,
|
|
"rewards/accuracy_reward_step": 0.46875,
|
|
"rewards/final_brier_reward_step": 0.480173796415329,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"rewards/step_l2_reward": 0.8005531430244446,
|
|
"step": 34
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7236064672470093,
|
|
"adv/mean_abs_reasoning": 0.5600302219390869,
|
|
"adv/mean_abs_step_conf": 0.787669837474823,
|
|
"adv/ratio_final_to_reasoning": 1.2920846748976238,
|
|
"adv/ratio_step_to_reasoning": 1.406477376787883,
|
|
"adv/std_final_conf": 0.869294285774231,
|
|
"adv/std_reasoning": 0.7927871942520142,
|
|
"adv/std_step_conf": 0.9348177909851074,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5486153846153846,
|
|
"calib/avg_num_step_conf": 8.19140625,
|
|
"calib/ece": 0.49258823529411794,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0009723076923078278,
|
|
"calib/mean_conf": 0.9827843137254905,
|
|
"calib/mu_c": 0.9832800000000003,
|
|
"calib/mu_w": 0.9823076923076924,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.49258823529411794,
|
|
"calib/std_conf": 0.004482268882268098,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.51757852077001,
|
|
"calib/step_q_c_n": 987.0,
|
|
"calib/step_q_gap": 0.0436956378871271,
|
|
"calib/step_q_w": 0.4738828828828829,
|
|
"calib/step_q_w_n": 1110.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2573.0,
|
|
"completions/max_terminated_length": 2573.0,
|
|
"completions/mean_length": 656.984375,
|
|
"completions/mean_terminated_length": 656.984375,
|
|
"completions/min_length": 255.0,
|
|
"completions/min_terminated_length": 255.0,
|
|
"epoch": 0.037333333333333336,
|
|
"grad_norm": 0.06646399945020676,
|
|
"learning_rate": 4.583333333333333e-06,
|
|
"loss": -0.025,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.024299103766679764,
|
|
"mask/share_reasoning": 0.8431927561759949,
|
|
"mask/share_step_conf": 0.13250812888145447,
|
|
"num_tokens": 8710429.0,
|
|
"reward": 1.0185070037841797,
|
|
"reward_std": 0.22961218655109406,
|
|
"rewards/accuracy_reward_step": 0.48828125,
|
|
"rewards/final_brier_reward_step": 0.505856990814209,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/step_l2_reward": 0.823375403881073,
|
|
"step": 35
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.3987484574317932,
|
|
"adv/mean_abs_reasoning": 0.3482089340686798,
|
|
"adv/mean_abs_step_conf": 0.7686375379562378,
|
|
"adv/ratio_final_to_reasoning": 1.1451413746700283,
|
|
"adv/ratio_step_to_reasoning": 2.2074032649737636,
|
|
"adv/std_final_conf": 0.6580438017845154,
|
|
"adv/std_reasoning": 0.6402967572212219,
|
|
"adv/std_step_conf": 0.9345235824584961,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.4962904498816101,
|
|
"calib/avg_num_step_conf": 9.3515625,
|
|
"calib/ece": 0.2594023904382473,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -7.419100236727427e-05,
|
|
"calib/mean_conf": 0.9805179282868529,
|
|
"calib/mu_c": 0.9804972375690613,
|
|
"calib/mu_w": 0.9805714285714285,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.2594023904382473,
|
|
"calib/std_conf": 0.002216085097238788,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.48154626108998727,
|
|
"calib/step_q_c_n": 1578.0,
|
|
"calib/step_q_gap": 0.03804135912920298,
|
|
"calib/step_q_w": 0.4435049019607843,
|
|
"calib/step_q_w_n": 816.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2328.0,
|
|
"completions/max_terminated_length": 2328.0,
|
|
"completions/mean_length": 597.12109375,
|
|
"completions/mean_terminated_length": 604.2015991210938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 180.0,
|
|
"epoch": 0.0384,
|
|
"grad_norm": 0.04976440966129303,
|
|
"learning_rate": 4.555555555555556e-06,
|
|
"loss": -0.0363,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.027500033378601074,
|
|
"mask/share_reasoning": 0.800820529460907,
|
|
"mask/share_step_conf": 0.15996065735816956,
|
|
"num_tokens": 8966004.0,
|
|
"reward": 1.1506966352462769,
|
|
"reward_std": 0.17550379037857056,
|
|
"rewards/accuracy_reward_step": 0.70703125,
|
|
"rewards/final_brier_reward_step": 0.717279314994812,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/step_l2_reward": 0.8310760259628296,
|
|
"step": 36
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.403596431016922,
|
|
"adv/mean_abs_reasoning": 0.37260866165161133,
|
|
"adv/mean_abs_step_conf": 0.7900395393371582,
|
|
"adv/ratio_final_to_reasoning": 1.0831643827815367,
|
|
"adv/ratio_step_to_reasoning": 2.120293006167002,
|
|
"adv/std_final_conf": 0.6856962442398071,
|
|
"adv/std_reasoning": 0.6612308621406555,
|
|
"adv/std_step_conf": 0.9350776672363281,
|
|
"calib/answer_extract_rate": 0.9609375,
|
|
"calib/auroc": 0.5011961722488039,
|
|
"calib/avg_num_step_conf": 9.46875,
|
|
"calib/ece": 0.5167479674796751,
|
|
"calib/final_conf_rate": 0.9609375,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 2.392344497625931e-05,
|
|
"calib/mean_conf": 0.9801626016260166,
|
|
"calib/mu_c": 0.9801754385964916,
|
|
"calib/mu_w": 0.9801515151515153,
|
|
"calib/nonempty_final_conf_rate": 0.9609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.5167479674796751,
|
|
"calib/std_conf": 0.0012647438362686227,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.47984143763213527,
|
|
"calib/step_q_c_n": 946.0,
|
|
"calib/step_q_gap": 0.08639759460101215,
|
|
"calib/step_q_w": 0.3934438430311231,
|
|
"calib/step_q_w_n": 1478.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0234375,
|
|
"completions/max_length": 2873.0,
|
|
"completions/max_terminated_length": 2873.0,
|
|
"completions/mean_length": 656.3671875,
|
|
"completions/mean_terminated_length": 672.1200561523438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 293.0,
|
|
"epoch": 0.039466666666666664,
|
|
"grad_norm": 0.057230979204177856,
|
|
"learning_rate": 4.527777777777778e-06,
|
|
"loss": -0.1028,
|
|
"mask/has_final_conf_rate": 0.9609375,
|
|
"mask/share_final_conf": 0.02309846132993698,
|
|
"mask/share_reasoning": 0.8134510517120361,
|
|
"mask/share_step_conf": 0.14001299440860748,
|
|
"num_tokens": 9241130.0,
|
|
"reward": 0.9769242405891418,
|
|
"reward_std": 0.17409387230873108,
|
|
"rewards/accuracy_reward_step": 0.4453125,
|
|
"rewards/final_brier_reward_step": 0.4654015302658081,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"rewards/step_l2_reward": 0.804797887802124,
|
|
"step": 37
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.36862608790397644,
|
|
"adv/mean_abs_reasoning": 0.3565206229686737,
|
|
"adv/mean_abs_step_conf": 0.7455395460128784,
|
|
"adv/ratio_final_to_reasoning": 1.033954459168457,
|
|
"adv/ratio_step_to_reasoning": 2.091154053880318,
|
|
"adv/std_final_conf": 0.6618428230285645,
|
|
"adv/std_reasoning": 0.6613090634346008,
|
|
"adv/std_step_conf": 0.935276448726654,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 8.8359375,
|
|
"calib/ece": 0.4137349397590363,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0,
|
|
"calib/mean_conf": 0.9800000000000002,
|
|
"calib/mu_c": 0.9800000000000004,
|
|
"calib/mu_w": 0.9800000000000004,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.4137349397590363,
|
|
"calib/std_conf": 2.220446049250313e-16,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4717914213624895,
|
|
"calib/step_q_c_n": 1189.0,
|
|
"calib/step_q_gap": 0.07567772145568619,
|
|
"calib/step_q_w": 0.39611369990680334,
|
|
"calib/step_q_w_n": 1073.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2482.0,
|
|
"completions/max_terminated_length": 2482.0,
|
|
"completions/mean_length": 628.61328125,
|
|
"completions/mean_terminated_length": 636.0671997070312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 252.0,
|
|
"epoch": 0.04053333333333333,
|
|
"grad_norm": 0.052082207053899765,
|
|
"learning_rate": 4.5e-06,
|
|
"loss": -0.0047,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.025044191628694534,
|
|
"mask/share_reasoning": 0.8164798021316528,
|
|
"mask/share_step_conf": 0.14675727486610413,
|
|
"num_tokens": 9508943.0,
|
|
"reward": 1.0493329763412476,
|
|
"reward_std": 0.18128329515457153,
|
|
"rewards/accuracy_reward_step": 0.55078125,
|
|
"rewards/final_brier_reward_step": 0.5671124458312988,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"rewards/step_l2_reward": 0.8184314370155334,
|
|
"step": 38
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.43636903166770935,
|
|
"adv/mean_abs_reasoning": 0.4297660291194916,
|
|
"adv/mean_abs_step_conf": 0.7607371807098389,
|
|
"adv/ratio_final_to_reasoning": 1.0153641798113873,
|
|
"adv/ratio_step_to_reasoning": 1.7701193886088296,
|
|
"adv/std_final_conf": 0.7025984525680542,
|
|
"adv/std_reasoning": 0.7013990879058838,
|
|
"adv/std_step_conf": 0.9351004958152771,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 9.27734375,
|
|
"calib/ece": 0.4345454545454548,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -1.1102230246251565e-16,
|
|
"calib/mean_conf": 0.9800000000000002,
|
|
"calib/mu_c": 0.9800000000000003,
|
|
"calib/mu_w": 0.9800000000000004,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4345454545454548,
|
|
"calib/std_conf": 2.220446049250313e-16,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.46675302245250433,
|
|
"calib/step_q_c_n": 1158.0,
|
|
"calib/step_q_gap": 0.02381957956014613,
|
|
"calib/step_q_w": 0.4429334428923582,
|
|
"calib/step_q_w_n": 1217.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2644.0,
|
|
"completions/max_terminated_length": 2644.0,
|
|
"completions/mean_length": 665.265625,
|
|
"completions/mean_terminated_length": 665.265625,
|
|
"completions/min_length": 201.0,
|
|
"completions/min_terminated_length": 201.0,
|
|
"epoch": 0.0416,
|
|
"grad_norm": 0.051825810223817825,
|
|
"learning_rate": 4.472222222222223e-06,
|
|
"loss": 0.0314,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.02482118085026741,
|
|
"mask/share_reasoning": 0.8257359266281128,
|
|
"mask/share_step_conf": 0.1494428813457489,
|
|
"num_tokens": 9785339.0,
|
|
"reward": 1.0435525178909302,
|
|
"reward_std": 0.1997736394405365,
|
|
"rewards/accuracy_reward_step": 0.5390625,
|
|
"rewards/final_brier_reward_step": 0.5525765419006348,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/step_l2_reward": 0.820414662361145,
|
|
"step": 39
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.35352271795272827,
|
|
"adv/mean_abs_reasoning": 0.3396007716655731,
|
|
"adv/mean_abs_step_conf": 0.7508925199508667,
|
|
"adv/ratio_final_to_reasoning": 1.0409950372576449,
|
|
"adv/ratio_step_to_reasoning": 2.2111036917499094,
|
|
"adv/std_final_conf": 0.6413707733154297,
|
|
"adv/std_reasoning": 0.6403252482414246,
|
|
"adv/std_step_conf": 0.9351861476898193,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 9.421875,
|
|
"calib/ece": 0.5157142857142858,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 1.1102230246251565e-16,
|
|
"calib/mean_conf": 0.9800000000000001,
|
|
"calib/mu_c": 0.9800000000000004,
|
|
"calib/mu_w": 0.9800000000000003,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.5157142857142858,
|
|
"calib/std_conf": 1.1102230246251565e-16,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.49702857142857143,
|
|
"calib/step_q_c_n": 1050.0,
|
|
"calib/step_q_gap": 0.05893752884413672,
|
|
"calib/step_q_w": 0.4380910425844347,
|
|
"calib/step_q_w_n": 1362.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2671.0,
|
|
"completions/max_terminated_length": 2671.0,
|
|
"completions/mean_length": 686.40234375,
|
|
"completions/mean_terminated_length": 691.8070678710938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 215.0,
|
|
"epoch": 0.042666666666666665,
|
|
"grad_norm": 0.05516641214489937,
|
|
"learning_rate": 4.444444444444444e-06,
|
|
"loss": -0.0246,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.023958513513207436,
|
|
"mask/share_reasoning": 0.8191065788269043,
|
|
"mask/share_step_conf": 0.1491224467754364,
|
|
"num_tokens": 10067818.0,
|
|
"reward": 1.0020737648010254,
|
|
"reward_std": 0.1748155653476715,
|
|
"rewards/accuracy_reward_step": 0.45703125,
|
|
"rewards/final_brier_reward_step": 0.47773122787475586,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/step_l2_reward": 0.8254233002662659,
|
|
"step": 40
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.43498802185058594,
|
|
"adv/mean_abs_reasoning": 0.4303048849105835,
|
|
"adv/mean_abs_step_conf": 0.7763193845748901,
|
|
"adv/ratio_final_to_reasoning": 1.0108832995028063,
|
|
"adv/ratio_step_to_reasoning": 1.804114737707911,
|
|
"adv/std_final_conf": 0.6828204989433289,
|
|
"adv/std_reasoning": 0.6816701889038086,
|
|
"adv/std_step_conf": 0.9349315762519836,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 9.3828125,
|
|
"calib/ece": 0.250588235294118,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0,
|
|
"calib/mean_conf": 0.9800000000000003,
|
|
"calib/mu_c": 0.9800000000000003,
|
|
"calib/mu_w": 0.9800000000000003,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.250588235294118,
|
|
"calib/std_conf": 3.3306690738754696e-16,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.47175101803374053,
|
|
"calib/step_q_c_n": 1719.0,
|
|
"calib/step_q_gap": 0.008690988751163697,
|
|
"calib/step_q_w": 0.46306002928257683,
|
|
"calib/step_q_w_n": 683.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2400.0,
|
|
"completions/max_terminated_length": 2400.0,
|
|
"completions/mean_length": 631.42578125,
|
|
"completions/mean_terminated_length": 631.42578125,
|
|
"completions/min_length": 218.0,
|
|
"completions/min_terminated_length": 218.0,
|
|
"epoch": 0.04373333333333333,
|
|
"grad_norm": 0.05414958298206329,
|
|
"learning_rate": 4.416666666666667e-06,
|
|
"loss": -0.0339,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.02605331689119339,
|
|
"mask/share_reasoning": 0.8131687641143799,
|
|
"mask/share_step_conf": 0.16077792644500732,
|
|
"num_tokens": 10336711.0,
|
|
"reward": 1.1583480834960938,
|
|
"reward_std": 0.18238838016986847,
|
|
"rewards/accuracy_reward_step": 0.7265625,
|
|
"rewards/final_brier_reward_step": 0.7369453310966492,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/step_l2_reward": 0.8234797120094299,
|
|
"step": 41
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.3892165422439575,
|
|
"adv/mean_abs_reasoning": 0.38388973474502563,
|
|
"adv/mean_abs_step_conf": 0.7426350116729736,
|
|
"adv/ratio_final_to_reasoning": 1.0138758789746485,
|
|
"adv/ratio_step_to_reasoning": 1.9345008330744289,
|
|
"adv/std_final_conf": 0.6827824115753174,
|
|
"adv/std_reasoning": 0.6815664768218994,
|
|
"adv/std_step_conf": 0.9347171783447266,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5045454545454545,
|
|
"calib/avg_num_step_conf": 9.15234375,
|
|
"calib/ece": 0.41133333333333355,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 9.090909090880839e-05,
|
|
"calib/mean_conf": 0.9799607843137257,
|
|
"calib/mu_c": 0.9800000000000002,
|
|
"calib/mu_w": 0.9799090909090914,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.41133333333333355,
|
|
"calib/std_conf": 0.0006249951941376173,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.49545524691358017,
|
|
"calib/step_q_c_n": 1296.0,
|
|
"calib/step_q_gap": 0.029810547773178986,
|
|
"calib/step_q_w": 0.4656446991404012,
|
|
"calib/step_q_w_n": 1047.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2875.0,
|
|
"completions/max_terminated_length": 2875.0,
|
|
"completions/mean_length": 573.15625,
|
|
"completions/mean_terminated_length": 573.15625,
|
|
"completions/min_length": 254.0,
|
|
"completions/min_terminated_length": 254.0,
|
|
"epoch": 0.0448,
|
|
"grad_norm": 0.05311352014541626,
|
|
"learning_rate": 4.388888888888889e-06,
|
|
"loss": 0.0432,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.027020232751965523,
|
|
"mask/share_reasoning": 0.807121753692627,
|
|
"mask/share_step_conf": 0.16585806012153625,
|
|
"num_tokens": 10587807.0,
|
|
"reward": 1.0639019012451172,
|
|
"reward_std": 0.17825046181678772,
|
|
"rewards/accuracy_reward_step": 0.56640625,
|
|
"rewards/final_brier_reward_step": 0.5789812803268433,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/step_l2_reward": 0.825777530670166,
|
|
"step": 42
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4678432047367096,
|
|
"adv/mean_abs_reasoning": 0.46236586570739746,
|
|
"adv/mean_abs_step_conf": 0.7421578764915466,
|
|
"adv/ratio_final_to_reasoning": 1.0118463308724837,
|
|
"adv/ratio_step_to_reasoning": 1.6051311991988875,
|
|
"adv/std_final_conf": 0.7400902509689331,
|
|
"adv/std_reasoning": 0.7392212748527527,
|
|
"adv/std_step_conf": 0.9346301555633545,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.496551724137931,
|
|
"calib/avg_num_step_conf": 9.6328125,
|
|
"calib/ece": 0.41133333333333355,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -6.896551724167033e-05,
|
|
"calib/mean_conf": 0.9799607843137257,
|
|
"calib/mu_c": 0.9799310344827589,
|
|
"calib/mu_w": 0.9800000000000005,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.41133333333333355,
|
|
"calib/std_conf": 0.0006249951941376173,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4877061469265367,
|
|
"calib/step_q_c_n": 1334.0,
|
|
"calib/step_q_gap": 0.015895192862932395,
|
|
"calib/step_q_w": 0.4718109540636043,
|
|
"calib/step_q_w_n": 1132.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2056.0,
|
|
"completions/max_terminated_length": 2056.0,
|
|
"completions/mean_length": 668.37109375,
|
|
"completions/mean_terminated_length": 670.9921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 204.0,
|
|
"epoch": 0.04586666666666667,
|
|
"grad_norm": 0.053375594317913055,
|
|
"learning_rate": 4.361111111111112e-06,
|
|
"loss": 0.0592,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.02442849799990654,
|
|
"mask/share_reasoning": 0.8149913549423218,
|
|
"mask/share_step_conf": 0.15667389333248138,
|
|
"num_tokens": 10864134.0,
|
|
"reward": 1.0666247606277466,
|
|
"reward_std": 0.195322185754776,
|
|
"rewards/accuracy_reward_step": 0.56640625,
|
|
"rewards/final_brier_reward_step": 0.5828839540481567,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8262854814529419,
|
|
"step": 43
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.44674229621887207,
|
|
"adv/mean_abs_reasoning": 0.39439481496810913,
|
|
"adv/mean_abs_step_conf": 0.7650898694992065,
|
|
"adv/ratio_final_to_reasoning": 1.132728624373512,
|
|
"adv/ratio_step_to_reasoning": 1.939908539520409,
|
|
"adv/std_final_conf": 0.7148959636688232,
|
|
"adv/std_reasoning": 0.6815752387046814,
|
|
"adv/std_step_conf": 0.934475302696228,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5001860003720008,
|
|
"calib/avg_num_step_conf": 10.48828125,
|
|
"calib/ece": 0.4796062992125987,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0001574803149606563,
|
|
"calib/mean_conf": 0.9796062992125987,
|
|
"calib/mu_c": 0.9796850393700792,
|
|
"calib/mu_w": 0.9795275590551186,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4796062992125987,
|
|
"calib/std_conf": 0.0026327958263465357,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.45573998364677026,
|
|
"calib/step_q_c_n": 1223.0,
|
|
"calib/step_q_gap": 0.035582664905320194,
|
|
"calib/step_q_w": 0.42015731874145007,
|
|
"calib/step_q_w_n": 1462.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2817.0,
|
|
"completions/max_terminated_length": 2817.0,
|
|
"completions/mean_length": 695.90625,
|
|
"completions/mean_terminated_length": 698.6353149414062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 234.0,
|
|
"epoch": 0.046933333333333334,
|
|
"grad_norm": 0.07896358519792557,
|
|
"learning_rate": 4.333333333333334e-06,
|
|
"loss": -0.0264,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.022946707904338837,
|
|
"mask/share_reasoning": 0.8176969289779663,
|
|
"mask/share_step_conf": 0.15545010566711426,
|
|
"num_tokens": 11148606.0,
|
|
"reward": 1.032286524772644,
|
|
"reward_std": 0.16667400300502777,
|
|
"rewards/accuracy_reward_step": 0.49609375,
|
|
"rewards/final_brier_reward_step": 0.5156804323196411,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8346784710884094,
|
|
"step": 44
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5271316766738892,
|
|
"adv/mean_abs_reasoning": 0.5128173828125,
|
|
"adv/mean_abs_step_conf": 0.768696665763855,
|
|
"adv/ratio_final_to_reasoning": 1.0279130433974053,
|
|
"adv/ratio_step_to_reasoning": 1.4989676472119733,
|
|
"adv/std_final_conf": 0.7760584354400635,
|
|
"adv/std_reasoning": 0.7754290699958801,
|
|
"adv/std_step_conf": 0.9351422190666199,
|
|
"calib/answer_extract_rate": 0.953125,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 10.515625,
|
|
"calib/ece": 0.443114754098361,
|
|
"calib/final_conf_rate": 0.953125,
|
|
"calib/format_rate": 0.953125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -1.1102230246251565e-16,
|
|
"calib/mean_conf": 0.9800000000000003,
|
|
"calib/mu_c": 0.9800000000000002,
|
|
"calib/mu_w": 0.9800000000000003,
|
|
"calib/nonempty_final_conf_rate": 0.953125,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.443114754098361,
|
|
"calib/std_conf": 3.3306690738754696e-16,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.458319198149576,
|
|
"calib/step_q_c_n": 1297.0,
|
|
"calib/step_q_gap": 0.009781563740973831,
|
|
"calib/step_q_w": 0.44853763440860217,
|
|
"calib/step_q_w_n": 1395.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2839.0,
|
|
"completions/max_terminated_length": 2839.0,
|
|
"completions/mean_length": 712.76953125,
|
|
"completions/mean_terminated_length": 715.5647583007812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 173.0,
|
|
"epoch": 0.048,
|
|
"grad_norm": 0.044064924120903015,
|
|
"learning_rate": 4.305555555555556e-06,
|
|
"loss": 0.0043,
|
|
"mask/has_final_conf_rate": 0.953125,
|
|
"mask/share_final_conf": 0.023293491452932358,
|
|
"mask/share_reasoning": 0.8162682056427002,
|
|
"mask/share_step_conf": 0.15653207898139954,
|
|
"num_tokens": 11436123.0,
|
|
"reward": 1.0050601959228516,
|
|
"reward_std": 0.22470340132713318,
|
|
"rewards/accuracy_reward_step": 0.51171875,
|
|
"rewards/final_brier_reward_step": 0.5289937257766724,
|
|
"rewards/format_reward_step": 0.953125,
|
|
"rewards/step_l2_reward": 0.7921053171157837,
|
|
"step": 45
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4715219736099243,
|
|
"adv/mean_abs_reasoning": 0.4628002345561981,
|
|
"adv/mean_abs_step_conf": 0.7624514102935791,
|
|
"adv/ratio_final_to_reasoning": 1.0188455804524168,
|
|
"adv/ratio_step_to_reasoning": 1.647474122446656,
|
|
"adv/std_final_conf": 0.740247905254364,
|
|
"adv/std_reasoning": 0.7393082976341248,
|
|
"adv/std_step_conf": 0.9344412088394165,
|
|
"calib/answer_extract_rate": 0.9609375,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 11.03125,
|
|
"calib/ece": 0.41495934959349634,
|
|
"calib/final_conf_rate": 0.9609375,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -2.220446049250313e-16,
|
|
"calib/mean_conf": 0.9800000000000004,
|
|
"calib/mu_c": 0.9800000000000002,
|
|
"calib/mu_w": 0.9800000000000004,
|
|
"calib/nonempty_final_conf_rate": 0.9609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.41495934959349634,
|
|
"calib/std_conf": 4.440892098500626e-16,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.46224945926459987,
|
|
"calib/step_q_c_n": 1387.0,
|
|
"calib/step_q_gap": 0.02676581277886575,
|
|
"calib/step_q_w": 0.4354836464857341,
|
|
"calib/step_q_w_n": 1437.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 3053.0,
|
|
"completions/max_terminated_length": 3053.0,
|
|
"completions/mean_length": 758.50390625,
|
|
"completions/mean_terminated_length": 761.4784545898438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 229.0,
|
|
"epoch": 0.04906666666666667,
|
|
"grad_norm": 0.04323741793632507,
|
|
"learning_rate": 4.277777777777778e-06,
|
|
"loss": 0.1058,
|
|
"mask/has_final_conf_rate": 0.9609375,
|
|
"mask/share_final_conf": 0.0240943506360054,
|
|
"mask/share_reasoning": 0.8130663633346558,
|
|
"mask/share_step_conf": 0.15893307328224182,
|
|
"num_tokens": 11735068.0,
|
|
"reward": 1.0368883609771729,
|
|
"reward_std": 0.20708303153514862,
|
|
"rewards/accuracy_reward_step": 0.54296875,
|
|
"rewards/final_brier_reward_step": 0.5593031048774719,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"rewards/step_l2_reward": 0.8091282844543457,
|
|
"step": 46
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5743204355239868,
|
|
"adv/mean_abs_reasoning": 0.5580986738204956,
|
|
"adv/mean_abs_step_conf": 0.774113118648529,
|
|
"adv/ratio_final_to_reasoning": 1.029066117631931,
|
|
"adv/ratio_step_to_reasoning": 1.387054216325752,
|
|
"adv/std_final_conf": 0.7937548756599426,
|
|
"adv/std_reasoning": 0.7929351925849915,
|
|
"adv/std_step_conf": 0.9345194697380066,
|
|
"calib/answer_extract_rate": 0.96484375,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 10.890625,
|
|
"calib/ece": 0.37919354838709707,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -2.220446049250313e-16,
|
|
"calib/mean_conf": 0.9800000000000003,
|
|
"calib/mu_c": 0.9800000000000002,
|
|
"calib/mu_w": 0.9800000000000004,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.37919354838709707,
|
|
"calib/std_conf": 3.3306690738754696e-16,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5025,
|
|
"calib/step_q_c_n": 1464.0,
|
|
"calib/step_q_gap": 0.04697129909365555,
|
|
"calib/step_q_w": 0.4555287009063444,
|
|
"calib/step_q_w_n": 1324.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2756.0,
|
|
"completions/max_terminated_length": 2756.0,
|
|
"completions/mean_length": 708.640625,
|
|
"completions/mean_terminated_length": 717.0435180664062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 202.0,
|
|
"epoch": 0.050133333333333335,
|
|
"grad_norm": 0.04193958640098572,
|
|
"learning_rate": 4.25e-06,
|
|
"loss": 0.0715,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.023382706567645073,
|
|
"mask/share_reasoning": 0.8059793710708618,
|
|
"mask/share_step_conf": 0.15891912579536438,
|
|
"num_tokens": 12022456.0,
|
|
"reward": 1.0744290351867676,
|
|
"reward_std": 0.24675029516220093,
|
|
"rewards/accuracy_reward_step": 0.58203125,
|
|
"rewards/final_brier_reward_step": 0.596957802772522,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"rewards/step_l2_reward": 0.8283501863479614,
|
|
"step": 47
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5286183953285217,
|
|
"adv/mean_abs_reasoning": 0.5230193138122559,
|
|
"adv/mean_abs_step_conf": 0.7533445954322815,
|
|
"adv/ratio_final_to_reasoning": 1.0107053054608146,
|
|
"adv/ratio_step_to_reasoning": 1.4403762452694122,
|
|
"adv/std_final_conf": 0.7767484784126282,
|
|
"adv/std_reasoning": 0.7754042148590088,
|
|
"adv/std_step_conf": 0.9348289370536804,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 10.06640625,
|
|
"calib/ece": 0.49574803149606333,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 2.220446049250313e-16,
|
|
"calib/mean_conf": 0.9800000000000003,
|
|
"calib/mu_c": 0.9800000000000004,
|
|
"calib/mu_w": 0.9800000000000002,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.49574803149606333,
|
|
"calib/std_conf": 3.3306690738754696e-16,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5308748906386701,
|
|
"calib/step_q_c_n": 1143.0,
|
|
"calib/step_q_gap": 0.04723472327465339,
|
|
"calib/step_q_w": 0.4836401673640167,
|
|
"calib/step_q_w_n": 1434.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2207.0,
|
|
"completions/max_terminated_length": 2207.0,
|
|
"completions/mean_length": 636.82421875,
|
|
"completions/mean_terminated_length": 641.8385620117188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 200.0,
|
|
"epoch": 0.0512,
|
|
"grad_norm": 0.04802514612674713,
|
|
"learning_rate": 4.222222222222223e-06,
|
|
"loss": -0.0538,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.025706924498081207,
|
|
"mask/share_reasoning": 0.8068351745605469,
|
|
"mask/share_step_conf": 0.15964537858963013,
|
|
"num_tokens": 12289171.0,
|
|
"reward": 1.0197627544403076,
|
|
"reward_std": 0.21757322549819946,
|
|
"rewards/accuracy_reward_step": 0.48046875,
|
|
"rewards/final_brier_reward_step": 0.5005406141281128,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/step_l2_reward": 0.8296357989311218,
|
|
"step": 48
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4833810031414032,
|
|
"adv/mean_abs_reasoning": 0.4688076376914978,
|
|
"adv/mean_abs_step_conf": 0.7697415351867676,
|
|
"adv/ratio_final_to_reasoning": 1.031086023942928,
|
|
"adv/ratio_step_to_reasoning": 1.6419133847245497,
|
|
"adv/std_final_conf": 0.7202240824699402,
|
|
"adv/std_reasoning": 0.7207056879997253,
|
|
"adv/std_step_conf": 0.9345253109931946,
|
|
"calib/answer_extract_rate": 0.953125,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 10.875,
|
|
"calib/ece": 0.41673469387755135,
|
|
"calib/final_conf_rate": 0.95703125,
|
|
"calib/format_rate": 0.953125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -1.1102230246251565e-16,
|
|
"calib/mean_conf": 0.9800000000000003,
|
|
"calib/mu_c": 0.9800000000000003,
|
|
"calib/mu_w": 0.9800000000000004,
|
|
"calib/nonempty_final_conf_rate": 0.95703125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.41673469387755135,
|
|
"calib/std_conf": 3.3306690738754696e-16,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5488761632068718,
|
|
"calib/step_q_c_n": 1397.0,
|
|
"calib/step_q_gap": 0.06362021511747018,
|
|
"calib/step_q_w": 0.48525594808940165,
|
|
"calib/step_q_w_n": 1387.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01953125,
|
|
"completions/max_length": 3034.0,
|
|
"completions/max_terminated_length": 3034.0,
|
|
"completions/mean_length": 700.39453125,
|
|
"completions/mean_terminated_length": 714.3466186523438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 206.0,
|
|
"epoch": 0.05226666666666667,
|
|
"grad_norm": 0.0385357141494751,
|
|
"learning_rate": 4.194444444444445e-06,
|
|
"loss": -0.0333,
|
|
"mask/has_final_conf_rate": 0.95703125,
|
|
"mask/share_final_conf": 0.022826887667179108,
|
|
"mask/share_reasoning": 0.8002016544342041,
|
|
"mask/share_step_conf": 0.1574402153491974,
|
|
"num_tokens": 12573008.0,
|
|
"reward": 1.0233741998672485,
|
|
"reward_std": 0.2162887454032898,
|
|
"rewards/accuracy_reward_step": 0.5390625,
|
|
"rewards/final_brier_reward_step": 0.555243730545044,
|
|
"rewards/format_reward_step": 0.953125,
|
|
"rewards/step_l2_reward": 0.7953780889511108,
|
|
"step": 49
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.3921908438205719,
|
|
"adv/mean_abs_reasoning": 0.3828577399253845,
|
|
"adv/mean_abs_step_conf": 0.7531270980834961,
|
|
"adv/ratio_final_to_reasoning": 1.0243774721571681,
|
|
"adv/ratio_step_to_reasoning": 1.9671199496457188,
|
|
"adv/std_final_conf": 0.6827874183654785,
|
|
"adv/std_reasoning": 0.6815925240516663,
|
|
"adv/std_step_conf": 0.9339188933372498,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 9.4375,
|
|
"calib/ece": 0.35698412698412707,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -1.1102230246251565e-16,
|
|
"calib/mean_conf": 0.9800000000000001,
|
|
"calib/mu_c": 0.9800000000000004,
|
|
"calib/mu_w": 0.9800000000000005,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.35698412698412707,
|
|
"calib/std_conf": 1.1102230246251565e-16,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.5290221270521057,
|
|
"calib/step_q_c_n": 1401.0,
|
|
"calib/step_q_gap": 0.016667447249150036,
|
|
"calib/step_q_w": 0.5123546798029557,
|
|
"calib/step_q_w_n": 1015.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2673.0,
|
|
"completions/max_terminated_length": 2673.0,
|
|
"completions/mean_length": 670.04296875,
|
|
"completions/mean_terminated_length": 672.670654296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 231.0,
|
|
"epoch": 0.05333333333333334,
|
|
"grad_norm": 0.04379646107554436,
|
|
"learning_rate": 4.166666666666667e-06,
|
|
"loss": -0.0075,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.025415990501642227,
|
|
"mask/share_reasoning": 0.8144956231117249,
|
|
"mask/share_step_conf": 0.15618211030960083,
|
|
"num_tokens": 12849899.0,
|
|
"reward": 1.0978294610977173,
|
|
"reward_std": 0.19145922362804413,
|
|
"rewards/accuracy_reward_step": 0.6171875,
|
|
"rewards/final_brier_reward_step": 0.6238265037536621,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/step_l2_reward": 0.8348673582077026,
|
|
"step": 50
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.42505133152008057,
|
|
"adv/mean_abs_reasoning": 0.41325896978378296,
|
|
"adv/mean_abs_step_conf": 0.7430156469345093,
|
|
"adv/ratio_final_to_reasoning": 1.0285350412175382,
|
|
"adv/ratio_step_to_reasoning": 1.7979419716485645,
|
|
"adv/std_final_conf": 0.7020641565322876,
|
|
"adv/std_reasoning": 0.7014035582542419,
|
|
"adv/std_step_conf": 0.9343358874320984,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 10.30859375,
|
|
"calib/ece": 0.38160642570281145,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -2.220446049250313e-16,
|
|
"calib/mean_conf": 0.9800000000000002,
|
|
"calib/mu_c": 0.9800000000000002,
|
|
"calib/mu_w": 0.9800000000000004,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.38160642570281145,
|
|
"calib/std_conf": 2.220446049250313e-16,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5214336917562725,
|
|
"calib/step_q_c_n": 1395.0,
|
|
"calib/step_q_gap": 0.08580668210997028,
|
|
"calib/step_q_w": 0.43562700964630224,
|
|
"calib/step_q_w_n": 1244.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 3009.0,
|
|
"completions/max_terminated_length": 3009.0,
|
|
"completions/mean_length": 700.93359375,
|
|
"completions/mean_terminated_length": 700.93359375,
|
|
"completions/min_length": 197.0,
|
|
"completions/min_terminated_length": 197.0,
|
|
"epoch": 0.0544,
|
|
"grad_norm": 0.04266020655632019,
|
|
"learning_rate": 4.138888888888889e-06,
|
|
"loss": 0.1176,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.024903567507863045,
|
|
"mask/share_reasoning": 0.8158866167068481,
|
|
"mask/share_step_conf": 0.15920981764793396,
|
|
"num_tokens": 13138634.0,
|
|
"reward": 1.0784484148025513,
|
|
"reward_std": 0.19629380106925964,
|
|
"rewards/accuracy_reward_step": 0.58203125,
|
|
"rewards/final_brier_reward_step": 0.5972671508789062,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/step_l2_reward": 0.832461416721344,
|
|
"step": 51
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.44877541065216064,
|
|
"adv/mean_abs_reasoning": 0.4400636851787567,
|
|
"adv/mean_abs_step_conf": 0.7490113973617554,
|
|
"adv/ratio_final_to_reasoning": 1.019796510747905,
|
|
"adv/ratio_step_to_reasoning": 1.7020522769505557,
|
|
"adv/std_final_conf": 0.7218416929244995,
|
|
"adv/std_reasoning": 0.7205371856689453,
|
|
"adv/std_step_conf": 0.9344486594200134,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 9.4765625,
|
|
"calib/ece": 0.3107086614173231,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -1.1102230246251565e-16,
|
|
"calib/mean_conf": 0.9800000000000003,
|
|
"calib/mu_c": 0.9800000000000003,
|
|
"calib/mu_w": 0.9800000000000004,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3107086614173231,
|
|
"calib/std_conf": 3.3306690738754696e-16,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5440012886597938,
|
|
"calib/step_q_c_n": 1552.0,
|
|
"calib/step_q_gap": 0.06388674112256587,
|
|
"calib/step_q_w": 0.48011454753722793,
|
|
"calib/step_q_w_n": 873.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 1351.0,
|
|
"completions/max_terminated_length": 1351.0,
|
|
"completions/mean_length": 615.79296875,
|
|
"completions/mean_terminated_length": 620.6417236328125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 228.0,
|
|
"epoch": 0.055466666666666664,
|
|
"grad_norm": 0.052587732672691345,
|
|
"learning_rate": 4.111111111111111e-06,
|
|
"loss": -0.0315,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.025957010686397552,
|
|
"mask/share_reasoning": 0.8111605644226074,
|
|
"mask/share_step_conf": 0.15506988763809204,
|
|
"num_tokens": 13404229.0,
|
|
"reward": 1.1470290422439575,
|
|
"reward_std": 0.19214141368865967,
|
|
"rewards/accuracy_reward_step": 0.6640625,
|
|
"rewards/final_brier_reward_step": 0.6728858947753906,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8604689836502075,
|
|
"step": 52
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4245341718196869,
|
|
"adv/mean_abs_reasoning": 0.39320626854896545,
|
|
"adv/mean_abs_step_conf": 0.752773106098175,
|
|
"adv/ratio_final_to_reasoning": 1.079672949737881,
|
|
"adv/ratio_step_to_reasoning": 1.914448385769906,
|
|
"adv/std_final_conf": 0.681718111038208,
|
|
"adv/std_reasoning": 0.6613555550575256,
|
|
"adv/std_step_conf": 0.9345915913581848,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5052631578947369,
|
|
"calib/avg_num_step_conf": 10.28125,
|
|
"calib/ece": 0.3554545454545458,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0001052631578947194,
|
|
"calib/mean_conf": 0.9799604743083007,
|
|
"calib/mu_c": 0.9800000000000004,
|
|
"calib/mu_w": 0.9798947368421057,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3554545454545458,
|
|
"calib/std_conf": 0.0006274509038097849,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5141645408163266,
|
|
"calib/step_q_c_n": 1568.0,
|
|
"calib/step_q_gap": 0.04017017991407096,
|
|
"calib/step_q_w": 0.4739943609022556,
|
|
"calib/step_q_w_n": 1064.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2672.0,
|
|
"completions/max_terminated_length": 2672.0,
|
|
"completions/mean_length": 664.16796875,
|
|
"completions/mean_terminated_length": 666.7725830078125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 232.0,
|
|
"epoch": 0.05653333333333333,
|
|
"grad_norm": 0.044773347675800323,
|
|
"learning_rate": 4.083333333333334e-06,
|
|
"loss": 0.014,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.02365219034254551,
|
|
"mask/share_reasoning": 0.8136025667190552,
|
|
"mask/share_step_conf": 0.15883903205394745,
|
|
"num_tokens": 13680080.0,
|
|
"reward": 1.1072542667388916,
|
|
"reward_std": 0.18055878579616547,
|
|
"rewards/accuracy_reward_step": 0.6171875,
|
|
"rewards/final_brier_reward_step": 0.6317120790481567,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8411349654197693,
|
|
"step": 53
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.3496309518814087,
|
|
"adv/mean_abs_reasoning": 0.34337419271469116,
|
|
"adv/mean_abs_step_conf": 0.7803263664245605,
|
|
"adv/ratio_final_to_reasoning": 1.0182214018975977,
|
|
"adv/ratio_step_to_reasoning": 2.272524793594293,
|
|
"adv/std_final_conf": 0.6413673162460327,
|
|
"adv/std_reasoning": 0.6402059197425842,
|
|
"adv/std_step_conf": 0.9342555999755859,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 9.15234375,
|
|
"calib/ece": 0.2988976377952759,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 1.1102230246251565e-16,
|
|
"calib/mean_conf": 0.9800000000000003,
|
|
"calib/mu_c": 0.9800000000000004,
|
|
"calib/mu_w": 0.9800000000000003,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.2988976377952759,
|
|
"calib/std_conf": 3.3306690738754696e-16,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5104761904761904,
|
|
"calib/step_q_c_n": 1470.0,
|
|
"calib/step_q_gap": 0.03387825233186054,
|
|
"calib/step_q_w": 0.4765979381443299,
|
|
"calib/step_q_w_n": 873.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2502.0,
|
|
"completions/max_terminated_length": 2502.0,
|
|
"completions/mean_length": 611.46484375,
|
|
"completions/mean_terminated_length": 611.46484375,
|
|
"completions/min_length": 190.0,
|
|
"completions/min_terminated_length": 190.0,
|
|
"epoch": 0.0576,
|
|
"grad_norm": 0.050735846161842346,
|
|
"learning_rate": 4.055555555555556e-06,
|
|
"loss": 0.0082,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.026796435937285423,
|
|
"mask/share_reasoning": 0.8149226307868958,
|
|
"mask/share_step_conf": 0.15828096866607666,
|
|
"num_tokens": 13942847.0,
|
|
"reward": 1.1371355056762695,
|
|
"reward_std": 0.15493734180927277,
|
|
"rewards/accuracy_reward_step": 0.67578125,
|
|
"rewards/final_brier_reward_step": 0.6880406141281128,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/step_l2_reward": 0.8350911140441895,
|
|
"step": 54
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5220646858215332,
|
|
"adv/mean_abs_reasoning": 0.5028437376022339,
|
|
"adv/mean_abs_step_conf": 0.7370898127555847,
|
|
"adv/ratio_final_to_reasoning": 1.0382244955678532,
|
|
"adv/ratio_step_to_reasoning": 1.4658426816058854,
|
|
"adv/std_final_conf": 0.7588990926742554,
|
|
"adv/std_reasoning": 0.7576790452003479,
|
|
"adv/std_step_conf": 0.9348087310791016,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.504,
|
|
"calib/avg_num_step_conf": 9.53515625,
|
|
"calib/ece": 0.4778486055776895,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.000320000000000098,
|
|
"calib/mean_conf": 0.9798406374501994,
|
|
"calib/mu_c": 0.9800000000000005,
|
|
"calib/mu_w": 0.9796800000000004,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4778486055776895,
|
|
"calib/std_conf": 0.002519743155512655,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.45781411359724616,
|
|
"calib/step_q_c_n": 1162.0,
|
|
"calib/step_q_gap": 0.03114484072781687,
|
|
"calib/step_q_w": 0.4266692728694293,
|
|
"calib/step_q_w_n": 1279.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2713.0,
|
|
"completions/max_terminated_length": 2713.0,
|
|
"completions/mean_length": 630.19921875,
|
|
"completions/mean_terminated_length": 637.6719360351562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 198.0,
|
|
"epoch": 0.058666666666666666,
|
|
"grad_norm": 0.0496719554066658,
|
|
"learning_rate": 4.027777777777779e-06,
|
|
"loss": -0.0886,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.026188170537352562,
|
|
"mask/share_reasoning": 0.8033748865127563,
|
|
"mask/share_step_conf": 0.15871819853782654,
|
|
"num_tokens": 14212002.0,
|
|
"reward": 1.0265394449234009,
|
|
"reward_std": 0.22830066084861755,
|
|
"rewards/accuracy_reward_step": 0.4921875,
|
|
"rewards/final_brier_reward_step": 0.5116264820098877,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/step_l2_reward": 0.831280529499054,
|
|
"step": 55
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.44553542137145996,
|
|
"adv/mean_abs_reasoning": 0.44030097126960754,
|
|
"adv/mean_abs_step_conf": 0.7443605661392212,
|
|
"adv/ratio_final_to_reasoning": 1.011888345571346,
|
|
"adv/ratio_step_to_reasoning": 1.6905721647464416,
|
|
"adv/std_final_conf": 0.7214465141296387,
|
|
"adv/std_reasoning": 0.7205111980438232,
|
|
"adv/std_step_conf": 0.9344826936721802,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 10.29296875,
|
|
"calib/ece": 0.4305928853754942,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -1.1102230246251565e-16,
|
|
"calib/mean_conf": 0.9800000000000002,
|
|
"calib/mu_c": 0.9800000000000002,
|
|
"calib/mu_w": 0.9800000000000003,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4305928853754942,
|
|
"calib/std_conf": 2.220446049250313e-16,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4614436885865457,
|
|
"calib/step_q_c_n": 1323.0,
|
|
"calib/step_q_gap": 0.03695435931825297,
|
|
"calib/step_q_w": 0.42448932926829275,
|
|
"calib/step_q_w_n": 1312.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2934.0,
|
|
"completions/max_terminated_length": 2934.0,
|
|
"completions/mean_length": 670.90234375,
|
|
"completions/mean_terminated_length": 673.5333862304688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 221.0,
|
|
"epoch": 0.05973333333333333,
|
|
"grad_norm": 0.052368298172950745,
|
|
"learning_rate": 4.000000000000001e-06,
|
|
"loss": 0.0264,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.024397222325205803,
|
|
"mask/share_reasoning": 0.8169806003570557,
|
|
"mask/share_step_conf": 0.154715895652771,
|
|
"num_tokens": 14490593.0,
|
|
"reward": 1.0649067163467407,
|
|
"reward_std": 0.18028205633163452,
|
|
"rewards/accuracy_reward_step": 0.54296875,
|
|
"rewards/final_brier_reward_step": 0.5603859424591064,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8421182036399841,
|
|
"step": 56
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.44250503182411194,
|
|
"adv/mean_abs_reasoning": 0.4391246736049652,
|
|
"adv/mean_abs_step_conf": 0.7414326667785645,
|
|
"adv/ratio_final_to_reasoning": 1.007697946442854,
|
|
"adv/ratio_step_to_reasoning": 1.6884331747788655,
|
|
"adv/std_final_conf": 0.7213237285614014,
|
|
"adv/std_reasoning": 0.7205445766448975,
|
|
"adv/std_step_conf": 0.9351280927658081,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 9.78515625,
|
|
"calib/ece": 0.3266135458167334,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -2.220446049250313e-16,
|
|
"calib/mean_conf": 0.9800000000000003,
|
|
"calib/mu_c": 0.9800000000000003,
|
|
"calib/mu_w": 0.9800000000000005,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.3266135458167334,
|
|
"calib/std_conf": 3.3306690738754696e-16,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.44513675783855905,
|
|
"calib/step_q_c_n": 1499.0,
|
|
"calib/step_q_gap": 0.021886260820666414,
|
|
"calib/step_q_w": 0.42325049701789264,
|
|
"calib/step_q_w_n": 1006.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2749.0,
|
|
"completions/max_terminated_length": 2749.0,
|
|
"completions/mean_length": 620.91796875,
|
|
"completions/mean_terminated_length": 625.8070678710938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 264.0,
|
|
"epoch": 0.0608,
|
|
"grad_norm": 0.053272590041160583,
|
|
"learning_rate": 3.972222222222223e-06,
|
|
"loss": -0.0094,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.0259304977953434,
|
|
"mask/share_reasoning": 0.8077971935272217,
|
|
"mask/share_step_conf": 0.15845987200737,
|
|
"num_tokens": 14756340.0,
|
|
"reward": 1.1104764938354492,
|
|
"reward_std": 0.2054135799407959,
|
|
"rewards/accuracy_reward_step": 0.640625,
|
|
"rewards/final_brier_reward_step": 0.6499218940734863,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.8317291140556335,
|
|
"step": 57
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6040040254592896,
|
|
"adv/mean_abs_reasoning": 0.5898081660270691,
|
|
"adv/mean_abs_step_conf": 0.7271995544433594,
|
|
"adv/ratio_final_to_reasoning": 1.0240686044207277,
|
|
"adv/ratio_step_to_reasoning": 1.2329424994939537,
|
|
"adv/std_final_conf": 0.8113057017326355,
|
|
"adv/std_reasoning": 0.8100230693817139,
|
|
"adv/std_step_conf": 0.9353846907615662,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 10.5625,
|
|
"calib/ece": 0.4779919678714861,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0,
|
|
"calib/mean_conf": 0.9800000000000002,
|
|
"calib/mu_c": 0.9800000000000004,
|
|
"calib/mu_w": 0.9800000000000004,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.4779919678714861,
|
|
"calib/std_conf": 2.220446049250313e-16,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.44436501261564343,
|
|
"calib/step_q_c_n": 1189.0,
|
|
"calib/step_q_gap": 0.03760593670805268,
|
|
"calib/step_q_w": 0.40675907590759075,
|
|
"calib/step_q_w_n": 1515.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2931.0,
|
|
"completions/max_terminated_length": 2931.0,
|
|
"completions/mean_length": 702.85546875,
|
|
"completions/mean_terminated_length": 711.1897583007812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 162.0,
|
|
"epoch": 0.06186666666666667,
|
|
"grad_norm": 0.04196161404252052,
|
|
"learning_rate": 3.944444444444445e-06,
|
|
"loss": -0.0474,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.0239163376390934,
|
|
"mask/share_reasoning": 0.8132352828979492,
|
|
"mask/share_step_conf": 0.15112963318824768,
|
|
"num_tokens": 15042591.0,
|
|
"reward": 1.0031821727752686,
|
|
"reward_std": 0.26209405064582825,
|
|
"rewards/accuracy_reward_step": 0.4921875,
|
|
"rewards/final_brier_reward_step": 0.49945777654647827,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"rewards/step_l2_reward": 0.8103334903717041,
|
|
"step": 58
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.462576299905777,
|
|
"adv/mean_abs_reasoning": 0.4544098377227783,
|
|
"adv/mean_abs_step_conf": 0.7671006917953491,
|
|
"adv/ratio_final_to_reasoning": 1.0179715787490955,
|
|
"adv/ratio_step_to_reasoning": 1.688125185932559,
|
|
"adv/std_final_conf": 0.7218502759933472,
|
|
"adv/std_reasoning": 0.7205907106399536,
|
|
"adv/std_step_conf": 0.9345336556434631,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 10.30078125,
|
|
"calib/ece": 0.3887301587301588,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -3.3306690738754696e-16,
|
|
"calib/mean_conf": 0.9800000000000001,
|
|
"calib/mu_c": 0.9800000000000002,
|
|
"calib/mu_w": 0.9800000000000005,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.3887301587301588,
|
|
"calib/std_conf": 1.1102230246251565e-16,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.502050909090909,
|
|
"calib/step_q_c_n": 1375.0,
|
|
"calib/step_q_gap": 0.08549781875810397,
|
|
"calib/step_q_w": 0.416553090332805,
|
|
"calib/step_q_w_n": 1262.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2621.0,
|
|
"completions/max_terminated_length": 2621.0,
|
|
"completions/mean_length": 665.68359375,
|
|
"completions/mean_terminated_length": 668.2941284179688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 173.0,
|
|
"epoch": 0.06293333333333333,
|
|
"grad_norm": 0.0447392538189888,
|
|
"learning_rate": 3.916666666666667e-06,
|
|
"loss": 0.0181,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.026031965389847755,
|
|
"mask/share_reasoning": 0.8124123811721802,
|
|
"mask/share_step_conf": 0.15764933824539185,
|
|
"num_tokens": 15319254.0,
|
|
"reward": 1.0689520835876465,
|
|
"reward_std": 0.19910673797130585,
|
|
"rewards/accuracy_reward_step": 0.58203125,
|
|
"rewards/final_brier_reward_step": 0.5899218916893005,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.8241757750511169,
|
|
"step": 59
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4998510181903839,
|
|
"adv/mean_abs_reasoning": 0.4936041533946991,
|
|
"adv/mean_abs_step_conf": 0.7445494532585144,
|
|
"adv/ratio_final_to_reasoning": 1.0126556163531502,
|
|
"adv/ratio_step_to_reasoning": 1.5083938174708849,
|
|
"adv/std_final_conf": 0.7406123280525208,
|
|
"adv/std_reasoning": 0.7393380999565125,
|
|
"adv/std_step_conf": 0.9346485733985901,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 8.453125,
|
|
"calib/ece": 0.4266403162055338,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0,
|
|
"calib/mean_conf": 0.9800000000000002,
|
|
"calib/mu_c": 0.9800000000000003,
|
|
"calib/mu_w": 0.9800000000000003,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4266403162055338,
|
|
"calib/std_conf": 2.220446049250313e-16,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5135161870503597,
|
|
"calib/step_q_c_n": 1112.0,
|
|
"calib/step_q_gap": 0.06483747982602511,
|
|
"calib/step_q_w": 0.4486787072243346,
|
|
"calib/step_q_w_n": 1052.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2759.0,
|
|
"completions/max_terminated_length": 2759.0,
|
|
"completions/mean_length": 590.5625,
|
|
"completions/mean_terminated_length": 592.8784790039062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 198.0,
|
|
"epoch": 0.064,
|
|
"grad_norm": 0.04761456325650215,
|
|
"learning_rate": 3.88888888888889e-06,
|
|
"loss": 0.0079,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.028008155524730682,
|
|
"mask/share_reasoning": 0.8162366151809692,
|
|
"mask/share_step_conf": 0.15184903144836426,
|
|
"num_tokens": 15579294.0,
|
|
"reward": 1.07675039768219,
|
|
"reward_std": 0.21444371342658997,
|
|
"rewards/accuracy_reward_step": 0.55078125,
|
|
"rewards/final_brier_reward_step": 0.5641359090805054,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8543682098388672,
|
|
"step": 60
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4446122646331787,
|
|
"adv/mean_abs_reasoning": 0.4401038885116577,
|
|
"adv/mean_abs_step_conf": 0.7297759056091309,
|
|
"adv/ratio_final_to_reasoning": 1.0102438906794653,
|
|
"adv/ratio_step_to_reasoning": 1.6581900879746492,
|
|
"adv/std_final_conf": 0.7213276624679565,
|
|
"adv/std_reasoning": 0.7204954028129578,
|
|
"adv/std_step_conf": 0.9347290396690369,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 8.38671875,
|
|
"calib/ece": 0.2858823529411768,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -3.3306690738754696e-16,
|
|
"calib/mean_conf": 0.9800000000000003,
|
|
"calib/mu_c": 0.9800000000000002,
|
|
"calib/mu_w": 0.9800000000000005,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.2858823529411768,
|
|
"calib/std_conf": 3.3306690738754696e-16,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.510771954674221,
|
|
"calib/step_q_c_n": 1412.0,
|
|
"calib/step_q_gap": 0.05141141045653391,
|
|
"calib/step_q_w": 0.4593605442176871,
|
|
"calib/step_q_w_n": 735.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1603.0,
|
|
"completions/max_terminated_length": 1603.0,
|
|
"completions/mean_length": 522.85546875,
|
|
"completions/mean_terminated_length": 524.9058837890625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 124.0,
|
|
"epoch": 0.06506666666666666,
|
|
"grad_norm": 0.04688733443617821,
|
|
"learning_rate": 3.861111111111112e-06,
|
|
"loss": -0.0613,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.031810835003852844,
|
|
"mask/share_reasoning": 0.8003161549568176,
|
|
"mask/share_step_conf": 0.16396674513816833,
|
|
"num_tokens": 15817209.0,
|
|
"reward": 1.1565698385238647,
|
|
"reward_std": 0.19978997111320496,
|
|
"rewards/accuracy_reward_step": 0.69140625,
|
|
"rewards/final_brier_reward_step": 0.703040599822998,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/step_l2_reward": 0.8489201068878174,
|
|
"step": 61
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.577107310295105,
|
|
"adv/mean_abs_reasoning": 0.5740156173706055,
|
|
"adv/mean_abs_step_conf": 0.7687522172927856,
|
|
"adv/ratio_final_to_reasoning": 1.005386078062931,
|
|
"adv/ratio_step_to_reasoning": 1.3392531388156486,
|
|
"adv/std_final_conf": 0.7763068675994873,
|
|
"adv/std_reasoning": 0.775528073310852,
|
|
"adv/std_step_conf": 0.9355931878089905,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 9.0859375,
|
|
"calib/ece": 0.5302008032128516,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0,
|
|
"calib/mean_conf": 0.9800000000000002,
|
|
"calib/mu_c": 0.9800000000000003,
|
|
"calib/mu_w": 0.9800000000000003,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.5302008032128516,
|
|
"calib/std_conf": 2.220446049250313e-16,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4828800856531049,
|
|
"calib/step_q_c_n": 934.0,
|
|
"calib/step_q_gap": 0.04942462588298996,
|
|
"calib/step_q_w": 0.43345545977011496,
|
|
"calib/step_q_w_n": 1392.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2715.0,
|
|
"completions/max_terminated_length": 2715.0,
|
|
"completions/mean_length": 633.60546875,
|
|
"completions/mean_terminated_length": 636.0902099609375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 190.0,
|
|
"epoch": 0.06613333333333334,
|
|
"grad_norm": 0.040969934314489365,
|
|
"learning_rate": 3.833333333333334e-06,
|
|
"loss": 0.0081,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.026683034375309944,
|
|
"mask/share_reasoning": 0.8198766708374023,
|
|
"mask/share_step_conf": 0.14953404664993286,
|
|
"num_tokens": 16086492.0,
|
|
"reward": 0.9675499200820923,
|
|
"reward_std": 0.25620460510253906,
|
|
"rewards/accuracy_reward_step": 0.44140625,
|
|
"rewards/final_brier_reward_step": 0.4585171937942505,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/step_l2_reward": 0.795846700668335,
|
|
"step": 62
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5213485956192017,
|
|
"adv/mean_abs_reasoning": 0.4952261447906494,
|
|
"adv/mean_abs_step_conf": 0.7424039840698242,
|
|
"adv/ratio_final_to_reasoning": 1.0527485293402576,
|
|
"adv/ratio_step_to_reasoning": 1.49912114269263,
|
|
"adv/std_final_conf": 0.7758664488792419,
|
|
"adv/std_reasoning": 0.757529079914093,
|
|
"adv/std_step_conf": 0.934902548789978,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5040650406504066,
|
|
"calib/avg_num_step_conf": 8.828125,
|
|
"calib/ece": 0.466126482213439,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 8.130081300794512e-05,
|
|
"calib/mean_conf": 0.9799604743083007,
|
|
"calib/mu_c": 0.9800000000000002,
|
|
"calib/mu_w": 0.9799186991869923,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.466126482213439,
|
|
"calib/std_conf": 0.0006274509038097849,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4964018957345971,
|
|
"calib/step_q_c_n": 1055.0,
|
|
"calib/step_q_gap": 0.061339655070696664,
|
|
"calib/step_q_w": 0.4350622406639004,
|
|
"calib/step_q_w_n": 1205.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2584.0,
|
|
"completions/max_terminated_length": 2584.0,
|
|
"completions/mean_length": 671.94140625,
|
|
"completions/mean_terminated_length": 671.94140625,
|
|
"completions/min_length": 164.0,
|
|
"completions/min_terminated_length": 164.0,
|
|
"epoch": 0.0672,
|
|
"grad_norm": 0.040248576551675797,
|
|
"learning_rate": 3.8055555555555556e-06,
|
|
"loss": 0.027,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.02550509385764599,
|
|
"mask/share_reasoning": 0.8300318717956543,
|
|
"mask/share_step_conf": 0.14446300268173218,
|
|
"num_tokens": 16367149.0,
|
|
"reward": 1.043703556060791,
|
|
"reward_std": 0.22153249382972717,
|
|
"rewards/accuracy_reward_step": 0.51171875,
|
|
"rewards/final_brier_reward_step": 0.5226527452468872,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/step_l2_reward": 0.8442112803459167,
|
|
"step": 63
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4774155020713806,
|
|
"adv/mean_abs_reasoning": 0.45380038022994995,
|
|
"adv/mean_abs_step_conf": 0.7652921080589294,
|
|
"adv/ratio_final_to_reasoning": 1.0520385677717246,
|
|
"adv/ratio_step_to_reasoning": 1.6864069344127484,
|
|
"adv/std_final_conf": 0.7199251651763916,
|
|
"adv/std_reasoning": 0.7014497518539429,
|
|
"adv/std_step_conf": 0.9351070523262024,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.5055555555555555,
|
|
"calib/avg_num_step_conf": 8.43359375,
|
|
"calib/ece": 0.3414056224899601,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.00011111111111117289,
|
|
"calib/mean_conf": 0.9799598393574299,
|
|
"calib/mu_c": 0.9800000000000003,
|
|
"calib/mu_w": 0.9798888888888891,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3414056224899601,
|
|
"calib/std_conf": 0.0006324504316475356,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5067135761589403,
|
|
"calib/step_q_c_n": 1208.0,
|
|
"calib/step_q_gap": 0.04668203041761537,
|
|
"calib/step_q_w": 0.46003154574132493,
|
|
"calib/step_q_w_n": 951.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2698.0,
|
|
"completions/max_terminated_length": 2698.0,
|
|
"completions/mean_length": 583.59765625,
|
|
"completions/mean_terminated_length": 588.1929321289062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 223.0,
|
|
"epoch": 0.06826666666666667,
|
|
"grad_norm": 0.043365515768527985,
|
|
"learning_rate": 3.777777777777778e-06,
|
|
"loss": 0.0162,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.028047874569892883,
|
|
"mask/share_reasoning": 0.8139121532440186,
|
|
"mask/share_step_conf": 0.15022748708724976,
|
|
"num_tokens": 16620326.0,
|
|
"reward": 1.088362455368042,
|
|
"reward_std": 0.2054726779460907,
|
|
"rewards/accuracy_reward_step": 0.625,
|
|
"rewards/final_brier_reward_step": 0.6348433494567871,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/step_l2_reward": 0.8149001598358154,
|
|
"step": 64
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4592784643173218,
|
|
"adv/mean_abs_reasoning": 0.4338899254798889,
|
|
"adv/mean_abs_step_conf": 0.7477743029594421,
|
|
"adv/ratio_final_to_reasoning": 1.058513778141663,
|
|
"adv/ratio_step_to_reasoning": 1.7234193721653994,
|
|
"adv/std_final_conf": 0.7208153605461121,
|
|
"adv/std_reasoning": 0.7014332413673401,
|
|
"adv/std_step_conf": 0.9351285099983215,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5093457943925234,
|
|
"calib/avg_num_step_conf": 7.265625,
|
|
"calib/ece": 0.4028458498023718,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0001869158878503585,
|
|
"calib/mean_conf": 0.9799209486166011,
|
|
"calib/mu_c": 0.9800000000000003,
|
|
"calib/mu_w": 0.97981308411215,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4028458498023718,
|
|
"calib/std_conf": 0.0008855872135339171,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5538731060606061,
|
|
"calib/step_q_c_n": 1056.0,
|
|
"calib/step_q_gap": 0.02426862844866584,
|
|
"calib/step_q_w": 0.5296044776119403,
|
|
"calib/step_q_w_n": 804.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2839.0,
|
|
"completions/max_terminated_length": 2839.0,
|
|
"completions/mean_length": 493.0625,
|
|
"completions/mean_terminated_length": 494.99609375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 192.0,
|
|
"epoch": 0.06933333333333333,
|
|
"grad_norm": 0.04905857890844345,
|
|
"learning_rate": 3.7500000000000005e-06,
|
|
"loss": -0.0156,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03320653736591339,
|
|
"mask/share_reasoning": 0.8077315092086792,
|
|
"mask/share_step_conf": 0.15515567362308502,
|
|
"num_tokens": 16851574.0,
|
|
"reward": 1.0689868927001953,
|
|
"reward_std": 0.21185335516929626,
|
|
"rewards/accuracy_reward_step": 0.5703125,
|
|
"rewards/final_brier_reward_step": 0.5867882370948792,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8263109922409058,
|
|
"step": 65
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5579763650894165,
|
|
"adv/mean_abs_reasoning": 0.45325833559036255,
|
|
"adv/mean_abs_step_conf": 0.7479343414306641,
|
|
"adv/ratio_final_to_reasoning": 1.2310338746725091,
|
|
"adv/ratio_step_to_reasoning": 1.6501281558484526,
|
|
"adv/std_final_conf": 0.78619784116745,
|
|
"adv/std_reasoning": 0.7206733822822571,
|
|
"adv/std_step_conf": 0.9343457818031311,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.5081289978678039,
|
|
"calib/avg_num_step_conf": 8.96484375,
|
|
"calib/ece": 0.43483739837398416,
|
|
"calib/final_conf_rate": 0.9609375,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.00016257995735602382,
|
|
"calib/mean_conf": 0.9795528455284557,
|
|
"calib/mu_c": 0.979626865671642,
|
|
"calib/mu_w": 0.979464285714286,
|
|
"calib/nonempty_final_conf_rate": 0.9609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.43483739837398416,
|
|
"calib/std_conf": 0.002066784360794499,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4463528301886792,
|
|
"calib/step_q_c_n": 1060.0,
|
|
"calib/step_q_gap": 0.10271720265831485,
|
|
"calib/step_q_w": 0.34363562753036436,
|
|
"calib/step_q_w_n": 1235.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2799.0,
|
|
"completions/max_terminated_length": 2799.0,
|
|
"completions/mean_length": 650.06640625,
|
|
"completions/mean_terminated_length": 655.18505859375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 135.0,
|
|
"epoch": 0.0704,
|
|
"grad_norm": 0.052809618413448334,
|
|
"learning_rate": 3.7222222222222225e-06,
|
|
"loss": 0.0081,
|
|
"mask/has_final_conf_rate": 0.9609375,
|
|
"mask/share_final_conf": 0.028058627620339394,
|
|
"mask/share_reasoning": 0.814427375793457,
|
|
"mask/share_step_conf": 0.1497015357017517,
|
|
"num_tokens": 17124343.0,
|
|
"reward": 1.0408997535705566,
|
|
"reward_std": 0.2173253446817398,
|
|
"rewards/accuracy_reward_step": 0.5234375,
|
|
"rewards/final_brier_reward_step": 0.5410004258155823,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"rewards/step_l2_reward": 0.8292826414108276,
|
|
"step": 66
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.514562726020813,
|
|
"adv/mean_abs_reasoning": 0.36621803045272827,
|
|
"adv/mean_abs_step_conf": 0.7596355676651001,
|
|
"adv/ratio_final_to_reasoning": 1.4050720697304202,
|
|
"adv/ratio_step_to_reasoning": 2.0742713479345047,
|
|
"adv/std_final_conf": 0.7502995729446411,
|
|
"adv/std_reasoning": 0.6611066460609436,
|
|
"adv/std_step_conf": 0.9344931244850159,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6056374787942059,
|
|
"calib/avg_num_step_conf": 8.1640625,
|
|
"calib/ece": 0.3581960784313728,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.002112749575884254,
|
|
"calib/mean_conf": 0.9778039215686277,
|
|
"calib/mu_c": 0.9786075949367091,
|
|
"calib/mu_w": 0.9764948453608249,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3581960784313728,
|
|
"calib/std_conf": 0.004139809637771501,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4553899480069324,
|
|
"calib/step_q_c_n": 1154.0,
|
|
"calib/step_q_gap": 0.08926815313513758,
|
|
"calib/step_q_w": 0.3661217948717948,
|
|
"calib/step_q_w_n": 936.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2240.0,
|
|
"completions/max_terminated_length": 2240.0,
|
|
"completions/mean_length": 578.56640625,
|
|
"completions/mean_terminated_length": 578.56640625,
|
|
"completions/min_length": 195.0,
|
|
"completions/min_terminated_length": 195.0,
|
|
"epoch": 0.07146666666666666,
|
|
"grad_norm": 0.06071910262107849,
|
|
"learning_rate": 3.694444444444445e-06,
|
|
"loss": 0.0373,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.030203592032194138,
|
|
"mask/share_reasoning": 0.8200080990791321,
|
|
"mask/share_step_conf": 0.1497882902622223,
|
|
"num_tokens": 17377464.0,
|
|
"reward": 1.1311116218566895,
|
|
"reward_std": 0.15913987159729004,
|
|
"rewards/accuracy_reward_step": 0.6171875,
|
|
"rewards/final_brier_reward_step": 0.6344921588897705,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/step_l2_reward": 0.8700499534606934,
|
|
"step": 67
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6473630666732788,
|
|
"adv/mean_abs_reasoning": 0.4455353021621704,
|
|
"adv/mean_abs_step_conf": 0.7649122476577759,
|
|
"adv/ratio_final_to_reasoning": 1.4530006119192886,
|
|
"adv/ratio_step_to_reasoning": 1.7168386970587473,
|
|
"adv/std_final_conf": 0.8262444734573364,
|
|
"adv/std_reasoning": 0.7205067873001099,
|
|
"adv/std_step_conf": 0.9339404702186584,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.501116213802781,
|
|
"calib/avg_num_step_conf": 8.4453125,
|
|
"calib/ece": 0.4406772908366535,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 2.2324276055307735e-05,
|
|
"calib/mean_conf": 0.9745418326693228,
|
|
"calib/mu_c": 0.97455223880597,
|
|
"calib/mu_w": 0.9745299145299147,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4406772908366535,
|
|
"calib/std_conf": 0.004978964018458066,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.44734989648033124,
|
|
"calib/step_q_c_n": 966.0,
|
|
"calib/step_q_gap": 0.09595357541009714,
|
|
"calib/step_q_w": 0.3513963210702341,
|
|
"calib/step_q_w_n": 1196.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2641.0,
|
|
"completions/max_terminated_length": 2641.0,
|
|
"completions/mean_length": 587.65625,
|
|
"completions/mean_terminated_length": 589.9608154296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 152.0,
|
|
"epoch": 0.07253333333333334,
|
|
"grad_norm": 0.06573013961315155,
|
|
"learning_rate": 3.6666666666666666e-06,
|
|
"loss": -0.046,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.0318446159362793,
|
|
"mask/share_reasoning": 0.8090634346008301,
|
|
"mask/share_step_conf": 0.15518571436405182,
|
|
"num_tokens": 17631992.0,
|
|
"reward": 1.0506805181503296,
|
|
"reward_std": 0.1893540322780609,
|
|
"rewards/accuracy_reward_step": 0.5234375,
|
|
"rewards/final_brier_reward_step": 0.5460590124130249,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/step_l2_reward": 0.8363473415374756,
|
|
"step": 68
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5252597332000732,
|
|
"adv/mean_abs_reasoning": 0.4546195864677429,
|
|
"adv/mean_abs_step_conf": 0.7643197774887085,
|
|
"adv/ratio_final_to_reasoning": 1.1553829813651078,
|
|
"adv/ratio_step_to_reasoning": 1.6812293184005613,
|
|
"adv/std_final_conf": 0.7402248978614807,
|
|
"adv/std_reasoning": 0.7014936804771423,
|
|
"adv/std_step_conf": 0.9350502490997314,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.5089355089355089,
|
|
"calib/avg_num_step_conf": 8.1796875,
|
|
"calib/ece": 0.4404016064257028,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.00017871017870996209,
|
|
"calib/mean_conf": 0.9705220883534137,
|
|
"calib/mu_c": 0.9706060606060605,
|
|
"calib/mu_w": 0.9704273504273505,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4404016064257028,
|
|
"calib/std_conf": 0.002224479104277308,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.40132663316582917,
|
|
"calib/step_q_c_n": 995.0,
|
|
"calib/step_q_gap": 0.025148289216784636,
|
|
"calib/step_q_w": 0.37617834394904454,
|
|
"calib/step_q_w_n": 1099.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 2817.0,
|
|
"completions/max_terminated_length": 2817.0,
|
|
"completions/mean_length": 623.11328125,
|
|
"completions/mean_terminated_length": 633.0040283203125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 173.0,
|
|
"epoch": 0.0736,
|
|
"grad_norm": 0.05324266478419304,
|
|
"learning_rate": 3.638888888888889e-06,
|
|
"loss": -0.0488,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.02733626589179039,
|
|
"mask/share_reasoning": 0.8253809213638306,
|
|
"mask/share_step_conf": 0.13165783882141113,
|
|
"num_tokens": 17896005.0,
|
|
"reward": 1.031106948852539,
|
|
"reward_std": 0.19873470067977905,
|
|
"rewards/accuracy_reward_step": 0.515625,
|
|
"rewards/final_brier_reward_step": 0.5418062806129456,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/step_l2_reward": 0.8151673674583435,
|
|
"step": 69
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5496417284011841,
|
|
"adv/mean_abs_reasoning": 0.5222122073173523,
|
|
"adv/mean_abs_step_conf": 0.7780110836029053,
|
|
"adv/ratio_final_to_reasoning": 1.0525256221503123,
|
|
"adv/ratio_step_to_reasoning": 1.4898370292789844,
|
|
"adv/std_final_conf": 0.7696312665939331,
|
|
"adv/std_reasoning": 0.7575502991676331,
|
|
"adv/std_step_conf": 0.9350816011428833,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.516640461215933,
|
|
"calib/avg_num_step_conf": 8.25,
|
|
"calib/ece": 0.5462799999999999,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.00033280922431877524,
|
|
"calib/mean_conf": 0.9702799999999999,
|
|
"calib/mu_c": 0.9704716981132075,
|
|
"calib/mu_w": 0.9701388888888888,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.5462799999999999,
|
|
"calib/std_conf": 0.0016497272501841038,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4809333333333333,
|
|
"calib/step_q_c_n": 750.0,
|
|
"calib/step_q_gap": 0.08823876651982376,
|
|
"calib/step_q_w": 0.39269456681350956,
|
|
"calib/step_q_w_n": 1362.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2657.0,
|
|
"completions/max_terminated_length": 2657.0,
|
|
"completions/mean_length": 594.40625,
|
|
"completions/mean_terminated_length": 599.0866088867188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 144.0,
|
|
"epoch": 0.07466666666666667,
|
|
"grad_norm": 0.04241788014769554,
|
|
"learning_rate": 3.6111111111111115e-06,
|
|
"loss": -0.0155,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.029813095927238464,
|
|
"mask/share_reasoning": 0.815823495388031,
|
|
"mask/share_step_conf": 0.14655089378356934,
|
|
"num_tokens": 18155165.0,
|
|
"reward": 0.989437460899353,
|
|
"reward_std": 0.21000359952449799,
|
|
"rewards/accuracy_reward_step": 0.4140625,
|
|
"rewards/final_brier_reward_step": 0.44679102301597595,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.8359725475311279,
|
|
"step": 70
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6323713064193726,
|
|
"adv/mean_abs_reasoning": 0.6069374084472656,
|
|
"adv/mean_abs_step_conf": 0.7431070804595947,
|
|
"adv/ratio_final_to_reasoning": 1.041905306244304,
|
|
"adv/ratio_step_to_reasoning": 1.2243553785236165,
|
|
"adv/std_final_conf": 0.818419337272644,
|
|
"adv/std_reasoning": 0.8099581599235535,
|
|
"adv/std_step_conf": 0.93465656042099,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.49574877428163594,
|
|
"calib/avg_num_step_conf": 8.83984375,
|
|
"calib/ece": 0.45429133858267723,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -0.00023893750387904422,
|
|
"calib/mean_conf": 0.9700393700787402,
|
|
"calib/mu_c": 0.9699236641221373,
|
|
"calib/mu_w": 0.9701626016260163,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.45429133858267723,
|
|
"calib/std_conf": 0.0022619814838146833,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.43796892341842397,
|
|
"calib/step_q_c_n": 901.0,
|
|
"calib/step_q_gap": 0.054371272904473966,
|
|
"calib/step_q_w": 0.38359765051395,
|
|
"calib/step_q_w_n": 1362.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2765.0,
|
|
"completions/max_terminated_length": 2765.0,
|
|
"completions/mean_length": 604.88671875,
|
|
"completions/mean_terminated_length": 604.88671875,
|
|
"completions/min_length": 174.0,
|
|
"completions/min_terminated_length": 174.0,
|
|
"epoch": 0.07573333333333333,
|
|
"grad_norm": 0.04573419690132141,
|
|
"learning_rate": 3.5833333333333335e-06,
|
|
"loss": 0.0376,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.030430704355239868,
|
|
"mask/share_reasoning": 0.8164864182472229,
|
|
"mask/share_step_conf": 0.15308287739753723,
|
|
"num_tokens": 18414424.0,
|
|
"reward": 1.0544276237487793,
|
|
"reward_std": 0.24058274924755096,
|
|
"rewards/accuracy_reward_step": 0.51171875,
|
|
"rewards/final_brier_reward_step": 0.5392640829086304,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8463940620422363,
|
|
"step": 71
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.48391905426979065,
|
|
"adv/mean_abs_reasoning": 0.44466525316238403,
|
|
"adv/mean_abs_step_conf": 0.7678430676460266,
|
|
"adv/ratio_final_to_reasoning": 1.0882771946497736,
|
|
"adv/ratio_step_to_reasoning": 1.726788999556985,
|
|
"adv/std_final_conf": 0.7654800415039062,
|
|
"adv/std_reasoning": 0.7391319274902344,
|
|
"adv/std_step_conf": 0.9339613318443298,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5026279128672746,
|
|
"calib/avg_num_step_conf": 7.640625,
|
|
"calib/ece": 0.412806324110672,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 5.255825734518105e-05,
|
|
"calib/mean_conf": 0.9701185770750989,
|
|
"calib/mu_c": 0.970141843971631,
|
|
"calib/mu_w": 0.9700892857142858,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.412806324110672,
|
|
"calib/std_conf": 0.001082455647243412,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.45534722222222224,
|
|
"calib/step_q_c_n": 1008.0,
|
|
"calib/step_q_gap": 0.05440840365682137,
|
|
"calib/step_q_w": 0.40093881856540087,
|
|
"calib/step_q_w_n": 948.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2657.0,
|
|
"completions/max_terminated_length": 2657.0,
|
|
"completions/mean_length": 541.44140625,
|
|
"completions/mean_terminated_length": 543.5647583007812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 188.0,
|
|
"epoch": 0.0768,
|
|
"grad_norm": 0.04849497973918915,
|
|
"learning_rate": 3.555555555555556e-06,
|
|
"loss": -0.0047,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.030670518055558205,
|
|
"mask/share_reasoning": 0.8153384923934937,
|
|
"mask/share_step_conf": 0.1500847041606903,
|
|
"num_tokens": 18657441.0,
|
|
"reward": 1.0819460153579712,
|
|
"reward_std": 0.18929322063922882,
|
|
"rewards/accuracy_reward_step": 0.5546875,
|
|
"rewards/final_brier_reward_step": 0.5721668004989624,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/step_l2_reward": 0.855941653251648,
|
|
"step": 72
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5332915186882019,
|
|
"adv/mean_abs_reasoning": 0.5199190378189087,
|
|
"adv/mean_abs_step_conf": 0.7636487483978271,
|
|
"adv/ratio_final_to_reasoning": 1.0257203139269366,
|
|
"adv/ratio_step_to_reasoning": 1.4687839699068899,
|
|
"adv/std_final_conf": 0.7628645896911621,
|
|
"adv/std_reasoning": 0.7575727105140686,
|
|
"adv/std_step_conf": 0.9346169233322144,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.497632702053881,
|
|
"calib/avg_num_step_conf": 7.37890625,
|
|
"calib/ece": 0.33086274509803926,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -4.734595892252891e-05,
|
|
"calib/mean_conf": 0.970078431372549,
|
|
"calib/mu_c": 0.9700613496932514,
|
|
"calib/mu_w": 0.970108695652174,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.33086274509803926,
|
|
"calib/std_conf": 0.0008821350493491766,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4796269335759782,
|
|
"calib/step_q_c_n": 1099.0,
|
|
"calib/step_q_gap": 0.06263959180382628,
|
|
"calib/step_q_w": 0.4169873417721519,
|
|
"calib/step_q_w_n": 790.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1942.0,
|
|
"completions/max_terminated_length": 1942.0,
|
|
"completions/mean_length": 516.22265625,
|
|
"completions/mean_terminated_length": 518.2470703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 192.0,
|
|
"epoch": 0.07786666666666667,
|
|
"grad_norm": 0.039155565202236176,
|
|
"learning_rate": 3.5277777777777784e-06,
|
|
"loss": -0.0826,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.031482987105846405,
|
|
"mask/share_reasoning": 0.8183671832084656,
|
|
"mask/share_step_conf": 0.14624357223510742,
|
|
"num_tokens": 18896626.0,
|
|
"reward": 1.1384083032608032,
|
|
"reward_std": 0.20831382274627686,
|
|
"rewards/accuracy_reward_step": 0.63671875,
|
|
"rewards/final_brier_reward_step": 0.6573105454444885,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/step_l2_reward": 0.8619623184204102,
|
|
"step": 73
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.41902828216552734,
|
|
"adv/mean_abs_reasoning": 0.40599530935287476,
|
|
"adv/mean_abs_step_conf": 0.7844444513320923,
|
|
"adv/ratio_final_to_reasoning": 1.0321012891341679,
|
|
"adv/ratio_step_to_reasoning": 1.9321515132340723,
|
|
"adv/std_final_conf": 0.6868499517440796,
|
|
"adv/std_reasoning": 0.681566059589386,
|
|
"adv/std_step_conf": 0.9346157312393188,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.508130081300813,
|
|
"calib/avg_num_step_conf": 7.19921875,
|
|
"calib/ece": 0.4858267716535433,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.00016260162601633432,
|
|
"calib/mean_conf": 0.9700787401574803,
|
|
"calib/mu_c": 0.9701626016260163,
|
|
"calib/mu_w": 0.97,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.4858267716535433,
|
|
"calib/std_conf": 0.000883856075615892,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.47340071343638523,
|
|
"calib/step_q_c_n": 841.0,
|
|
"calib/step_q_gap": 0.06918913659007786,
|
|
"calib/step_q_w": 0.4042115768463074,
|
|
"calib/step_q_w_n": 1002.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2841.0,
|
|
"completions/max_terminated_length": 2841.0,
|
|
"completions/mean_length": 558.4921875,
|
|
"completions/mean_terminated_length": 558.4921875,
|
|
"completions/min_length": 165.0,
|
|
"completions/min_terminated_length": 165.0,
|
|
"epoch": 0.07893333333333333,
|
|
"grad_norm": 0.046783946454524994,
|
|
"learning_rate": 3.5e-06,
|
|
"loss": 0.06,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.0309610553085804,
|
|
"mask/share_reasoning": 0.8248661160469055,
|
|
"mask/share_step_conf": 0.14417284727096558,
|
|
"num_tokens": 19143528.0,
|
|
"reward": 1.0438826084136963,
|
|
"reward_std": 0.1730068027973175,
|
|
"rewards/accuracy_reward_step": 0.48046875,
|
|
"rewards/final_brier_reward_step": 0.5102828145027161,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/step_l2_reward": 0.8553006052970886,
|
|
"step": 74
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4345305562019348,
|
|
"adv/mean_abs_reasoning": 0.41730642318725586,
|
|
"adv/mean_abs_step_conf": 0.769504725933075,
|
|
"adv/ratio_final_to_reasoning": 1.041274545651913,
|
|
"adv/ratio_step_to_reasoning": 1.8439800663882397,
|
|
"adv/std_final_conf": 0.6885610222816467,
|
|
"adv/std_reasoning": 0.6816553473472595,
|
|
"adv/std_step_conf": 0.9336322546005249,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5056497175141242,
|
|
"calib/avg_num_step_conf": 7.20703125,
|
|
"calib/ece": 0.2732283464566928,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.00011299435028266913,
|
|
"calib/mean_conf": 0.9700787401574802,
|
|
"calib/mu_c": 0.9701129943502824,
|
|
"calib/mu_w": 0.9699999999999998,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.2732283464566928,
|
|
"calib/std_conf": 0.0008838560756158926,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.45865293185419964,
|
|
"calib/step_q_c_n": 1262.0,
|
|
"calib/step_q_gap": 0.02942480149399379,
|
|
"calib/step_q_w": 0.42922813036020585,
|
|
"calib/step_q_w_n": 583.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1781.0,
|
|
"completions/max_terminated_length": 1781.0,
|
|
"completions/mean_length": 496.20703125,
|
|
"completions/mean_terminated_length": 498.1529846191406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 185.0,
|
|
"epoch": 0.08,
|
|
"grad_norm": 0.03853510692715645,
|
|
"learning_rate": 3.4722222222222224e-06,
|
|
"loss": -0.0564,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03274504095315933,
|
|
"mask/share_reasoning": 0.8104146718978882,
|
|
"mask/share_step_conf": 0.1529339998960495,
|
|
"num_tokens": 19375309.0,
|
|
"reward": 1.164655089378357,
|
|
"reward_std": 0.18265056610107422,
|
|
"rewards/accuracy_reward_step": 0.69140625,
|
|
"rewards/final_brier_reward_step": 0.708564043045044,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/step_l2_reward": 0.8560182452201843,
|
|
"step": 75
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.43551361560821533,
|
|
"adv/mean_abs_reasoning": 0.43244290351867676,
|
|
"adv/mean_abs_step_conf": 0.7513256669044495,
|
|
"adv/ratio_final_to_reasoning": 1.0071008497643341,
|
|
"adv/ratio_step_to_reasoning": 1.7373985346761518,
|
|
"adv/std_final_conf": 0.7214844226837158,
|
|
"adv/std_reasoning": 0.7204523086547852,
|
|
"adv/std_step_conf": 0.9339141249656677,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 6.44140625,
|
|
"calib/ece": 0.3582352941176471,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0,
|
|
"calib/mean_conf": 0.9700000000000001,
|
|
"calib/mu_c": 0.97,
|
|
"calib/mu_w": 0.97,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3582352941176471,
|
|
"calib/std_conf": 1.1102230246251565e-16,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.45638655462184874,
|
|
"calib/step_q_c_n": 952.0,
|
|
"calib/step_q_gap": 0.05774953883992617,
|
|
"calib/step_q_w": 0.39863701578192257,
|
|
"calib/step_q_w_n": 697.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2476.0,
|
|
"completions/max_terminated_length": 2476.0,
|
|
"completions/mean_length": 541.484375,
|
|
"completions/mean_terminated_length": 541.484375,
|
|
"completions/min_length": 187.0,
|
|
"completions/min_terminated_length": 187.0,
|
|
"epoch": 0.08106666666666666,
|
|
"grad_norm": 0.04242115840315819,
|
|
"learning_rate": 3.444444444444445e-06,
|
|
"loss": -0.0574,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.032614558935165405,
|
|
"mask/share_reasoning": 0.8351109027862549,
|
|
"mask/share_step_conf": 0.1322745382785797,
|
|
"num_tokens": 19616985.0,
|
|
"reward": 1.1245042085647583,
|
|
"reward_std": 0.17931437492370605,
|
|
"rewards/accuracy_reward_step": 0.609375,
|
|
"rewards/final_brier_reward_step": 0.6314507722854614,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/step_l2_reward": 0.8648300170898438,
|
|
"step": 76
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.45722830295562744,
|
|
"adv/mean_abs_reasoning": 0.40404924750328064,
|
|
"adv/mean_abs_step_conf": 0.7532762885093689,
|
|
"adv/ratio_final_to_reasoning": 1.1316152815057898,
|
|
"adv/ratio_step_to_reasoning": 1.864318008668616,
|
|
"adv/std_final_conf": 0.7099508047103882,
|
|
"adv/std_reasoning": 0.6816768646240234,
|
|
"adv/std_step_conf": 0.9351339340209961,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 7.328125,
|
|
"calib/ece": 0.3280708661417322,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -0.00030674846625766694,
|
|
"calib/mean_conf": 0.9698031496062991,
|
|
"calib/mu_c": 0.9696932515337423,
|
|
"calib/mu_w": 0.97,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.3280708661417322,
|
|
"calib/std_conf": 0.0038115848483745015,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.40374316939890703,
|
|
"calib/step_q_c_n": 1098.0,
|
|
"calib/step_q_gap": 0.007803580709961011,
|
|
"calib/step_q_w": 0.395939588688946,
|
|
"calib/step_q_w_n": 778.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2061.0,
|
|
"completions/max_terminated_length": 2061.0,
|
|
"completions/mean_length": 508.76953125,
|
|
"completions/mean_terminated_length": 512.7755737304688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 153.0,
|
|
"epoch": 0.08213333333333334,
|
|
"grad_norm": 0.0632067620754242,
|
|
"learning_rate": 3.416666666666667e-06,
|
|
"loss": -0.1064,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03560970723628998,
|
|
"mask/share_reasoning": 0.8034310340881348,
|
|
"mask/share_step_conf": 0.15314674377441406,
|
|
"num_tokens": 19851894.0,
|
|
"reward": 1.1153215169906616,
|
|
"reward_std": 0.18604689836502075,
|
|
"rewards/accuracy_reward_step": 0.63671875,
|
|
"rewards/final_brier_reward_step": 0.6566660404205322,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/step_l2_reward": 0.833172082901001,
|
|
"step": 77
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5653814077377319,
|
|
"adv/mean_abs_reasoning": 0.5597813129425049,
|
|
"adv/mean_abs_step_conf": 0.7569602727890015,
|
|
"adv/ratio_final_to_reasoning": 1.0100040759949453,
|
|
"adv/ratio_step_to_reasoning": 1.3522428407086695,
|
|
"adv/std_final_conf": 0.7767623662948608,
|
|
"adv/std_reasoning": 0.7754603028297424,
|
|
"adv/std_step_conf": 0.9340821504592896,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5,
|
|
"calib/avg_num_step_conf": 7.59765625,
|
|
"calib/ece": 0.37784313725490204,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -3.3306690738754696e-16,
|
|
"calib/mean_conf": 0.9700000000000001,
|
|
"calib/mu_c": 0.9699999999999998,
|
|
"calib/mu_w": 0.9700000000000001,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.37784313725490204,
|
|
"calib/std_conf": 1.1102230246251565e-16,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3812932454695222,
|
|
"calib/step_q_c_n": 1214.0,
|
|
"calib/step_q_gap": 0.020171494443530447,
|
|
"calib/step_q_w": 0.36112175102599176,
|
|
"calib/step_q_w_n": 731.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1415.0,
|
|
"completions/max_terminated_length": 1415.0,
|
|
"completions/mean_length": 575.14453125,
|
|
"completions/mean_terminated_length": 577.4000244140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 167.0,
|
|
"epoch": 0.0832,
|
|
"grad_norm": 0.04561072587966919,
|
|
"learning_rate": 3.3888888888888893e-06,
|
|
"loss": -0.0392,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.02947339601814747,
|
|
"mask/share_reasoning": 0.8271172046661377,
|
|
"mask/share_step_conf": 0.1395030915737152,
|
|
"num_tokens": 20107155.0,
|
|
"reward": 1.1115992069244385,
|
|
"reward_std": 0.2046259045600891,
|
|
"rewards/accuracy_reward_step": 0.58984375,
|
|
"rewards/final_brier_reward_step": 0.6133222579956055,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/step_l2_reward": 0.8617924451828003,
|
|
"step": 78
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4079931676387787,
|
|
"adv/mean_abs_reasoning": 0.3414595127105713,
|
|
"adv/mean_abs_step_conf": 0.7496354579925537,
|
|
"adv/ratio_final_to_reasoning": 1.1948507874331877,
|
|
"adv/ratio_step_to_reasoning": 2.1953860709335737,
|
|
"adv/std_final_conf": 0.7016916871070862,
|
|
"adv/std_reasoning": 0.6401836276054382,
|
|
"adv/std_step_conf": 0.9334725737571716,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.4964635854341737,
|
|
"calib/avg_num_step_conf": 8.0546875,
|
|
"calib/ece": 0.30387351778656135,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.9683794466403162,
|
|
"calib/gap": -0.002706582633052901,
|
|
"calib/mean_conf": 0.9658498023715416,
|
|
"calib/mu_c": 0.9649404761904763,
|
|
"calib/mu_w": 0.9676470588235292,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3028458498023716,
|
|
"calib/std_conf": 0.024409410847163165,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.37789432176656146,
|
|
"calib/step_q_c_n": 1268.0,
|
|
"calib/step_q_gap": 0.04780616055749354,
|
|
"calib/step_q_w": 0.3300881612090679,
|
|
"calib/step_q_w_n": 794.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2633.0,
|
|
"completions/max_terminated_length": 2633.0,
|
|
"completions/mean_length": 575.953125,
|
|
"completions/mean_terminated_length": 575.953125,
|
|
"completions/min_length": 158.0,
|
|
"completions/min_terminated_length": 158.0,
|
|
"epoch": 0.08426666666666667,
|
|
"grad_norm": 0.05421285331249237,
|
|
"learning_rate": 3.3611111111111117e-06,
|
|
"loss": 0.0006,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.029839934781193733,
|
|
"mask/share_reasoning": 0.8252792358398438,
|
|
"mask/share_step_conf": 0.14488080143928528,
|
|
"num_tokens": 20360975.0,
|
|
"reward": 1.152230978012085,
|
|
"reward_std": 0.14872446656227112,
|
|
"rewards/accuracy_reward_step": 0.65625,
|
|
"rewards/final_brier_reward_step": 0.6759929656982422,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8663750290870667,
|
|
"step": 79
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4370696544647217,
|
|
"adv/mean_abs_reasoning": 0.4167598485946655,
|
|
"adv/mean_abs_step_conf": 0.7575744390487671,
|
|
"adv/ratio_final_to_reasoning": 1.0487326356858555,
|
|
"adv/ratio_step_to_reasoning": 1.8177721332881394,
|
|
"adv/std_final_conf": 0.6915924549102783,
|
|
"adv/std_reasoning": 0.681614875793457,
|
|
"adv/std_step_conf": 0.9341986775398254,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5116374589266155,
|
|
"calib/avg_num_step_conf": 8.19140625,
|
|
"calib/ece": 0.31618110236220476,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.9960629921259843,
|
|
"calib/gap": 0.001143209200437978,
|
|
"calib/mean_conf": 0.9697244094488189,
|
|
"calib/mu_c": 0.9701204819277108,
|
|
"calib/mu_w": 0.9689772727272729,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.31618110236220476,
|
|
"calib/std_conf": 0.005709747385167787,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4008960843373494,
|
|
"calib/step_q_c_n": 1328.0,
|
|
"calib/step_q_gap": 0.05092209213969012,
|
|
"calib/step_q_w": 0.3499739921976593,
|
|
"calib/step_q_w_n": 769.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 3014.0,
|
|
"completions/max_terminated_length": 3014.0,
|
|
"completions/mean_length": 541.7734375,
|
|
"completions/mean_terminated_length": 541.7734375,
|
|
"completions/min_length": 190.0,
|
|
"completions/min_terminated_length": 190.0,
|
|
"epoch": 0.08533333333333333,
|
|
"grad_norm": 0.04697663336992264,
|
|
"learning_rate": 3.3333333333333333e-06,
|
|
"loss": 0.0711,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03264261782169342,
|
|
"mask/share_reasoning": 0.8089589476585388,
|
|
"mask/share_step_conf": 0.15839841961860657,
|
|
"num_tokens": 20601829.0,
|
|
"reward": 1.139902114868164,
|
|
"reward_std": 0.16698545217514038,
|
|
"rewards/accuracy_reward_step": 0.6484375,
|
|
"rewards/final_brier_reward_step": 0.6688238382339478,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/step_l2_reward": 0.8552368879318237,
|
|
"step": 80
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.40665262937545776,
|
|
"adv/mean_abs_reasoning": 0.38142406940460205,
|
|
"adv/mean_abs_step_conf": 0.7557171583175659,
|
|
"adv/ratio_final_to_reasoning": 1.0661430727490195,
|
|
"adv/ratio_step_to_reasoning": 1.9813043248613817,
|
|
"adv/std_final_conf": 0.6708281636238098,
|
|
"adv/std_reasoning": 0.6612600684165955,
|
|
"adv/std_step_conf": 0.9344485998153687,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.5261813537675607,
|
|
"calib/avg_num_step_conf": 7.49609375,
|
|
"calib/ece": 0.3181526104417671,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.9879518072289156,
|
|
"calib/gap": 0.004093231162196842,
|
|
"calib/mean_conf": 0.9687550200803213,
|
|
"calib/mu_c": 0.9701851851851852,
|
|
"calib/mu_w": 0.9660919540229883,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3181526104417671,
|
|
"calib/std_conf": 0.01259726116483335,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4059841740850643,
|
|
"calib/step_q_c_n": 1011.0,
|
|
"calib/step_q_gap": 0.0693762445696458,
|
|
"calib/step_q_w": 0.3366079295154185,
|
|
"calib/step_q_w_n": 908.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2366.0,
|
|
"completions/max_terminated_length": 2366.0,
|
|
"completions/mean_length": 546.99609375,
|
|
"completions/mean_terminated_length": 553.4822387695312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 180.0,
|
|
"epoch": 0.0864,
|
|
"grad_norm": 0.04766637831926346,
|
|
"learning_rate": 3.3055555555555558e-06,
|
|
"loss": -0.1404,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.032697394490242004,
|
|
"mask/share_reasoning": 0.8120584487915039,
|
|
"mask/share_step_conf": 0.1435253918170929,
|
|
"num_tokens": 20848108.0,
|
|
"reward": 1.109546184539795,
|
|
"reward_std": 0.167487233877182,
|
|
"rewards/accuracy_reward_step": 0.63671875,
|
|
"rewards/final_brier_reward_step": 0.6547554731369019,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/step_l2_reward": 0.8283078670501709,
|
|
"step": 81
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.42695850133895874,
|
|
"adv/mean_abs_reasoning": 0.3536386787891388,
|
|
"adv/mean_abs_step_conf": 0.7595793008804321,
|
|
"adv/ratio_final_to_reasoning": 1.2073297604234567,
|
|
"adv/ratio_step_to_reasoning": 2.147896557812162,
|
|
"adv/std_final_conf": 0.676507294178009,
|
|
"adv/std_reasoning": 0.6185661554336548,
|
|
"adv/std_step_conf": 0.93404620885849,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5224534847051403,
|
|
"calib/avg_num_step_conf": 7.01171875,
|
|
"calib/ece": 0.3791796875000001,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.98828125,
|
|
"calib/gap": 0.0015736360769472713,
|
|
"calib/mean_conf": 0.9690234375000002,
|
|
"calib/mu_c": 0.9696688741721854,
|
|
"calib/mu_w": 0.9680952380952381,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3791796875000001,
|
|
"calib/std_conf": 0.010871382188277335,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.43265495867768594,
|
|
"calib/step_q_c_n": 968.0,
|
|
"calib/step_q_gap": 0.07552073860513453,
|
|
"calib/step_q_w": 0.3571342200725514,
|
|
"calib/step_q_w_n": 827.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1516.0,
|
|
"completions/max_terminated_length": 1516.0,
|
|
"completions/mean_length": 488.78125,
|
|
"completions/mean_terminated_length": 490.69805908203125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 204.0,
|
|
"epoch": 0.08746666666666666,
|
|
"grad_norm": 0.05314906686544418,
|
|
"learning_rate": 3.277777777777778e-06,
|
|
"loss": -0.0194,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.03320850431919098,
|
|
"mask/share_reasoning": 0.8129414319992065,
|
|
"mask/share_step_conf": 0.14994382858276367,
|
|
"num_tokens": 21078788.0,
|
|
"reward": 1.1136729717254639,
|
|
"reward_std": 0.1398569941520691,
|
|
"rewards/accuracy_reward_step": 0.58984375,
|
|
"rewards/final_brier_reward_step": 0.6149379014968872,
|
|
"rewards/format_reward_step": 1.0,
|
|
"rewards/step_l2_reward": 0.8629594445228577,
|
|
"step": 82
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.41274362802505493,
|
|
"adv/mean_abs_reasoning": 0.33305490016937256,
|
|
"adv/mean_abs_step_conf": 0.7500869631767273,
|
|
"adv/ratio_final_to_reasoning": 1.239266042370664,
|
|
"adv/ratio_step_to_reasoning": 2.252142102684201,
|
|
"adv/std_final_conf": 0.6662124395370483,
|
|
"adv/std_reasoning": 0.6185774207115173,
|
|
"adv/std_step_conf": 0.9347962737083435,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.5276705276705277,
|
|
"calib/avg_num_step_conf": 7.77734375,
|
|
"calib/ece": 0.41266932270916334,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0006241956241953561,
|
|
"calib/mean_conf": 0.9704382470119521,
|
|
"calib/mu_c": 0.9707142857142855,
|
|
"calib/mu_w": 0.9700900900900902,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.41266932270916334,
|
|
"calib/std_conf": 0.002233208997169965,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4368304712041885,
|
|
"calib/step_q_c_n": 955.0,
|
|
"calib/step_q_gap": 0.0845042163779337,
|
|
"calib/step_q_w": 0.3523262548262548,
|
|
"calib/step_q_w_n": 1036.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2395.0,
|
|
"completions/max_terminated_length": 2395.0,
|
|
"completions/mean_length": 609.265625,
|
|
"completions/mean_terminated_length": 614.06298828125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 163.0,
|
|
"epoch": 0.08853333333333334,
|
|
"grad_norm": 0.040096431970596313,
|
|
"learning_rate": 3.2500000000000002e-06,
|
|
"loss": -0.0661,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.029325375333428383,
|
|
"mask/share_reasoning": 0.8259421586990356,
|
|
"mask/share_step_conf": 0.13691997528076172,
|
|
"num_tokens": 21342024.0,
|
|
"reward": 1.0817862749099731,
|
|
"reward_std": 0.1610364317893982,
|
|
"rewards/accuracy_reward_step": 0.546875,
|
|
"rewards/final_brier_reward_step": 0.5719507932662964,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/step_l2_reward": 0.8574353456497192,
|
|
"step": 83
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5629602670669556,
|
|
"adv/mean_abs_reasoning": 0.48572349548339844,
|
|
"adv/mean_abs_step_conf": 0.755538821220398,
|
|
"adv/ratio_final_to_reasoning": 1.159013867564076,
|
|
"adv/ratio_step_to_reasoning": 1.5554916083861163,
|
|
"adv/std_final_conf": 0.7861579656600952,
|
|
"adv/std_reasoning": 0.7574853897094727,
|
|
"adv/std_step_conf": 0.9348664283752441,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5920216147488875,
|
|
"calib/avg_num_step_conf": 6.91015625,
|
|
"calib/ece": 0.40600790513833995,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.9960474308300395,
|
|
"calib/gap": 0.0028111888111884697,
|
|
"calib/mean_conf": 0.9712252964426878,
|
|
"calib/mu_c": 0.9724475524475522,
|
|
"calib/mu_w": 0.9696363636363637,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.40600790513833995,
|
|
"calib/std_conf": 0.007627741677189908,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.48983559127439724,
|
|
"calib/step_q_c_n": 871.0,
|
|
"calib/step_q_gap": 0.12687345318976467,
|
|
"calib/step_q_w": 0.36296213808463257,
|
|
"calib/step_q_w_n": 898.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2603.0,
|
|
"completions/max_terminated_length": 2603.0,
|
|
"completions/mean_length": 503.91015625,
|
|
"completions/mean_terminated_length": 507.8779602050781,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 126.0,
|
|
"epoch": 0.0896,
|
|
"grad_norm": 0.05198689177632332,
|
|
"learning_rate": 3.2222222222222227e-06,
|
|
"loss": -0.0975,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.035145025700330734,
|
|
"mask/share_reasoning": 0.8113081455230713,
|
|
"mask/share_step_conf": 0.14573431015014648,
|
|
"num_tokens": 21576945.0,
|
|
"reward": 1.0832618474960327,
|
|
"reward_std": 0.20970940589904785,
|
|
"rewards/accuracy_reward_step": 0.55859375,
|
|
"rewards/final_brier_reward_step": 0.5838117599487305,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8488913178443909,
|
|
"step": 84
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6328294277191162,
|
|
"adv/mean_abs_reasoning": 0.4318961501121521,
|
|
"adv/mean_abs_step_conf": 0.7624772191047668,
|
|
"adv/ratio_final_to_reasoning": 1.465235167191898,
|
|
"adv/ratio_step_to_reasoning": 1.7654179573186088,
|
|
"adv/std_final_conf": 0.8357207775115967,
|
|
"adv/std_reasoning": 0.72041255235672,
|
|
"adv/std_step_conf": 0.9348968863487244,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.5754936120789779,
|
|
"calib/avg_num_step_conf": 7.08203125,
|
|
"calib/ece": 0.4671887550200804,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0016898954703832292,
|
|
"calib/mean_conf": 0.9732128514056225,
|
|
"calib/mu_c": 0.9740476190476192,
|
|
"calib/mu_w": 0.9723577235772359,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.4671887550200804,
|
|
"calib/std_conf": 0.005607553309464117,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.5065581339712919,
|
|
"calib/step_q_c_n": 836.0,
|
|
"calib/step_q_gap": 0.12335444922205957,
|
|
"calib/step_q_w": 0.38320368474923233,
|
|
"calib/step_q_w_n": 977.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2993.0,
|
|
"completions/max_terminated_length": 2993.0,
|
|
"completions/mean_length": 585.8671875,
|
|
"completions/mean_terminated_length": 590.4802856445312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 182.0,
|
|
"epoch": 0.09066666666666667,
|
|
"grad_norm": 0.05411646142601967,
|
|
"learning_rate": 3.1944444444444443e-06,
|
|
"loss": -0.0524,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.032674334943294525,
|
|
"mask/share_reasoning": 0.8182451725006104,
|
|
"mask/share_step_conf": 0.14126797020435333,
|
|
"num_tokens": 21834751.0,
|
|
"reward": 1.0298364162445068,
|
|
"reward_std": 0.1878812611103058,
|
|
"rewards/accuracy_reward_step": 0.4921875,
|
|
"rewards/final_brier_reward_step": 0.5180214643478394,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/step_l2_reward": 0.832455039024353,
|
|
"step": 85
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.544785737991333,
|
|
"adv/mean_abs_reasoning": 0.3841592073440552,
|
|
"adv/mean_abs_step_conf": 0.763214111328125,
|
|
"adv/ratio_final_to_reasoning": 1.4181249012819308,
|
|
"adv/ratio_step_to_reasoning": 1.9867130521346221,
|
|
"adv/std_final_conf": 0.7668167948722839,
|
|
"adv/std_reasoning": 0.6611892580986023,
|
|
"adv/std_step_conf": 0.9353486895561218,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6578898050974513,
|
|
"calib/avg_num_step_conf": 7.19140625,
|
|
"calib/ece": 0.4315748031496064,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0031146926536730346,
|
|
"calib/mean_conf": 0.9748818897637797,
|
|
"calib/mu_c": 0.9763043478260871,
|
|
"calib/mu_w": 0.9731896551724141,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4315748031496064,
|
|
"calib/std_conf": 0.006000805947482726,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5288233155080214,
|
|
"calib/step_q_c_n": 935.0,
|
|
"calib/step_q_gap": 0.05683247665592428,
|
|
"calib/step_q_w": 0.47199083885209714,
|
|
"calib/step_q_w_n": 906.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2596.0,
|
|
"completions/max_terminated_length": 2596.0,
|
|
"completions/mean_length": 535.390625,
|
|
"completions/mean_terminated_length": 537.490234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 128.0,
|
|
"epoch": 0.09173333333333333,
|
|
"grad_norm": 0.0701175406575203,
|
|
"learning_rate": 3.1666666666666667e-06,
|
|
"loss": -0.0438,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03447920083999634,
|
|
"mask/share_reasoning": 0.8152116537094116,
|
|
"mask/share_step_conf": 0.14640289545059204,
|
|
"num_tokens": 22077323.0,
|
|
"reward": 1.0588345527648926,
|
|
"reward_std": 0.18509967625141144,
|
|
"rewards/accuracy_reward_step": 0.5390625,
|
|
"rewards/final_brier_reward_step": 0.5626976490020752,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/step_l2_reward": 0.8324810266494751,
|
|
"step": 86
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5743167400360107,
|
|
"adv/mean_abs_reasoning": 0.5077738165855408,
|
|
"adv/mean_abs_step_conf": 0.7713330388069153,
|
|
"adv/ratio_final_to_reasoning": 1.131048355147434,
|
|
"adv/ratio_step_to_reasoning": 1.5190484692449175,
|
|
"adv/std_final_conf": 0.7639585733413696,
|
|
"adv/std_reasoning": 0.7394582033157349,
|
|
"adv/std_step_conf": 0.9350627660751343,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5658251319355299,
|
|
"calib/avg_num_step_conf": 6.44140625,
|
|
"calib/ece": 0.30130434782608717,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0014413065183285223,
|
|
"calib/mean_conf": 0.9771936758893283,
|
|
"calib/mu_c": 0.9776608187134505,
|
|
"calib/mu_w": 0.9762195121951219,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.30130434782608717,
|
|
"calib/std_conf": 0.004913283552019082,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5630849898580123,
|
|
"calib/step_q_c_n": 986.0,
|
|
"calib/step_q_gap": 0.040424356373849335,
|
|
"calib/step_q_w": 0.5226606334841629,
|
|
"calib/step_q_w_n": 663.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2570.0,
|
|
"completions/max_terminated_length": 2570.0,
|
|
"completions/mean_length": 477.34765625,
|
|
"completions/mean_terminated_length": 481.1062927246094,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 150.0,
|
|
"epoch": 0.0928,
|
|
"grad_norm": 0.05513214319944382,
|
|
"learning_rate": 3.138888888888889e-06,
|
|
"loss": -0.0012,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03634363412857056,
|
|
"mask/share_reasoning": 0.812298595905304,
|
|
"mask/share_step_conf": 0.1435452699661255,
|
|
"num_tokens": 22305020.0,
|
|
"reward": 1.1288774013519287,
|
|
"reward_std": 0.23732128739356995,
|
|
"rewards/accuracy_reward_step": 0.66796875,
|
|
"rewards/final_brier_reward_step": 0.682665228843689,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8292262554168701,
|
|
"step": 87
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.526688277721405,
|
|
"adv/mean_abs_reasoning": 0.4350961446762085,
|
|
"adv/mean_abs_step_conf": 0.7446381449699402,
|
|
"adv/ratio_final_to_reasoning": 1.2105101002753262,
|
|
"adv/ratio_step_to_reasoning": 1.711433562630365,
|
|
"adv/std_final_conf": 0.782920777797699,
|
|
"adv/std_reasoning": 0.7204387784004211,
|
|
"adv/std_step_conf": 0.9349430203437805,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6315269994905757,
|
|
"calib/avg_num_step_conf": 7.00390625,
|
|
"calib/ece": 0.38584313725490216,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0026305399898116644,
|
|
"calib/mean_conf": 0.9780000000000002,
|
|
"calib/mu_c": 0.9790728476821194,
|
|
"calib/mu_w": 0.9764423076923078,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.38584313725490216,
|
|
"calib/std_conf": 0.0040000000000000036,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5448845401174169,
|
|
"calib/step_q_c_n": 1022.0,
|
|
"calib/step_q_gap": 0.11432682286709261,
|
|
"calib/step_q_w": 0.4305577172503243,
|
|
"calib/step_q_w_n": 771.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2205.0,
|
|
"completions/max_terminated_length": 2205.0,
|
|
"completions/mean_length": 539.45703125,
|
|
"completions/mean_terminated_length": 539.45703125,
|
|
"completions/min_length": 189.0,
|
|
"completions/min_terminated_length": 189.0,
|
|
"epoch": 0.09386666666666667,
|
|
"grad_norm": 0.05252831056714058,
|
|
"learning_rate": 3.1111111111111116e-06,
|
|
"loss": 0.0136,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.03185161575675011,
|
|
"mask/share_reasoning": 0.8286886215209961,
|
|
"mask/share_step_conf": 0.1394597440958023,
|
|
"num_tokens": 22552969.0,
|
|
"reward": 1.1006900072097778,
|
|
"reward_std": 0.18641909956932068,
|
|
"rewards/accuracy_reward_step": 0.58984375,
|
|
"rewards/final_brier_reward_step": 0.6084863543510437,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/step_l2_reward": 0.850470781326294,
|
|
"step": 88
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5511601567268372,
|
|
"adv/mean_abs_reasoning": 0.3774305284023285,
|
|
"adv/mean_abs_step_conf": 0.755623459815979,
|
|
"adv/ratio_final_to_reasoning": 1.460295644498896,
|
|
"adv/ratio_step_to_reasoning": 2.0020199823648324,
|
|
"adv/std_final_conf": 0.7930145859718323,
|
|
"adv/std_reasoning": 0.6612715125083923,
|
|
"adv/std_step_conf": 0.9353261590003967,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6280397022332507,
|
|
"calib/avg_num_step_conf": 7.1015625,
|
|
"calib/ece": 0.4659055118110238,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0024714640198508775,
|
|
"calib/mean_conf": 0.977716535433071,
|
|
"calib/mu_c": 0.978923076923077,
|
|
"calib/mu_w": 0.9764516129032261,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.4659055118110238,
|
|
"calib/std_conf": 0.004470194622751896,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.48424789410348973,
|
|
"calib/step_q_c_n": 831.0,
|
|
"calib/step_q_gap": 0.10486228113489804,
|
|
"calib/step_q_w": 0.3793856129685917,
|
|
"calib/step_q_w_n": 987.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2048.0,
|
|
"completions/max_terminated_length": 2048.0,
|
|
"completions/mean_length": 566.0546875,
|
|
"completions/mean_terminated_length": 568.2745361328125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 158.0,
|
|
"epoch": 0.09493333333333333,
|
|
"grad_norm": 0.06655947118997574,
|
|
"learning_rate": 3.0833333333333336e-06,
|
|
"loss": -0.0578,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03302885964512825,
|
|
"mask/share_reasoning": 0.8250543475151062,
|
|
"mask/share_step_conf": 0.13801056146621704,
|
|
"num_tokens": 22806767.0,
|
|
"reward": 1.0520408153533936,
|
|
"reward_std": 0.1870345175266266,
|
|
"rewards/accuracy_reward_step": 0.5078125,
|
|
"rewards/final_brier_reward_step": 0.5262097120285034,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8524354100227356,
|
|
"step": 89
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5208219885826111,
|
|
"adv/mean_abs_reasoning": 0.4154004752635956,
|
|
"adv/mean_abs_step_conf": 0.7598018646240234,
|
|
"adv/ratio_final_to_reasoning": 1.25378284233334,
|
|
"adv/ratio_step_to_reasoning": 1.829082800499651,
|
|
"adv/std_final_conf": 0.7493654489517212,
|
|
"adv/std_reasoning": 0.6815720200538635,
|
|
"adv/std_step_conf": 0.9325631260871887,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5939135919268368,
|
|
"calib/avg_num_step_conf": 6.89453125,
|
|
"calib/ece": 0.38828125000000036,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0016650898770105282,
|
|
"calib/mean_conf": 0.9781250000000004,
|
|
"calib/mu_c": 0.9788079470198678,
|
|
"calib/mu_w": 0.9771428571428573,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.38828125000000036,
|
|
"calib/std_conf": 0.004463392767839285,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4680568136272545,
|
|
"calib/step_q_c_n": 998.0,
|
|
"calib/step_q_gap": 0.0973006206676717,
|
|
"calib/step_q_w": 0.3707561929595828,
|
|
"calib/step_q_w_n": 767.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2116.0,
|
|
"completions/max_terminated_length": 2116.0,
|
|
"completions/mean_length": 504.16015625,
|
|
"completions/mean_terminated_length": 506.1372985839844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 132.0,
|
|
"epoch": 0.096,
|
|
"grad_norm": 0.05374359339475632,
|
|
"learning_rate": 3.055555555555556e-06,
|
|
"loss": -0.0964,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.035145342350006104,
|
|
"mask/share_reasoning": 0.8199383616447449,
|
|
"mask/share_step_conf": 0.14101001620292664,
|
|
"num_tokens": 23039152.0,
|
|
"reward": 1.1121408939361572,
|
|
"reward_std": 0.16609475016593933,
|
|
"rewards/accuracy_reward_step": 0.58984375,
|
|
"rewards/final_brier_reward_step": 0.6080952882766724,
|
|
"rewards/format_reward_step": 1.0,
|
|
"rewards/step_l2_reward": 0.8654782772064209,
|
|
"step": 90
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5854384899139404,
|
|
"adv/mean_abs_reasoning": 0.46899843215942383,
|
|
"adv/mean_abs_step_conf": 0.7628809213638306,
|
|
"adv/ratio_final_to_reasoning": 1.2482738742182742,
|
|
"adv/ratio_step_to_reasoning": 1.626617210320458,
|
|
"adv/std_final_conf": 0.8055523633956909,
|
|
"adv/std_reasoning": 0.7393237352371216,
|
|
"adv/std_step_conf": 0.9347228407859802,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.5655892255892256,
|
|
"calib/avg_num_step_conf": 6.921875,
|
|
"calib/ece": 0.37485943775100405,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0013353535353536339,
|
|
"calib/mean_conf": 0.977269076305221,
|
|
"calib/mu_c": 0.9778000000000001,
|
|
"calib/mu_w": 0.9764646464646465,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.37485943775100405,
|
|
"calib/std_conf": 0.004544722826805921,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4203867403314917,
|
|
"calib/step_q_c_n": 905.0,
|
|
"calib/step_q_gap": 0.09297036201545938,
|
|
"calib/step_q_w": 0.3274163783160323,
|
|
"calib/step_q_w_n": 867.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.02734375,
|
|
"completions/max_length": 2226.0,
|
|
"completions/max_terminated_length": 2226.0,
|
|
"completions/mean_length": 503.62890625,
|
|
"completions/mean_terminated_length": 517.787109375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 206.0,
|
|
"epoch": 0.09706666666666666,
|
|
"grad_norm": 0.08242634683847427,
|
|
"learning_rate": 3.0277777777777776e-06,
|
|
"loss": -0.1546,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.030973821878433228,
|
|
"mask/share_reasoning": 0.8174804449081421,
|
|
"mask/share_step_conf": 0.1242019534111023,
|
|
"num_tokens": 23275793.0,
|
|
"reward": 1.0824449062347412,
|
|
"reward_std": 0.20780614018440247,
|
|
"rewards/accuracy_reward_step": 0.5859375,
|
|
"rewards/final_brier_reward_step": 0.6036179065704346,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/step_l2_reward": 0.8330353498458862,
|
|
"step": 91
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5711477994918823,
|
|
"adv/mean_abs_reasoning": 0.43993639945983887,
|
|
"adv/mean_abs_step_conf": 0.7688874006271362,
|
|
"adv/ratio_final_to_reasoning": 1.2982508385147193,
|
|
"adv/ratio_step_to_reasoning": 1.7477239927662016,
|
|
"adv/std_final_conf": 0.8007752299308777,
|
|
"adv/std_reasoning": 0.7204440236091614,
|
|
"adv/std_step_conf": 0.9330356121063232,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.562315775983596,
|
|
"calib/avg_num_step_conf": 6.10546875,
|
|
"calib/ece": 0.37564705882352956,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.001241830065359606,
|
|
"calib/mean_conf": 0.9756470588235295,
|
|
"calib/mu_c": 0.9761437908496734,
|
|
"calib/mu_w": 0.9749019607843138,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.37564705882352956,
|
|
"calib/std_conf": 0.005113701431436252,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.40907103825136615,
|
|
"calib/step_q_c_n": 915.0,
|
|
"calib/step_q_gap": 0.07323770491803283,
|
|
"calib/step_q_w": 0.3358333333333333,
|
|
"calib/step_q_w_n": 648.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2637.0,
|
|
"completions/max_terminated_length": 2637.0,
|
|
"completions/mean_length": 489.6953125,
|
|
"completions/mean_terminated_length": 491.61572265625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 164.0,
|
|
"epoch": 0.09813333333333334,
|
|
"grad_norm": 0.07487954944372177,
|
|
"learning_rate": 3e-06,
|
|
"loss": -0.022,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.03821246325969696,
|
|
"mask/share_reasoning": 0.8173834085464478,
|
|
"mask/share_step_conf": 0.14049787819385529,
|
|
"num_tokens": 23507875.0,
|
|
"reward": 1.1162192821502686,
|
|
"reward_std": 0.16407504677772522,
|
|
"rewards/accuracy_reward_step": 0.59765625,
|
|
"rewards/final_brier_reward_step": 0.6170394420623779,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/step_l2_reward": 0.8644327521324158,
|
|
"step": 92
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.538732647895813,
|
|
"adv/mean_abs_reasoning": 0.3245697617530823,
|
|
"adv/mean_abs_step_conf": 0.7636405825614929,
|
|
"adv/ratio_final_to_reasoning": 1.6598362243789548,
|
|
"adv/ratio_step_to_reasoning": 2.3527779619299087,
|
|
"adv/std_final_conf": 0.746753990650177,
|
|
"adv/std_reasoning": 0.5961504578590393,
|
|
"adv/std_step_conf": 0.934518575668335,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5975177304964538,
|
|
"calib/avg_num_step_conf": 7.4296875,
|
|
"calib/ece": 0.4168379446640317,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.001999746707193273,
|
|
"calib/mean_conf": 0.9741501976284586,
|
|
"calib/mu_c": 0.9750354609929077,
|
|
"calib/mu_w": 0.9730357142857144,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4168379446640317,
|
|
"calib/std_conf": 0.005006830311016599,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4416926503340758,
|
|
"calib/step_q_c_n": 898.0,
|
|
"calib/step_q_gap": 0.036832092565151464,
|
|
"calib/step_q_w": 0.4048605577689243,
|
|
"calib/step_q_w_n": 1004.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2443.0,
|
|
"completions/max_terminated_length": 2443.0,
|
|
"completions/mean_length": 569.02734375,
|
|
"completions/mean_terminated_length": 571.2588500976562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 141.0,
|
|
"epoch": 0.0992,
|
|
"grad_norm": 0.07459692656993866,
|
|
"learning_rate": 2.9722222222222225e-06,
|
|
"loss": -0.0646,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.032702356576919556,
|
|
"mask/share_reasoning": 0.818350076675415,
|
|
"mask/share_step_conf": 0.1450413316488266,
|
|
"num_tokens": 23759322.0,
|
|
"reward": 1.0743638277053833,
|
|
"reward_std": 0.15945008397102356,
|
|
"rewards/accuracy_reward_step": 0.55078125,
|
|
"rewards/final_brier_reward_step": 0.5736898183822632,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8448168635368347,
|
|
"step": 93
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5882172584533691,
|
|
"adv/mean_abs_reasoning": 0.454916775226593,
|
|
"adv/mean_abs_step_conf": 0.7690072059631348,
|
|
"adv/ratio_final_to_reasoning": 1.2930216920674764,
|
|
"adv/ratio_step_to_reasoning": 1.6904349275317314,
|
|
"adv/std_final_conf": 0.785946249961853,
|
|
"adv/std_reasoning": 0.7205522060394287,
|
|
"adv/std_step_conf": 0.9350289702415466,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.6242,
|
|
"calib/avg_num_step_conf": 6.41796875,
|
|
"calib/ece": 0.37396000000000007,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0024333333333332874,
|
|
"calib/mean_conf": 0.97396,
|
|
"calib/mu_c": 0.9749333333333334,
|
|
"calib/mu_w": 0.9725000000000001,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.37396000000000007,
|
|
"calib/std_conf": 0.004971760251661381,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5005479452054794,
|
|
"calib/step_q_c_n": 949.0,
|
|
"calib/step_q_gap": 0.13027416998934105,
|
|
"calib/step_q_w": 0.37027377521613836,
|
|
"calib/step_q_w_n": 694.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2999.0,
|
|
"completions/max_terminated_length": 2999.0,
|
|
"completions/mean_length": 518.140625,
|
|
"completions/mean_terminated_length": 522.220458984375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 169.0,
|
|
"epoch": 0.10026666666666667,
|
|
"grad_norm": 0.050825074315071106,
|
|
"learning_rate": 2.944444444444445e-06,
|
|
"loss": -0.0603,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.03436311334371567,
|
|
"mask/share_reasoning": 0.8233369588851929,
|
|
"mask/share_step_conf": 0.13448745012283325,
|
|
"num_tokens": 24000646.0,
|
|
"reward": 1.0840754508972168,
|
|
"reward_std": 0.19846372306346893,
|
|
"rewards/accuracy_reward_step": 0.5859375,
|
|
"rewards/final_brier_reward_step": 0.6067355275154114,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.8326102495193481,
|
|
"step": 94
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4853938817977905,
|
|
"adv/mean_abs_reasoning": 0.3698340356349945,
|
|
"adv/mean_abs_step_conf": 0.7322413325309753,
|
|
"adv/ratio_final_to_reasoning": 1.3124640650349637,
|
|
"adv/ratio_step_to_reasoning": 1.9799187256352375,
|
|
"adv/std_final_conf": 0.7317978143692017,
|
|
"adv/std_reasoning": 0.6612231731414795,
|
|
"adv/std_step_conf": 0.9335426688194275,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.7348688873139616,
|
|
"calib/avg_num_step_conf": 6.828125,
|
|
"calib/ece": 0.3020158102766799,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.004806520198441078,
|
|
"calib/mean_conf": 0.9739525691699605,
|
|
"calib/mu_c": 0.9755294117647061,
|
|
"calib/mu_w": 0.970722891566265,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.3020158102766799,
|
|
"calib/std_conf": 0.00504815969678723,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.5019259962049336,
|
|
"calib/step_q_c_n": 1054.0,
|
|
"calib/step_q_gap": 0.11604703366891045,
|
|
"calib/step_q_w": 0.38587896253602316,
|
|
"calib/step_q_w_n": 694.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2767.0,
|
|
"completions/max_terminated_length": 2767.0,
|
|
"completions/mean_length": 528.54296875,
|
|
"completions/mean_terminated_length": 530.61572265625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 125.0,
|
|
"epoch": 0.10133333333333333,
|
|
"grad_norm": 0.05134730041027069,
|
|
"learning_rate": 2.916666666666667e-06,
|
|
"loss": -0.026,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.0339992381632328,
|
|
"mask/share_reasoning": 0.8192802667617798,
|
|
"mask/share_step_conf": 0.1428142488002777,
|
|
"num_tokens": 24242081.0,
|
|
"reward": 1.1571464538574219,
|
|
"reward_std": 0.18443702161312103,
|
|
"rewards/accuracy_reward_step": 0.6640625,
|
|
"rewards/final_brier_reward_step": 0.6784464716911316,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/step_l2_reward": 0.8707724809646606,
|
|
"step": 95
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.43800491094589233,
|
|
"adv/mean_abs_reasoning": 0.29693323373794556,
|
|
"adv/mean_abs_step_conf": 0.7541441917419434,
|
|
"adv/ratio_final_to_reasoning": 1.4750956147012082,
|
|
"adv/ratio_step_to_reasoning": 2.539776980327851,
|
|
"adv/std_final_conf": 0.6472437381744385,
|
|
"adv/std_reasoning": 0.5726427435874939,
|
|
"adv/std_step_conf": 0.9342457056045532,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.6856541759288663,
|
|
"calib/avg_num_step_conf": 6.33203125,
|
|
"calib/ece": 0.23807843137254914,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0037924738012073966,
|
|
"calib/mean_conf": 0.9753333333333335,
|
|
"calib/mu_c": 0.976329787234043,
|
|
"calib/mu_w": 0.9725373134328356,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.23807843137254914,
|
|
"calib/std_conf": 0.005143687037512034,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.5463881636205395,
|
|
"calib/step_q_c_n": 1149.0,
|
|
"calib/step_q_gap": 0.11132460429850566,
|
|
"calib/step_q_w": 0.4350635593220339,
|
|
"calib/step_q_w_n": 472.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1075.0,
|
|
"completions/max_terminated_length": 1075.0,
|
|
"completions/mean_length": 457.33203125,
|
|
"completions/mean_terminated_length": 459.1255187988281,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 157.0,
|
|
"epoch": 0.1024,
|
|
"grad_norm": 0.07884582877159119,
|
|
"learning_rate": 2.888888888888889e-06,
|
|
"loss": -0.0015,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03597208485007286,
|
|
"mask/share_reasoning": 0.8180271983146667,
|
|
"mask/share_step_conf": 0.1420944631099701,
|
|
"num_tokens": 24464974.0,
|
|
"reward": 1.182147741317749,
|
|
"reward_std": 0.1482551097869873,
|
|
"rewards/accuracy_reward_step": 0.734375,
|
|
"rewards/final_brier_reward_step": 0.7442148327827454,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/step_l2_reward": 0.8498453497886658,
|
|
"step": 96
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6110037565231323,
|
|
"adv/mean_abs_reasoning": 0.45363372564315796,
|
|
"adv/mean_abs_step_conf": 0.7293815016746521,
|
|
"adv/ratio_final_to_reasoning": 1.3469099010591783,
|
|
"adv/ratio_step_to_reasoning": 1.6078643637894015,
|
|
"adv/std_final_conf": 0.8205944299697876,
|
|
"adv/std_reasoning": 0.7391649484634399,
|
|
"adv/std_step_conf": 0.9352025389671326,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6430432516935904,
|
|
"calib/avg_num_step_conf": 6.61328125,
|
|
"calib/ece": 0.37557312252964437,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0028465346534654046,
|
|
"calib/mean_conf": 0.9763636363636364,
|
|
"calib/mu_c": 0.9775,
|
|
"calib/mu_w": 0.9746534653465346,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.37557312252964437,
|
|
"calib/std_conf": 0.005050944205764396,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.6193447311827956,
|
|
"calib/step_q_c_n": 930.0,
|
|
"calib/step_q_gap": 0.1463041020871207,
|
|
"calib/step_q_w": 0.47304062909567496,
|
|
"calib/step_q_w_n": 763.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2841.0,
|
|
"completions/max_terminated_length": 2841.0,
|
|
"completions/mean_length": 487.59375,
|
|
"completions/mean_terminated_length": 489.50592041015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 145.0,
|
|
"epoch": 0.10346666666666667,
|
|
"grad_norm": 0.05761410668492317,
|
|
"learning_rate": 2.861111111111111e-06,
|
|
"loss": -0.0134,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03603067994117737,
|
|
"mask/share_reasoning": 0.8179956674575806,
|
|
"mask/share_step_conf": 0.14206740260124207,
|
|
"num_tokens": 24694870.0,
|
|
"reward": 1.0798836946487427,
|
|
"reward_std": 0.21278470754623413,
|
|
"rewards/accuracy_reward_step": 0.59375,
|
|
"rewards/final_brier_reward_step": 0.6129417419433594,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/step_l2_reward": 0.8208003044128418,
|
|
"step": 97
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6085901260375977,
|
|
"adv/mean_abs_reasoning": 0.5149928331375122,
|
|
"adv/mean_abs_step_conf": 0.7647644281387329,
|
|
"adv/ratio_final_to_reasoning": 1.181744845515342,
|
|
"adv/ratio_step_to_reasoning": 1.4850001377291542,
|
|
"adv/std_final_conf": 0.7990161180496216,
|
|
"adv/std_reasoning": 0.7576404213905334,
|
|
"adv/std_step_conf": 0.9346080422401428,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.5509554140127388,
|
|
"calib/avg_num_step_conf": 6.2734375,
|
|
"calib/ece": 0.34451612903225826,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.001038706516413468,
|
|
"calib/mean_conf": 0.9775806451612905,
|
|
"calib/mu_c": 0.9779617834394906,
|
|
"calib/mu_w": 0.9769230769230771,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.34451612903225826,
|
|
"calib/std_conf": 0.004375696763306628,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.6171648351648352,
|
|
"calib/step_q_c_n": 910.0,
|
|
"calib/step_q_gap": 0.03322805355563985,
|
|
"calib/step_q_w": 0.5839367816091954,
|
|
"calib/step_q_w_n": 696.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 2996.0,
|
|
"completions/max_terminated_length": 2996.0,
|
|
"completions/mean_length": 495.140625,
|
|
"completions/mean_terminated_length": 503.0000305175781,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 94.0,
|
|
"epoch": 0.10453333333333334,
|
|
"grad_norm": 0.07149934023618698,
|
|
"learning_rate": 2.8333333333333335e-06,
|
|
"loss": -0.1034,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.034688372164964676,
|
|
"mask/share_reasoning": 0.8171087503433228,
|
|
"mask/share_step_conf": 0.1325778365135193,
|
|
"num_tokens": 24927810.0,
|
|
"reward": 1.077172875404358,
|
|
"reward_std": 0.26091310381889343,
|
|
"rewards/accuracy_reward_step": 0.6171875,
|
|
"rewards/final_brier_reward_step": 0.6252793073654175,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"rewards/step_l2_reward": 0.808440089225769,
|
|
"step": 98
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5918498039245605,
|
|
"adv/mean_abs_reasoning": 0.4674447774887085,
|
|
"adv/mean_abs_step_conf": 0.7354602813720703,
|
|
"adv/ratio_final_to_reasoning": 1.266138445495537,
|
|
"adv/ratio_step_to_reasoning": 1.5733629228317476,
|
|
"adv/std_final_conf": 0.8150935769081116,
|
|
"adv/std_reasoning": 0.7392680048942566,
|
|
"adv/std_step_conf": 0.935352087020874,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.684703196347032,
|
|
"calib/avg_num_step_conf": 6.765625,
|
|
"calib/ece": 0.5582071713147414,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.003666666666666818,
|
|
"calib/mean_conf": 0.976533864541833,
|
|
"calib/mu_c": 0.978666666666667,
|
|
"calib/mu_w": 0.9750000000000002,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.5582071713147414,
|
|
"calib/std_conf": 0.0050037683613772975,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.6395565092989985,
|
|
"calib/step_q_c_n": 699.0,
|
|
"calib/step_q_gap": 0.08880142701439053,
|
|
"calib/step_q_w": 0.550755082284608,
|
|
"calib/step_q_w_n": 1033.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 2665.0,
|
|
"completions/max_terminated_length": 2665.0,
|
|
"completions/mean_length": 562.4921875,
|
|
"completions/mean_terminated_length": 571.420654296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 154.0,
|
|
"epoch": 0.1056,
|
|
"grad_norm": 0.05055514723062515,
|
|
"learning_rate": 2.805555555555556e-06,
|
|
"loss": -0.1247,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.030417390167713165,
|
|
"mask/share_reasoning": 0.8215442895889282,
|
|
"mask/share_step_conf": 0.1324133276939392,
|
|
"num_tokens": 25177608.0,
|
|
"reward": 0.9419336915016174,
|
|
"reward_std": 0.21998000144958496,
|
|
"rewards/accuracy_reward_step": 0.41015625,
|
|
"rewards/final_brier_reward_step": 0.438107430934906,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/step_l2_reward": 0.7784231901168823,
|
|
"step": 99
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5585629940032959,
|
|
"adv/mean_abs_reasoning": 0.4550975561141968,
|
|
"adv/mean_abs_step_conf": 0.767812967300415,
|
|
"adv/ratio_final_to_reasoning": 1.2273478213606068,
|
|
"adv/ratio_step_to_reasoning": 1.6871392891148578,
|
|
"adv/std_final_conf": 0.798711895942688,
|
|
"adv/std_reasoning": 0.7205420136451721,
|
|
"adv/std_step_conf": 0.9352794885635376,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.6672497570456755,
|
|
"calib/avg_num_step_conf": 6.56640625,
|
|
"calib/ece": 0.39373015873015876,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.996031746031746,
|
|
"calib/gap": 0.004272108843537681,
|
|
"calib/mean_conf": 0.9770634920634922,
|
|
"calib/mu_c": 0.9788435374149663,
|
|
"calib/mu_w": 0.9745714285714286,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.39373015873015876,
|
|
"calib/std_conf": 0.008268074027423371,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.6221988372093025,
|
|
"calib/step_q_c_n": 946.0,
|
|
"calib/step_q_gap": 0.054375707957601827,
|
|
"calib/step_q_w": 0.5678231292517006,
|
|
"calib/step_q_w_n": 735.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2503.0,
|
|
"completions/max_terminated_length": 2503.0,
|
|
"completions/mean_length": 547.8984375,
|
|
"completions/mean_terminated_length": 552.2125854492188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 140.0,
|
|
"epoch": 0.10666666666666667,
|
|
"grad_norm": 0.05975564941763878,
|
|
"learning_rate": 2.7777777777777783e-06,
|
|
"loss": -0.0241,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.03263147920370102,
|
|
"mask/share_reasoning": 0.8286091685295105,
|
|
"mask/share_step_conf": 0.1309468150138855,
|
|
"num_tokens": 25425278.0,
|
|
"reward": 1.0638865232467651,
|
|
"reward_std": 0.22568199038505554,
|
|
"rewards/accuracy_reward_step": 0.578125,
|
|
"rewards/final_brier_reward_step": 0.5942621231079102,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/step_l2_reward": 0.8145281076431274,
|
|
"step": 100
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5452511310577393,
|
|
"adv/mean_abs_reasoning": 0.4778488278388977,
|
|
"adv/mean_abs_step_conf": 0.7576862573623657,
|
|
"adv/ratio_final_to_reasoning": 1.1410536121301644,
|
|
"adv/ratio_step_to_reasoning": 1.5856191607480778,
|
|
"adv/std_final_conf": 0.7721120715141296,
|
|
"adv/std_reasoning": 0.7392907738685608,
|
|
"adv/std_step_conf": 0.9355528950691223,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6051843533595358,
|
|
"calib/avg_num_step_conf": 6.76953125,
|
|
"calib/ece": 0.43818897637795295,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0021298895751451496,
|
|
"calib/mean_conf": 0.9775590551181105,
|
|
"calib/mu_c": 0.9785401459854017,
|
|
"calib/mu_w": 0.9764102564102566,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.43818897637795295,
|
|
"calib/std_conf": 0.004386187236915108,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.602814569536424,
|
|
"calib/step_q_c_n": 906.0,
|
|
"calib/step_q_gap": 0.047498850068467546,
|
|
"calib/step_q_w": 0.5553157194679564,
|
|
"calib/step_q_w_n": 827.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2896.0,
|
|
"completions/max_terminated_length": 2896.0,
|
|
"completions/mean_length": 568.68359375,
|
|
"completions/mean_terminated_length": 570.9137573242188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 173.0,
|
|
"epoch": 0.10773333333333333,
|
|
"grad_norm": 0.05229601263999939,
|
|
"learning_rate": 2.7500000000000004e-06,
|
|
"loss": -0.0096,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.031221158802509308,
|
|
"mask/share_reasoning": 0.8331286907196045,
|
|
"mask/share_step_conf": 0.13174395263195038,
|
|
"num_tokens": 25677853.0,
|
|
"reward": 1.027117133140564,
|
|
"reward_std": 0.22584015130996704,
|
|
"rewards/accuracy_reward_step": 0.53515625,
|
|
"rewards/final_brier_reward_step": 0.5561999678611755,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/step_l2_reward": 0.7950435876846313,
|
|
"step": 101
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.48673343658447266,
|
|
"adv/mean_abs_reasoning": 0.3910636007785797,
|
|
"adv/mean_abs_step_conf": 0.746878981590271,
|
|
"adv/ratio_final_to_reasoning": 1.244640093364407,
|
|
"adv/ratio_step_to_reasoning": 1.9098657612298569,
|
|
"adv/std_final_conf": 0.7263456583023071,
|
|
"adv/std_reasoning": 0.6612106561660767,
|
|
"adv/std_step_conf": 0.9347814917564392,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6418454204971059,
|
|
"calib/avg_num_step_conf": 6.7578125,
|
|
"calib/ece": 0.3285433070866144,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0028593803200548384,
|
|
"calib/mean_conf": 0.9781496062992128,
|
|
"calib/mu_c": 0.9791515151515156,
|
|
"calib/mu_w": 0.9762921348314607,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3285433070866144,
|
|
"calib/std_conf": 0.003983388222953356,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5955947580645161,
|
|
"calib/step_q_c_n": 992.0,
|
|
"calib/step_q_gap": 0.05136711578809339,
|
|
"calib/step_q_w": 0.5442276422764227,
|
|
"calib/step_q_w_n": 738.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2981.0,
|
|
"completions/max_terminated_length": 2981.0,
|
|
"completions/mean_length": 501.609375,
|
|
"completions/mean_terminated_length": 501.609375,
|
|
"completions/min_length": 124.0,
|
|
"completions/min_terminated_length": 124.0,
|
|
"epoch": 0.1088,
|
|
"grad_norm": 0.07377835363149643,
|
|
"learning_rate": 2.7222222222222224e-06,
|
|
"loss": 0.01,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03854910284280777,
|
|
"mask/share_reasoning": 0.80856853723526,
|
|
"mask/share_step_conf": 0.15288236737251282,
|
|
"num_tokens": 25912961.0,
|
|
"reward": 1.1215847730636597,
|
|
"reward_std": 0.18449583649635315,
|
|
"rewards/accuracy_reward_step": 0.64453125,
|
|
"rewards/final_brier_reward_step": 0.6605261564254761,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/step_l2_reward": 0.8368663191795349,
|
|
"step": 102
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4801088869571686,
|
|
"adv/mean_abs_reasoning": 0.4155534505844116,
|
|
"adv/mean_abs_step_conf": 0.7427923679351807,
|
|
"adv/ratio_final_to_reasoning": 1.155348093685589,
|
|
"adv/ratio_step_to_reasoning": 1.787477319441237,
|
|
"adv/std_final_conf": 0.7518900036811829,
|
|
"adv/std_reasoning": 0.7204484343528748,
|
|
"adv/std_step_conf": 0.9343893527984619,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.6622718052738337,
|
|
"calib/avg_num_step_conf": 6.9453125,
|
|
"calib/ece": 0.3909311740890692,
|
|
"calib/final_conf_rate": 0.96484375,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.003231913455037261,
|
|
"calib/mean_conf": 0.9779757085020246,
|
|
"calib/mu_c": 0.9793103448275864,
|
|
"calib/mu_w": 0.9760784313725491,
|
|
"calib/nonempty_final_conf_rate": 0.96484375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.3909311740890692,
|
|
"calib/std_conf": 0.00421480629559188,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.6107300115874854,
|
|
"calib/step_q_c_n": 863.0,
|
|
"calib/step_q_gap": 0.06798684218857831,
|
|
"calib/step_q_w": 0.5427431693989071,
|
|
"calib/step_q_w_n": 915.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2995.0,
|
|
"completions/max_terminated_length": 2995.0,
|
|
"completions/mean_length": 617.6953125,
|
|
"completions/mean_terminated_length": 622.55908203125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 155.0,
|
|
"epoch": 0.10986666666666667,
|
|
"grad_norm": 0.060474298894405365,
|
|
"learning_rate": 2.6944444444444444e-06,
|
|
"loss": 0.007,
|
|
"mask/has_final_conf_rate": 0.96484375,
|
|
"mask/share_final_conf": 0.033057354390621185,
|
|
"mask/share_reasoning": 0.8269200921058655,
|
|
"mask/share_step_conf": 0.13221006095409393,
|
|
"num_tokens": 26175643.0,
|
|
"reward": 1.0655455589294434,
|
|
"reward_std": 0.19261986017227173,
|
|
"rewards/accuracy_reward_step": 0.56640625,
|
|
"rewards/final_brier_reward_step": 0.5849835872650146,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"rewards/step_l2_reward": 0.8265714645385742,
|
|
"step": 103
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6086057424545288,
|
|
"adv/mean_abs_reasoning": 0.5149220824241638,
|
|
"adv/mean_abs_step_conf": 0.7702325582504272,
|
|
"adv/ratio_final_to_reasoning": 1.1819375459473762,
|
|
"adv/ratio_step_to_reasoning": 1.4958235129950264,
|
|
"adv/std_final_conf": 0.8138259053230286,
|
|
"adv/std_reasoning": 0.757559597492218,
|
|
"adv/std_step_conf": 0.9335253834724426,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.67715625,
|
|
"calib/avg_num_step_conf": 7.01171875,
|
|
"calib/ece": 0.4819762845849804,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0035431250000000913,
|
|
"calib/mean_conf": 0.9760474308300396,
|
|
"calib/mu_c": 0.9778400000000004,
|
|
"calib/mu_w": 0.9742968750000003,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4819762845849804,
|
|
"calib/std_conf": 0.004889058054092092,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5650879396984924,
|
|
"calib/step_q_c_n": 796.0,
|
|
"calib/step_q_gap": 0.07133418594473862,
|
|
"calib/step_q_w": 0.49375375375375374,
|
|
"calib/step_q_w_n": 999.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2414.0,
|
|
"completions/max_terminated_length": 2414.0,
|
|
"completions/mean_length": 537.5390625,
|
|
"completions/mean_terminated_length": 541.7716674804688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 156.0,
|
|
"epoch": 0.11093333333333333,
|
|
"grad_norm": 0.07125498354434967,
|
|
"learning_rate": 2.666666666666667e-06,
|
|
"loss": -0.0048,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.032777514308691025,
|
|
"mask/share_reasoning": 0.8187971711158752,
|
|
"mask/share_step_conf": 0.14061282575130463,
|
|
"num_tokens": 26419933.0,
|
|
"reward": 1.0168688297271729,
|
|
"reward_std": 0.21766817569732666,
|
|
"rewards/accuracy_reward_step": 0.48828125,
|
|
"rewards/final_brier_reward_step": 0.513393759727478,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8166875839233398,
|
|
"step": 104
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.551266074180603,
|
|
"adv/mean_abs_reasoning": 0.5299026370048523,
|
|
"adv/mean_abs_step_conf": 0.7634249925613403,
|
|
"adv/ratio_final_to_reasoning": 1.0403157781899377,
|
|
"adv/ratio_step_to_reasoning": 1.4406891742913701,
|
|
"adv/std_final_conf": 0.8026061058044434,
|
|
"adv/std_reasoning": 0.7926851511001587,
|
|
"adv/std_step_conf": 0.9345788359642029,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6324555628703095,
|
|
"calib/avg_num_step_conf": 7.8984375,
|
|
"calib/ece": 0.36264822134387353,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0026491112574061892,
|
|
"calib/mean_conf": 0.9752964426877471,
|
|
"calib/mu_c": 0.9763225806451614,
|
|
"calib/mu_w": 0.9736734693877552,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.36264822134387353,
|
|
"calib/std_conf": 0.004991204437095454,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5292671166827387,
|
|
"calib/step_q_c_n": 1037.0,
|
|
"calib/step_q_gap": 0.05610975627664733,
|
|
"calib/step_q_w": 0.4731573604060914,
|
|
"calib/step_q_w_n": 985.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2267.0,
|
|
"completions/max_terminated_length": 2267.0,
|
|
"completions/mean_length": 577.125,
|
|
"completions/mean_terminated_length": 581.6693115234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 176.0,
|
|
"epoch": 0.112,
|
|
"grad_norm": 0.06227808818221092,
|
|
"learning_rate": 2.6388888888888893e-06,
|
|
"loss": 0.0437,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03263796865940094,
|
|
"mask/share_reasoning": 0.8158507347106934,
|
|
"mask/share_step_conf": 0.1436988264322281,
|
|
"num_tokens": 26673437.0,
|
|
"reward": 1.1061831712722778,
|
|
"reward_std": 0.21990132331848145,
|
|
"rewards/accuracy_reward_step": 0.60546875,
|
|
"rewards/final_brier_reward_step": 0.6249972581863403,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8457460403442383,
|
|
"step": 105
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4969498813152313,
|
|
"adv/mean_abs_reasoning": 0.3592734932899475,
|
|
"adv/mean_abs_step_conf": 0.7686994075775146,
|
|
"adv/ratio_final_to_reasoning": 1.3832077528585547,
|
|
"adv/ratio_step_to_reasoning": 2.139593991581073,
|
|
"adv/std_final_conf": 0.7539573907852173,
|
|
"adv/std_reasoning": 0.6611307263374329,
|
|
"adv/std_step_conf": 0.9345561265945435,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.6483600305110602,
|
|
"calib/avg_num_step_conf": 6.74609375,
|
|
"calib/ece": 0.4277777777777778,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0029672006102209325,
|
|
"calib/mean_conf": 0.9753968253968255,
|
|
"calib/mu_c": 0.9767391304347826,
|
|
"calib/mu_w": 0.9737719298245616,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4277777777777778,
|
|
"calib/std_conf": 0.004984228085113522,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.5138065326633166,
|
|
"calib/step_q_c_n": 796.0,
|
|
"calib/step_q_gap": 0.058919314618203766,
|
|
"calib/step_q_w": 0.4548872180451128,
|
|
"calib/step_q_w_n": 931.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2731.0,
|
|
"completions/max_terminated_length": 2731.0,
|
|
"completions/mean_length": 545.23046875,
|
|
"completions/mean_terminated_length": 547.36865234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 159.0,
|
|
"epoch": 0.11306666666666666,
|
|
"grad_norm": 0.05896085128188133,
|
|
"learning_rate": 2.6111111111111113e-06,
|
|
"loss": -0.0141,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.032741058617830276,
|
|
"mask/share_reasoning": 0.8298637866973877,
|
|
"mask/share_step_conf": 0.13348886370658875,
|
|
"num_tokens": 26917600.0,
|
|
"reward": 1.0589934587478638,
|
|
"reward_std": 0.16930758953094482,
|
|
"rewards/accuracy_reward_step": 0.5390625,
|
|
"rewards/final_brier_reward_step": 0.561801552772522,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/step_l2_reward": 0.8343318700790405,
|
|
"step": 106
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5585216283798218,
|
|
"adv/mean_abs_reasoning": 0.4725401997566223,
|
|
"adv/mean_abs_step_conf": 0.7294274568557739,
|
|
"adv/ratio_final_to_reasoning": 1.1819557969194652,
|
|
"adv/ratio_step_to_reasoning": 1.5436304831450511,
|
|
"adv/std_final_conf": 0.8102872371673584,
|
|
"adv/std_reasoning": 0.7752585411071777,
|
|
"adv/std_step_conf": 0.933884859085083,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.519613337069207,
|
|
"calib/avg_num_step_conf": 6.52734375,
|
|
"calib/ece": 0.31571428571428584,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.0003922667413843728,
|
|
"calib/mean_conf": 0.9744444444444446,
|
|
"calib/mu_c": 0.9745783132530123,
|
|
"calib/mu_w": 0.9741860465116279,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.31571428571428584,
|
|
"calib/std_conf": 0.004969039949999537,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.47640812557710066,
|
|
"calib/step_q_c_n": 1083.0,
|
|
"calib/step_q_gap": -0.0019422145589537143,
|
|
"calib/step_q_w": 0.4783503401360544,
|
|
"calib/step_q_w_n": 588.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 1915.0,
|
|
"completions/max_terminated_length": 1915.0,
|
|
"completions/mean_length": 492.140625,
|
|
"completions/mean_terminated_length": 499.9524230957031,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 151.0,
|
|
"epoch": 0.11413333333333334,
|
|
"grad_norm": 0.07943214476108551,
|
|
"learning_rate": 2.5833333333333337e-06,
|
|
"loss": -0.0572,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.03573369234800339,
|
|
"mask/share_reasoning": 0.8075776100158691,
|
|
"mask/share_step_conf": 0.1410636603832245,
|
|
"num_tokens": 27148204.0,
|
|
"reward": 1.1372442245483398,
|
|
"reward_std": 0.2124222069978714,
|
|
"rewards/accuracy_reward_step": 0.6484375,
|
|
"rewards/final_brier_reward_step": 0.6651140451431274,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/step_l2_reward": 0.8552078008651733,
|
|
"step": 107
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.472678542137146,
|
|
"adv/mean_abs_reasoning": 0.3605843186378479,
|
|
"adv/mean_abs_step_conf": 0.7309489846229553,
|
|
"adv/ratio_final_to_reasoning": 1.3108682704859378,
|
|
"adv/ratio_step_to_reasoning": 2.0271236069949077,
|
|
"adv/std_final_conf": 0.6875787973403931,
|
|
"adv/std_reasoning": 0.6403064131736755,
|
|
"adv/std_step_conf": 0.9338570237159729,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6144736842105263,
|
|
"calib/avg_num_step_conf": 7.04296875,
|
|
"calib/ece": 0.2261811023622049,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.002289473684210841,
|
|
"calib/mean_conf": 0.974212598425197,
|
|
"calib/mu_c": 0.9747894736842108,
|
|
"calib/mu_w": 0.9724999999999999,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.2261811023622049,
|
|
"calib/std_conf": 0.004937610632684351,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.47036405886909366,
|
|
"calib/step_q_c_n": 1291.0,
|
|
"calib/step_q_gap": 0.038860152619093646,
|
|
"calib/step_q_w": 0.43150390625,
|
|
"calib/step_q_w_n": 512.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2372.0,
|
|
"completions/max_terminated_length": 2372.0,
|
|
"completions/mean_length": 543.02734375,
|
|
"completions/mean_terminated_length": 545.1569213867188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 138.0,
|
|
"epoch": 0.1152,
|
|
"grad_norm": 0.08610007166862488,
|
|
"learning_rate": 2.5555555555555557e-06,
|
|
"loss": -0.0756,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03323179855942726,
|
|
"mask/share_reasoning": 0.8196040391921997,
|
|
"mask/share_step_conf": 0.14325785636901855,
|
|
"num_tokens": 27390451.0,
|
|
"reward": 1.1965038776397705,
|
|
"reward_std": 0.16265276074409485,
|
|
"rewards/accuracy_reward_step": 0.7421875,
|
|
"rewards/final_brier_reward_step": 0.7552535533905029,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/step_l2_reward": 0.8605860471725464,
|
|
"step": 108
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.3658500611782074,
|
|
"adv/mean_abs_reasoning": 0.26386594772338867,
|
|
"adv/mean_abs_step_conf": 0.7497037053108215,
|
|
"adv/ratio_final_to_reasoning": 1.3864997144752036,
|
|
"adv/ratio_step_to_reasoning": 2.841229464351868,
|
|
"adv/std_final_conf": 0.6053043007850647,
|
|
"adv/std_reasoning": 0.5483630895614624,
|
|
"adv/std_step_conf": 0.9330347180366516,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.6764097926490537,
|
|
"calib/avg_num_step_conf": 7.3203125,
|
|
"calib/ece": 0.42448,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.996,
|
|
"calib/gap": 0.0043640591693041575,
|
|
"calib/mean_conf": 0.97248,
|
|
"calib/mu_c": 0.9744525547445255,
|
|
"calib/mu_w": 0.9700884955752214,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.42448,
|
|
"calib/std_conf": 0.007915150030163675,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.44699999999999995,
|
|
"calib/step_q_c_n": 910.0,
|
|
"calib/step_q_gap": 0.08826556016597509,
|
|
"calib/step_q_w": 0.35873443983402487,
|
|
"calib/step_q_w_n": 964.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2867.0,
|
|
"completions/max_terminated_length": 2867.0,
|
|
"completions/mean_length": 587.4765625,
|
|
"completions/mean_terminated_length": 587.4765625,
|
|
"completions/min_length": 167.0,
|
|
"completions/min_terminated_length": 167.0,
|
|
"epoch": 0.11626666666666667,
|
|
"grad_norm": 0.06257660686969757,
|
|
"learning_rate": 2.5277777777777778e-06,
|
|
"loss": 0.0175,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.03028855472803116,
|
|
"mask/share_reasoning": 0.8285435438156128,
|
|
"mask/share_step_conf": 0.14116786420345306,
|
|
"num_tokens": 27645445.0,
|
|
"reward": 1.0708765983581543,
|
|
"reward_std": 0.14244344830513,
|
|
"rewards/accuracy_reward_step": 0.53515625,
|
|
"rewards/final_brier_reward_step": 0.5602999925613403,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"rewards/step_l2_reward": 0.8537813425064087,
|
|
"step": 109
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5903299450874329,
|
|
"adv/mean_abs_reasoning": 0.41868531703948975,
|
|
"adv/mean_abs_step_conf": 0.7497169971466064,
|
|
"adv/ratio_final_to_reasoning": 1.4099609445624621,
|
|
"adv/ratio_step_to_reasoning": 1.7906455436455975,
|
|
"adv/std_final_conf": 0.7605558633804321,
|
|
"adv/std_reasoning": 0.6614421010017395,
|
|
"adv/std_step_conf": 0.9334558844566345,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5956521739130435,
|
|
"calib/avg_num_step_conf": 6.7265625,
|
|
"calib/ece": 0.42758893280632426,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.001913043478260601,
|
|
"calib/mean_conf": 0.9730434782608697,
|
|
"calib/mu_c": 0.9739130434782607,
|
|
"calib/mu_w": 0.9720000000000001,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.42758893280632426,
|
|
"calib/std_conf": 0.004601306627938423,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.44374149659863943,
|
|
"calib/step_q_c_n": 735.0,
|
|
"calib/step_q_gap": 0.08096540743957159,
|
|
"calib/step_q_w": 0.36277608915906784,
|
|
"calib/step_q_w_n": 987.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2436.0,
|
|
"completions/max_terminated_length": 2436.0,
|
|
"completions/mean_length": 484.45703125,
|
|
"completions/mean_terminated_length": 488.2716369628906,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 128.0,
|
|
"epoch": 0.11733333333333333,
|
|
"grad_norm": 0.0538322739303112,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": -0.0599,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.037332724779844284,
|
|
"mask/share_reasoning": 0.8117356300354004,
|
|
"mask/share_step_conf": 0.14311917126178741,
|
|
"num_tokens": 27874386.0,
|
|
"reward": 1.075239658355713,
|
|
"reward_std": 0.18618369102478027,
|
|
"rewards/accuracy_reward_step": 0.5390625,
|
|
"rewards/final_brier_reward_step": 0.5634796619415283,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8543539643287659,
|
|
"step": 110
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5618973970413208,
|
|
"adv/mean_abs_reasoning": 0.41221410036087036,
|
|
"adv/mean_abs_step_conf": 0.7482247352600098,
|
|
"adv/ratio_final_to_reasoning": 1.3631202730557035,
|
|
"adv/ratio_step_to_reasoning": 1.8151361988951396,
|
|
"adv/std_final_conf": 0.7664877772331238,
|
|
"adv/std_reasoning": 0.68166184425354,
|
|
"adv/std_step_conf": 0.9334881901741028,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.6659549689440992,
|
|
"calib/avg_num_step_conf": 6.5390625,
|
|
"calib/ece": 0.42140000000000016,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.992,
|
|
"calib/gap": 0.0050271739130431925,
|
|
"calib/mean_conf": 0.9734,
|
|
"calib/mu_c": 0.9756521739130434,
|
|
"calib/mu_w": 0.9706250000000002,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.42140000000000016,
|
|
"calib/std_conf": 0.010509043724335723,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4369565217391304,
|
|
"calib/step_q_c_n": 736.0,
|
|
"calib/step_q_gap": 0.07084777973486606,
|
|
"calib/step_q_w": 0.36610874200426435,
|
|
"calib/step_q_w_n": 938.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2930.0,
|
|
"completions/max_terminated_length": 2930.0,
|
|
"completions/mean_length": 558.140625,
|
|
"completions/mean_terminated_length": 562.5354614257812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 128.0,
|
|
"epoch": 0.1184,
|
|
"grad_norm": 0.057587627321481705,
|
|
"learning_rate": 2.4722222222222226e-06,
|
|
"loss": -0.0325,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.0363595113158226,
|
|
"mask/share_reasoning": 0.8237424492835999,
|
|
"mask/share_step_conf": 0.13208553194999695,
|
|
"num_tokens": 28124678.0,
|
|
"reward": 1.0738959312438965,
|
|
"reward_std": 0.18189433217048645,
|
|
"rewards/accuracy_reward_step": 0.5390625,
|
|
"rewards/final_brier_reward_step": 0.5639668107032776,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.8537999987602234,
|
|
"step": 111
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5185309648513794,
|
|
"adv/mean_abs_reasoning": 0.4189787209033966,
|
|
"adv/mean_abs_step_conf": 0.7416942119598389,
|
|
"adv/ratio_final_to_reasoning": 1.2376069212616085,
|
|
"adv/ratio_step_to_reasoning": 1.7702431530665979,
|
|
"adv/std_final_conf": 0.7534166574478149,
|
|
"adv/std_reasoning": 0.7013196349143982,
|
|
"adv/std_step_conf": 0.9343290328979492,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.7158685256280731,
|
|
"calib/avg_num_step_conf": 7.2109375,
|
|
"calib/ece": 0.3785887096774195,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.9758064516129032,
|
|
"calib/gap": 0.006921937091668218,
|
|
"calib/mean_conf": 0.9713306451612905,
|
|
"calib/mu_c": 0.9741496598639455,
|
|
"calib/mu_w": 0.9672277227722773,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.3785887096774195,
|
|
"calib/std_conf": 0.01641951129591469,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.412217659137577,
|
|
"calib/step_q_c_n": 974.0,
|
|
"calib/step_q_gap": 0.1006465582201459,
|
|
"calib/step_q_w": 0.3115711009174311,
|
|
"calib/step_q_w_n": 872.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2498.0,
|
|
"completions/max_terminated_length": 2498.0,
|
|
"completions/mean_length": 598.7265625,
|
|
"completions/mean_terminated_length": 605.8261108398438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 203.0,
|
|
"epoch": 0.11946666666666667,
|
|
"grad_norm": 0.08669447153806686,
|
|
"learning_rate": 2.4444444444444447e-06,
|
|
"loss": -0.0538,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.02860027551651001,
|
|
"mask/share_reasoning": 0.8302577137947083,
|
|
"mask/share_step_conf": 0.12942329049110413,
|
|
"num_tokens": 28385872.0,
|
|
"reward": 1.0846667289733887,
|
|
"reward_std": 0.17266321182250977,
|
|
"rewards/accuracy_reward_step": 0.57421875,
|
|
"rewards/final_brier_reward_step": 0.5990207195281982,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"rewards/step_l2_reward": 0.8411459922790527,
|
|
"step": 112
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5774078369140625,
|
|
"adv/mean_abs_reasoning": 0.44517821073532104,
|
|
"adv/mean_abs_step_conf": 0.7621119022369385,
|
|
"adv/ratio_final_to_reasoning": 1.297026276196968,
|
|
"adv/ratio_step_to_reasoning": 1.7119254353849072,
|
|
"adv/std_final_conf": 0.8084132671356201,
|
|
"adv/std_reasoning": 0.7392378449440002,
|
|
"adv/std_step_conf": 0.9335689544677734,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.6918721061193842,
|
|
"calib/avg_num_step_conf": 6.69921875,
|
|
"calib/ece": 0.4561660079051384,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.9960474308300395,
|
|
"calib/gap": 0.004625829057689601,
|
|
"calib/mean_conf": 0.9739525691699605,
|
|
"calib/mu_c": 0.9761832061068701,
|
|
"calib/mu_w": 0.9715573770491805,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.4561660079051384,
|
|
"calib/std_conf": 0.008208602166376925,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.41018404907975453,
|
|
"calib/step_q_c_n": 815.0,
|
|
"calib/step_q_gap": 0.039806271301976726,
|
|
"calib/step_q_w": 0.3703777777777778,
|
|
"calib/step_q_w_n": 900.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 3042.0,
|
|
"completions/max_terminated_length": 3042.0,
|
|
"completions/mean_length": 484.421875,
|
|
"completions/mean_terminated_length": 486.32159423828125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 150.0,
|
|
"epoch": 0.12053333333333334,
|
|
"grad_norm": 0.0665605217218399,
|
|
"learning_rate": 2.4166666666666667e-06,
|
|
"loss": -0.0361,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03515036031603813,
|
|
"mask/share_reasoning": 0.8132357597351074,
|
|
"mask/share_step_conf": 0.14770758152008057,
|
|
"num_tokens": 28615084.0,
|
|
"reward": 1.0729321241378784,
|
|
"reward_std": 0.18777276575565338,
|
|
"rewards/accuracy_reward_step": 0.51171875,
|
|
"rewards/final_brier_reward_step": 0.5378601551055908,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/step_l2_reward": 0.8725234866142273,
|
|
"step": 113
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4570245146751404,
|
|
"adv/mean_abs_reasoning": 0.40027526021003723,
|
|
"adv/mean_abs_step_conf": 0.7232214212417603,
|
|
"adv/ratio_final_to_reasoning": 1.141775573227596,
|
|
"adv/ratio_step_to_reasoning": 1.8068101957194727,
|
|
"adv/std_final_conf": 0.7423800826072693,
|
|
"adv/std_reasoning": 0.7013442516326904,
|
|
"adv/std_step_conf": 0.9326123595237732,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.730661040787623,
|
|
"calib/avg_num_step_conf": 7.09765625,
|
|
"calib/ece": 0.29232,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": 0.004613220815752661,
|
|
"calib/mean_conf": 0.9763200000000001,
|
|
"calib/mu_c": 0.9777777777777781,
|
|
"calib/mu_w": 0.9731645569620254,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.29232,
|
|
"calib/std_conf": 0.004822613399392495,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.43347910592808553,
|
|
"calib/step_q_c_n": 1029.0,
|
|
"calib/step_q_gap": 0.08303494349153734,
|
|
"calib/step_q_w": 0.3504441624365482,
|
|
"calib/step_q_w_n": 788.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2923.0,
|
|
"completions/max_terminated_length": 2923.0,
|
|
"completions/mean_length": 497.50390625,
|
|
"completions/mean_terminated_length": 503.4031677246094,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 140.0,
|
|
"epoch": 0.1216,
|
|
"grad_norm": 0.06393461674451828,
|
|
"learning_rate": 2.388888888888889e-06,
|
|
"loss": -0.0075,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.037301406264305115,
|
|
"mask/share_reasoning": 0.7991474866867065,
|
|
"mask/share_step_conf": 0.15183240175247192,
|
|
"num_tokens": 28847469.0,
|
|
"reward": 1.1506597995758057,
|
|
"reward_std": 0.18302500247955322,
|
|
"rewards/accuracy_reward_step": 0.66796875,
|
|
"rewards/final_brier_reward_step": 0.6839609146118164,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.8589682579040527,
|
|
"step": 114
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5060404539108276,
|
|
"adv/mean_abs_reasoning": 0.4313282370567322,
|
|
"adv/mean_abs_step_conf": 0.7706390619277954,
|
|
"adv/ratio_final_to_reasoning": 1.1732142958316654,
|
|
"adv/ratio_step_to_reasoning": 1.7866649936633618,
|
|
"adv/std_final_conf": 0.7697705626487732,
|
|
"adv/std_reasoning": 0.7204693555831909,
|
|
"adv/std_step_conf": 0.9332576394081116,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6138629888629888,
|
|
"calib/avg_num_step_conf": 6.5234375,
|
|
"calib/ece": 0.3597254901960786,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.9529411764705882,
|
|
"calib/gap": 0.01085664335664338,
|
|
"calib/mean_conf": 0.9714901960784316,
|
|
"calib/mu_c": 0.9757051282051284,
|
|
"calib/mu_w": 0.9648484848484851,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3597254901960786,
|
|
"calib/std_conf": 0.023028858748441506,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4334791454730417,
|
|
"calib/step_q_c_n": 983.0,
|
|
"calib/step_q_gap": 0.05564799554582195,
|
|
"calib/step_q_w": 0.3778311499272198,
|
|
"calib/step_q_w_n": 687.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1151.0,
|
|
"completions/max_terminated_length": 1151.0,
|
|
"completions/mean_length": 450.578125,
|
|
"completions/mean_terminated_length": 452.3451232910156,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 141.0,
|
|
"epoch": 0.12266666666666666,
|
|
"grad_norm": 0.05441709980368614,
|
|
"learning_rate": 2.361111111111111e-06,
|
|
"loss": 0.052,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03709343448281288,
|
|
"mask/share_reasoning": 0.8064676523208618,
|
|
"mask/share_step_conf": 0.15253272652626038,
|
|
"num_tokens": 29068081.0,
|
|
"reward": 1.1251858472824097,
|
|
"reward_std": 0.1664474904537201,
|
|
"rewards/accuracy_reward_step": 0.609375,
|
|
"rewards/final_brier_reward_step": 0.6352245807647705,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/step_l2_reward": 0.8627021908760071,
|
|
"step": 115
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5148700475692749,
|
|
"adv/mean_abs_reasoning": 0.40699562430381775,
|
|
"adv/mean_abs_step_conf": 0.758590042591095,
|
|
"adv/ratio_final_to_reasoning": 1.265050572595174,
|
|
"adv/ratio_step_to_reasoning": 1.863877637231834,
|
|
"adv/std_final_conf": 0.7608790397644043,
|
|
"adv/std_reasoning": 0.7012465000152588,
|
|
"adv/std_step_conf": 0.9334179162979126,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6482262465889357,
|
|
"calib/avg_num_step_conf": 6.99609375,
|
|
"calib/ece": 0.42666666666666664,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.9607843137254902,
|
|
"calib/gap": 0.008298189034979142,
|
|
"calib/mean_conf": 0.971764705882353,
|
|
"calib/mu_c": 0.975539568345324,
|
|
"calib/mu_w": 0.9672413793103448,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.42666666666666664,
|
|
"calib/std_conf": 0.021116472740302206,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4242675159235668,
|
|
"calib/step_q_c_n": 942.0,
|
|
"calib/step_q_gap": 0.04038059012851386,
|
|
"calib/step_q_w": 0.38388692579505296,
|
|
"calib/step_q_w_n": 849.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2745.0,
|
|
"completions/max_terminated_length": 2745.0,
|
|
"completions/mean_length": 562.62890625,
|
|
"completions/mean_terminated_length": 562.62890625,
|
|
"completions/min_length": 95.0,
|
|
"completions/min_terminated_length": 95.0,
|
|
"epoch": 0.12373333333333333,
|
|
"grad_norm": 0.08195675164461136,
|
|
"learning_rate": 2.3333333333333336e-06,
|
|
"loss": -0.0084,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.03406328707933426,
|
|
"mask/share_reasoning": 0.8288680911064148,
|
|
"mask/share_step_conf": 0.13706859946250916,
|
|
"num_tokens": 29316634.0,
|
|
"reward": 1.107461929321289,
|
|
"reward_std": 0.1593656986951828,
|
|
"rewards/accuracy_reward_step": 0.54296875,
|
|
"rewards/final_brier_reward_step": 0.5714179277420044,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/step_l2_reward": 0.8904621601104736,
|
|
"step": 116
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6606265306472778,
|
|
"adv/mean_abs_reasoning": 0.490254670381546,
|
|
"adv/mean_abs_step_conf": 0.7552804946899414,
|
|
"adv/ratio_final_to_reasoning": 1.3475170570698247,
|
|
"adv/ratio_step_to_reasoning": 1.540588066406662,
|
|
"adv/std_final_conf": 0.8490501046180725,
|
|
"adv/std_reasoning": 0.7392802834510803,
|
|
"adv/std_step_conf": 0.9333322048187256,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6984117212509234,
|
|
"calib/avg_num_step_conf": 6.46875,
|
|
"calib/ece": 0.4463137254901964,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.8392156862745098,
|
|
"calib/gap": 0.02378047278995321,
|
|
"calib/mean_conf": 0.9600392156862746,
|
|
"calib/mu_c": 0.9716030534351147,
|
|
"calib/mu_w": 0.9478225806451614,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4463137254901964,
|
|
"calib/std_conf": 0.038719686832468826,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4325380710659899,
|
|
"calib/step_q_c_n": 788.0,
|
|
"calib/step_q_gap": 0.04828691899225723,
|
|
"calib/step_q_w": 0.38425115207373267,
|
|
"calib/step_q_w_n": 868.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2492.0,
|
|
"completions/max_terminated_length": 2492.0,
|
|
"completions/mean_length": 515.0390625,
|
|
"completions/mean_terminated_length": 515.0390625,
|
|
"completions/min_length": 172.0,
|
|
"completions/min_terminated_length": 172.0,
|
|
"epoch": 0.1248,
|
|
"grad_norm": 0.09659411013126373,
|
|
"learning_rate": 2.305555555555556e-06,
|
|
"loss": -0.1011,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.03404964506626129,
|
|
"mask/share_reasoning": 0.8259721398353577,
|
|
"mask/share_step_conf": 0.13997818529605865,
|
|
"num_tokens": 29555084.0,
|
|
"reward": 1.0920822620391846,
|
|
"reward_std": 0.18369436264038086,
|
|
"rewards/accuracy_reward_step": 0.51171875,
|
|
"rewards/final_brier_reward_step": 0.5591816306114197,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/step_l2_reward": 0.8822801113128662,
|
|
"step": 117
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5085911154747009,
|
|
"adv/mean_abs_reasoning": 0.33327728509902954,
|
|
"adv/mean_abs_step_conf": 0.7403470277786255,
|
|
"adv/ratio_final_to_reasoning": 1.5260299402750441,
|
|
"adv/ratio_step_to_reasoning": 2.221414602434246,
|
|
"adv/std_final_conf": 0.7594017386436462,
|
|
"adv/std_reasoning": 0.6401320695877075,
|
|
"adv/std_step_conf": 0.9297709465026855,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.7194628647214855,
|
|
"calib/avg_num_step_conf": 6.90625,
|
|
"calib/ece": 0.36622489959839366,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.7429718875502008,
|
|
"calib/gap": 0.03466909814323582,
|
|
"calib/mean_conf": 0.9485542168674699,
|
|
"calib/mu_c": 0.9630344827586206,
|
|
"calib/mu_w": 0.9283653846153848,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.36622489959839366,
|
|
"calib/std_conf": 0.05085926396649889,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4019910011248594,
|
|
"calib/step_q_c_n": 889.0,
|
|
"calib/step_q_gap": 0.04269634811006989,
|
|
"calib/step_q_w": 0.3592946530147895,
|
|
"calib/step_q_w_n": 879.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 2543.0,
|
|
"completions/max_terminated_length": 2543.0,
|
|
"completions/mean_length": 499.3046875,
|
|
"completions/mean_terminated_length": 507.2301940917969,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 134.0,
|
|
"epoch": 0.12586666666666665,
|
|
"grad_norm": 0.06263020634651184,
|
|
"learning_rate": 2.277777777777778e-06,
|
|
"loss": -0.0716,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03446434438228607,
|
|
"mask/share_reasoning": 0.8053621053695679,
|
|
"mask/share_step_conf": 0.14454856514930725,
|
|
"num_tokens": 29786914.0,
|
|
"reward": 1.0896894931793213,
|
|
"reward_std": 0.1556907594203949,
|
|
"rewards/accuracy_reward_step": 0.5703125,
|
|
"rewards/final_brier_reward_step": 0.6195191144943237,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/step_l2_reward": 0.8341772556304932,
|
|
"step": 118
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6502874493598938,
|
|
"adv/mean_abs_reasoning": 0.4082077741622925,
|
|
"adv/mean_abs_step_conf": 0.7355071902275085,
|
|
"adv/ratio_final_to_reasoning": 1.5930305362125634,
|
|
"adv/ratio_step_to_reasoning": 1.8017961361389716,
|
|
"adv/std_final_conf": 0.8503894209861755,
|
|
"adv/std_reasoning": 0.6816251277923584,
|
|
"adv/std_step_conf": 0.9338120222091675,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.7246560228452752,
|
|
"calib/avg_num_step_conf": 7.41796875,
|
|
"calib/ece": 0.35003984063745025,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.5139442231075697,
|
|
"calib/gap": 0.049859813084112026,
|
|
"calib/mean_conf": 0.9237450199203188,
|
|
"calib/mu_c": 0.9449999999999998,
|
|
"calib/mu_w": 0.8951401869158878,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.35003984063745025,
|
|
"calib/std_conf": 0.05878177893667188,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.42159609120521174,
|
|
"calib/step_q_c_n": 921.0,
|
|
"calib/step_q_gap": 0.08709711370009926,
|
|
"calib/step_q_w": 0.3344989775051125,
|
|
"calib/step_q_w_n": 978.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2936.0,
|
|
"completions/max_terminated_length": 2936.0,
|
|
"completions/mean_length": 639.0,
|
|
"completions/mean_terminated_length": 639.0,
|
|
"completions/min_length": 111.0,
|
|
"completions/min_terminated_length": 111.0,
|
|
"epoch": 0.12693333333333334,
|
|
"grad_norm": 0.11909914016723633,
|
|
"learning_rate": 2.25e-06,
|
|
"loss": 0.0345,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.030238190665841103,
|
|
"mask/share_reasoning": 0.8378813862800598,
|
|
"mask/share_step_conf": 0.13188043236732483,
|
|
"num_tokens": 30055562.0,
|
|
"reward": 1.1247520446777344,
|
|
"reward_std": 0.1627296805381775,
|
|
"rewards/accuracy_reward_step": 0.5625,
|
|
"rewards/final_brier_reward_step": 0.6410672068595886,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/step_l2_reward": 0.8665619492530823,
|
|
"step": 119
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5520824790000916,
|
|
"adv/mean_abs_reasoning": 0.27621161937713623,
|
|
"adv/mean_abs_step_conf": 0.7582372426986694,
|
|
"adv/ratio_final_to_reasoning": 1.9987663091257735,
|
|
"adv/ratio_step_to_reasoning": 2.7451315929739395,
|
|
"adv/std_final_conf": 0.8035014867782593,
|
|
"adv/std_reasoning": 0.5725486874580383,
|
|
"adv/std_step_conf": 0.932507336139679,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.7933700118392646,
|
|
"calib/avg_num_step_conf": 6.1015625,
|
|
"calib/ece": 0.2482812499999999,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.515625,
|
|
"calib/gap": 0.06056689184483577,
|
|
"calib/mean_conf": 0.9240625,
|
|
"calib/mu_c": 0.9436994219653179,
|
|
"calib/mu_w": 0.8831325301204821,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.2482812499999999,
|
|
"calib/std_conf": 0.05941035131818359,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4170348258706468,
|
|
"calib/step_q_c_n": 1005.0,
|
|
"calib/step_q_gap": 0.058489044901167464,
|
|
"calib/step_q_w": 0.3585457809694793,
|
|
"calib/step_q_w_n": 557.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1254.0,
|
|
"completions/max_terminated_length": 1254.0,
|
|
"completions/mean_length": 469.94140625,
|
|
"completions/mean_terminated_length": 471.7843322753906,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 163.0,
|
|
"epoch": 0.128,
|
|
"grad_norm": 0.09170673042535782,
|
|
"learning_rate": 2.222222222222222e-06,
|
|
"loss": -0.0359,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.03412052243947983,
|
|
"mask/share_reasoning": 0.82478928565979,
|
|
"mask/share_step_conf": 0.13718390464782715,
|
|
"num_tokens": 30282555.0,
|
|
"reward": 1.2048192024230957,
|
|
"reward_std": 0.10698007047176361,
|
|
"rewards/accuracy_reward_step": 0.67578125,
|
|
"rewards/final_brier_reward_step": 0.7422664165496826,
|
|
"rewards/format_reward_step": 1.0,
|
|
"rewards/step_l2_reward": 0.8881438374519348,
|
|
"step": 120
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6064180135726929,
|
|
"adv/mean_abs_reasoning": 0.48254069685935974,
|
|
"adv/mean_abs_step_conf": 0.7727996706962585,
|
|
"adv/ratio_final_to_reasoning": 1.256718899607007,
|
|
"adv/ratio_step_to_reasoning": 1.6015222668804183,
|
|
"adv/std_final_conf": 0.8220515847206116,
|
|
"adv/std_reasoning": 0.739255428314209,
|
|
"adv/std_step_conf": 0.9320149421691895,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5662593984962406,
|
|
"calib/avg_num_step_conf": 6.72265625,
|
|
"calib/ece": 0.38208661417322853,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.5984251968503937,
|
|
"calib/gap": 0.013408521303258425,
|
|
"calib/mean_conf": 0.9332677165354332,
|
|
"calib/mu_c": 0.9392857142857145,
|
|
"calib/mu_w": 0.9258771929824561,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.38208661417322853,
|
|
"calib/std_conf": 0.06015032989206602,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4264102564102564,
|
|
"calib/step_q_c_n": 858.0,
|
|
"calib/step_q_gap": 0.030454288855215894,
|
|
"calib/step_q_w": 0.39595596755504053,
|
|
"calib/step_q_w_n": 863.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2808.0,
|
|
"completions/max_terminated_length": 2808.0,
|
|
"completions/mean_length": 547.4296875,
|
|
"completions/mean_terminated_length": 549.5764770507812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 122.0,
|
|
"epoch": 0.12906666666666666,
|
|
"grad_norm": 0.10754235833883286,
|
|
"learning_rate": 2.1944444444444445e-06,
|
|
"loss": 0.0076,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.0344381257891655,
|
|
"mask/share_reasoning": 0.8288736939430237,
|
|
"mask/share_step_conf": 0.1327819526195526,
|
|
"num_tokens": 30527753.0,
|
|
"reward": 1.109503984451294,
|
|
"reward_std": 0.17705771327018738,
|
|
"rewards/accuracy_reward_step": 0.546875,
|
|
"rewards/final_brier_reward_step": 0.6048824191093445,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/step_l2_reward": 0.8708754181861877,
|
|
"step": 121
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5294678211212158,
|
|
"adv/mean_abs_reasoning": 0.43470725417137146,
|
|
"adv/mean_abs_step_conf": 0.7290816307067871,
|
|
"adv/ratio_final_to_reasoning": 1.217987084504662,
|
|
"adv/ratio_step_to_reasoning": 1.6771784314861389,
|
|
"adv/std_final_conf": 0.7790201306343079,
|
|
"adv/std_reasoning": 0.7205578684806824,
|
|
"adv/std_step_conf": 0.9326818585395813,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.7954926624737946,
|
|
"calib/avg_num_step_conf": 7.2890625,
|
|
"calib/ece": 0.2971084337349401,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.6867469879518072,
|
|
"calib/gap": 0.0826457023060797,
|
|
"calib/mean_conf": 0.9356626506024097,
|
|
"calib/mu_c": 0.9655345911949686,
|
|
"calib/mu_w": 0.882888888888889,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.2971084337349401,
|
|
"calib/std_conf": 0.07459436929369202,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4255555555555556,
|
|
"calib/step_q_c_n": 981.0,
|
|
"calib/step_q_gap": 0.11382674199623355,
|
|
"calib/step_q_w": 0.311728813559322,
|
|
"calib/step_q_w_n": 885.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2948.0,
|
|
"completions/max_terminated_length": 2948.0,
|
|
"completions/mean_length": 519.33203125,
|
|
"completions/mean_terminated_length": 523.4212646484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 134.0,
|
|
"epoch": 0.13013333333333332,
|
|
"grad_norm": 0.06262241303920746,
|
|
"learning_rate": 2.166666666666667e-06,
|
|
"loss": -0.0341,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03336969390511513,
|
|
"mask/share_reasoning": 0.8161087036132812,
|
|
"mask/share_step_conf": 0.14270910620689392,
|
|
"num_tokens": 30768046.0,
|
|
"reward": 1.1628289222717285,
|
|
"reward_std": 0.19497352838516235,
|
|
"rewards/accuracy_reward_step": 0.62109375,
|
|
"rewards/final_brier_reward_step": 0.6939992308616638,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/step_l2_reward": 0.8752723932266235,
|
|
"step": 122
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6652189493179321,
|
|
"adv/mean_abs_reasoning": 0.5161447525024414,
|
|
"adv/mean_abs_step_conf": 0.7464606761932373,
|
|
"adv/ratio_final_to_reasoning": 1.288822459383205,
|
|
"adv/ratio_step_to_reasoning": 1.44622351108705,
|
|
"adv/std_final_conf": 0.8896883130073547,
|
|
"adv/std_reasoning": 0.7754158973693848,
|
|
"adv/std_step_conf": 0.93485027551651,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.6206302794022093,
|
|
"calib/avg_num_step_conf": 6.27734375,
|
|
"calib/ece": 0.38052208835341383,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.570281124497992,
|
|
"calib/gap": 0.03020272904483412,
|
|
"calib/mean_conf": 0.9226907630522089,
|
|
"calib/mu_c": 0.9365185185185185,
|
|
"calib/mu_w": 0.9063157894736844,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.38052208835341383,
|
|
"calib/std_conf": 0.07372741472306667,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4364375,
|
|
"calib/step_q_c_n": 800.0,
|
|
"calib/step_q_gap": 0.06425387717121583,
|
|
"calib/step_q_w": 0.37218362282878414,
|
|
"calib/step_q_w_n": 806.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2745.0,
|
|
"completions/max_terminated_length": 2745.0,
|
|
"completions/mean_length": 603.11328125,
|
|
"completions/mean_terminated_length": 605.4784545898438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 128.0,
|
|
"epoch": 0.1312,
|
|
"grad_norm": 0.08993158489465714,
|
|
"learning_rate": 2.138888888888889e-06,
|
|
"loss": -0.0675,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03210265934467316,
|
|
"mask/share_reasoning": 0.8405189514160156,
|
|
"mask/share_step_conf": 0.12347208708524704,
|
|
"num_tokens": 31027731.0,
|
|
"reward": 1.0731170177459717,
|
|
"reward_std": 0.22716030478477478,
|
|
"rewards/accuracy_reward_step": 0.53125,
|
|
"rewards/final_brier_reward_step": 0.5956000089645386,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"rewards/step_l2_reward": 0.8342767953872681,
|
|
"step": 123
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5562883615493774,
|
|
"adv/mean_abs_reasoning": 0.3537698984146118,
|
|
"adv/mean_abs_step_conf": 0.7436978816986084,
|
|
"adv/ratio_final_to_reasoning": 1.5724581544171339,
|
|
"adv/ratio_step_to_reasoning": 2.102207918286502,
|
|
"adv/std_final_conf": 0.7942464351654053,
|
|
"adv/std_reasoning": 0.6402589082717896,
|
|
"adv/std_step_conf": 0.9312188029289246,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6676470588235294,
|
|
"calib/avg_num_step_conf": 6.49609375,
|
|
"calib/ece": 0.3279051383399212,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.6482213438735178,
|
|
"calib/gap": 0.048364705882352976,
|
|
"calib/mean_conf": 0.9326482213438737,
|
|
"calib/mu_c": 0.951764705882353,
|
|
"calib/mu_w": 0.9034,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3279051383399212,
|
|
"calib/std_conf": 0.070803566481362,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4387348178137652,
|
|
"calib/step_q_c_n": 988.0,
|
|
"calib/step_q_gap": 0.03294222522117268,
|
|
"calib/step_q_w": 0.40579259259259254,
|
|
"calib/step_q_w_n": 675.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2152.0,
|
|
"completions/max_terminated_length": 2152.0,
|
|
"completions/mean_length": 525.47265625,
|
|
"completions/mean_terminated_length": 527.5333862304688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 150.0,
|
|
"epoch": 0.13226666666666667,
|
|
"grad_norm": 0.08522067964076996,
|
|
"learning_rate": 2.1111111111111114e-06,
|
|
"loss": -0.0915,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03211629018187523,
|
|
"mask/share_reasoning": 0.8295165300369263,
|
|
"mask/share_step_conf": 0.1344609558582306,
|
|
"num_tokens": 31269068.0,
|
|
"reward": 1.132234811782837,
|
|
"reward_std": 0.15816253423690796,
|
|
"rewards/accuracy_reward_step": 0.6015625,
|
|
"rewards/final_brier_reward_step": 0.6636874675750732,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8552088737487793,
|
|
"step": 124
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5968010425567627,
|
|
"adv/mean_abs_reasoning": 0.5435364246368408,
|
|
"adv/mean_abs_step_conf": 0.7577736377716064,
|
|
"adv/ratio_final_to_reasoning": 1.0979964092664263,
|
|
"adv/ratio_step_to_reasoning": 1.3941542892510035,
|
|
"adv/std_final_conf": 0.7921422123908997,
|
|
"adv/std_reasoning": 0.7577725648880005,
|
|
"adv/std_step_conf": 0.9344490766525269,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.5902048144700092,
|
|
"calib/avg_num_step_conf": 6.61328125,
|
|
"calib/ece": 0.3313253012048195,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.6024096385542169,
|
|
"calib/gap": 0.029971405772043935,
|
|
"calib/mean_conf": 0.9176706827309239,
|
|
"calib/mu_c": 0.9300684931506851,
|
|
"calib/mu_w": 0.9000970873786411,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3313253012048195,
|
|
"calib/std_conf": 0.08431745797170943,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.42316427783902977,
|
|
"calib/step_q_c_n": 907.0,
|
|
"calib/step_q_gap": 0.012136288017146835,
|
|
"calib/step_q_w": 0.41102798982188293,
|
|
"calib/step_q_w_n": 786.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2591.0,
|
|
"completions/max_terminated_length": 2591.0,
|
|
"completions/mean_length": 538.6796875,
|
|
"completions/mean_terminated_length": 545.0671997070312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 143.0,
|
|
"epoch": 0.13333333333333333,
|
|
"grad_norm": 0.08532305061817169,
|
|
"learning_rate": 2.0833333333333334e-06,
|
|
"loss": -0.0838,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.033499591052532196,
|
|
"mask/share_reasoning": 0.82036292552948,
|
|
"mask/share_step_conf": 0.13441874086856842,
|
|
"num_tokens": 31511778.0,
|
|
"reward": 1.1019268035888672,
|
|
"reward_std": 0.21884110569953918,
|
|
"rewards/accuracy_reward_step": 0.5703125,
|
|
"rewards/final_brier_reward_step": 0.6370406150817871,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"rewards/step_l2_reward": 0.839333713054657,
|
|
"step": 125
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.54444819688797,
|
|
"adv/mean_abs_reasoning": 0.41777682304382324,
|
|
"adv/mean_abs_step_conf": 0.7403782606124878,
|
|
"adv/ratio_final_to_reasoning": 1.3032034494428126,
|
|
"adv/ratio_step_to_reasoning": 1.7721860567043108,
|
|
"adv/std_final_conf": 0.7756986021995544,
|
|
"adv/std_reasoning": 0.6817764043807983,
|
|
"adv/std_step_conf": 0.9344750046730042,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.7156347352024922,
|
|
"calib/avg_num_step_conf": 7.6875,
|
|
"calib/ece": 0.3045418326693228,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.545816733067729,
|
|
"calib/gap": 0.10299389927310498,
|
|
"calib/mean_conf": 0.8782470119521912,
|
|
"calib/mu_c": 0.9221527777777779,
|
|
"calib/mu_w": 0.819158878504673,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3045418326693228,
|
|
"calib/std_conf": 0.13293842275301604,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.41729032258064513,
|
|
"calib/step_q_c_n": 930.0,
|
|
"calib/step_q_gap": 0.09042134377525018,
|
|
"calib/step_q_w": 0.32686897880539495,
|
|
"calib/step_q_w_n": 1038.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2424.0,
|
|
"completions/max_terminated_length": 2424.0,
|
|
"completions/mean_length": 537.33203125,
|
|
"completions/mean_terminated_length": 543.7035522460938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 188.0,
|
|
"epoch": 0.1344,
|
|
"grad_norm": 0.10794106125831604,
|
|
"learning_rate": 2.0555555555555555e-06,
|
|
"loss": -0.0897,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.032997895032167435,
|
|
"mask/share_reasoning": 0.8024303317070007,
|
|
"mask/share_step_conf": 0.15285301208496094,
|
|
"num_tokens": 31754799.0,
|
|
"reward": 1.1234350204467773,
|
|
"reward_std": 0.1804707646369934,
|
|
"rewards/accuracy_reward_step": 0.5625,
|
|
"rewards/final_brier_reward_step": 0.6802804470062256,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.8391845226287842,
|
|
"step": 126
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6498432755470276,
|
|
"adv/mean_abs_reasoning": 0.4853948950767517,
|
|
"adv/mean_abs_step_conf": 0.7725515365600586,
|
|
"adv/ratio_final_to_reasoning": 1.3387929748298506,
|
|
"adv/ratio_step_to_reasoning": 1.5915938638742813,
|
|
"adv/std_final_conf": 0.8430218696594238,
|
|
"adv/std_reasoning": 0.7394492626190186,
|
|
"adv/std_step_conf": 0.9329238533973694,
|
|
"calib/answer_extract_rate": 0.94921875,
|
|
"calib/auroc": 0.6920798898071625,
|
|
"calib/avg_num_step_conf": 7.234375,
|
|
"calib/ece": 0.35410788381742747,
|
|
"calib/final_conf_rate": 0.94140625,
|
|
"calib/format_rate": 0.94140625,
|
|
"calib/frac_conf_gt_0.9": 0.43983402489626555,
|
|
"calib/gap": 0.0855454545454547,
|
|
"calib/mean_conf": 0.8480497925311203,
|
|
"calib/mu_c": 0.8910000000000001,
|
|
"calib/mu_w": 0.8054545454545454,
|
|
"calib/nonempty_final_conf_rate": 0.94140625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3521161825726142,
|
|
"calib/std_conf": 0.14278474357536333,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4363019390581717,
|
|
"calib/step_q_c_n": 722.0,
|
|
"calib/step_q_gap": 0.07835503640330443,
|
|
"calib/step_q_w": 0.3579469026548673,
|
|
"calib/step_q_w_n": 1130.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0234375,
|
|
"completions/max_length": 2625.0,
|
|
"completions/max_terminated_length": 2625.0,
|
|
"completions/mean_length": 540.75390625,
|
|
"completions/mean_terminated_length": 553.7320556640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 148.0,
|
|
"epoch": 0.13546666666666668,
|
|
"grad_norm": 0.1398288458585739,
|
|
"learning_rate": 2.027777777777778e-06,
|
|
"loss": -0.0272,
|
|
"mask/has_final_conf_rate": 0.94140625,
|
|
"mask/share_final_conf": 0.033511899411678314,
|
|
"mask/share_reasoning": 0.8006240129470825,
|
|
"mask/share_step_conf": 0.14242660999298096,
|
|
"num_tokens": 31996904.0,
|
|
"reward": 1.0577702522277832,
|
|
"reward_std": 0.20324760675430298,
|
|
"rewards/accuracy_reward_step": 0.46875,
|
|
"rewards/final_brier_reward_step": 0.6117273569107056,
|
|
"rewards/format_reward_step": 0.94140625,
|
|
"rewards/step_l2_reward": 0.814521312713623,
|
|
"step": 127
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7356740832328796,
|
|
"adv/mean_abs_reasoning": 0.5345216393470764,
|
|
"adv/mean_abs_step_conf": 0.7763804197311401,
|
|
"adv/ratio_final_to_reasoning": 1.3763223583080995,
|
|
"adv/ratio_step_to_reasoning": 1.4524770609464879,
|
|
"adv/std_final_conf": 0.8902674317359924,
|
|
"adv/std_reasoning": 0.7576658129692078,
|
|
"adv/std_step_conf": 0.9353314638137817,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.655264857881137,
|
|
"calib/avg_num_step_conf": 5.9140625,
|
|
"calib/ece": 0.26156626506024083,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.22088353413654618,
|
|
"calib/gap": 0.0783507751937984,
|
|
"calib/mean_conf": 0.7786746987951807,
|
|
"calib/mu_c": 0.8164341085271317,
|
|
"calib/mu_w": 0.7380833333333333,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.2610843373493975,
|
|
"calib/std_conf": 0.1617631598415949,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4414387464387464,
|
|
"calib/step_q_c_n": 702.0,
|
|
"calib/step_q_gap": 0.07308899274416514,
|
|
"calib/step_q_w": 0.3683497536945813,
|
|
"calib/step_q_w_n": 812.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 2324.0,
|
|
"completions/max_terminated_length": 2324.0,
|
|
"completions/mean_length": 524.58984375,
|
|
"completions/mean_terminated_length": 532.9166870117188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 143.0,
|
|
"epoch": 0.13653333333333334,
|
|
"grad_norm": 0.15617628395557404,
|
|
"learning_rate": 2.0000000000000003e-06,
|
|
"loss": -0.1302,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03690294548869133,
|
|
"mask/share_reasoning": 0.8143789172172546,
|
|
"mask/share_step_conf": 0.13309314846992493,
|
|
"num_tokens": 32237863.0,
|
|
"reward": 1.115687608718872,
|
|
"reward_std": 0.20451577007770538,
|
|
"rewards/accuracy_reward_step": 0.5078125,
|
|
"rewards/final_brier_reward_step": 0.6724511981010437,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"rewards/step_l2_reward": 0.8424077033996582,
|
|
"step": 128
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7323572635650635,
|
|
"adv/mean_abs_reasoning": 0.3838053047657013,
|
|
"adv/mean_abs_step_conf": 0.7599761486053467,
|
|
"adv/ratio_final_to_reasoning": 1.9081478407708306,
|
|
"adv/ratio_step_to_reasoning": 1.9801085059761838,
|
|
"adv/std_final_conf": 0.8994174003601074,
|
|
"adv/std_reasoning": 0.6403990983963013,
|
|
"adv/std_step_conf": 0.9336506128311157,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6279545454545454,
|
|
"calib/avg_num_step_conf": 6.3125,
|
|
"calib/ece": 0.18479999999999996,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.124,
|
|
"calib/gap": 0.09089610389610414,
|
|
"calib/mean_conf": 0.73672,
|
|
"calib/mu_c": 0.7767142857142858,
|
|
"calib/mu_w": 0.6858181818181817,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.18075999999999995,
|
|
"calib/std_conf": 0.16621564787949417,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.43683823529411764,
|
|
"calib/step_q_c_n": 816.0,
|
|
"calib/step_q_gap": 0.041325735294117705,
|
|
"calib/step_q_w": 0.39551249999999993,
|
|
"calib/step_q_w_n": 800.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2545.0,
|
|
"completions/max_terminated_length": 2545.0,
|
|
"completions/mean_length": 485.7890625,
|
|
"completions/mean_terminated_length": 489.6141662597656,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 139.0,
|
|
"epoch": 0.1376,
|
|
"grad_norm": 0.15342004597187042,
|
|
"learning_rate": 1.9722222222222224e-06,
|
|
"loss": -0.0737,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.03681214898824692,
|
|
"mask/share_reasoning": 0.8119223117828369,
|
|
"mask/share_step_conf": 0.14345306158065796,
|
|
"num_tokens": 32464609.0,
|
|
"reward": 1.1368520259857178,
|
|
"reward_std": 0.15180076658725739,
|
|
"rewards/accuracy_reward_step": 0.55078125,
|
|
"rewards/final_brier_reward_step": 0.7222031354904175,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.8306880593299866,
|
|
"step": 129
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7020096182823181,
|
|
"adv/mean_abs_reasoning": 0.2727423906326294,
|
|
"adv/mean_abs_step_conf": 0.743945837020874,
|
|
"adv/ratio_final_to_reasoning": 2.5738925901983847,
|
|
"adv/ratio_step_to_reasoning": 2.727650202431981,
|
|
"adv/std_final_conf": 0.9080822467803955,
|
|
"adv/std_reasoning": 0.5726157426834106,
|
|
"adv/std_step_conf": 0.9340441226959229,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6890972222222222,
|
|
"calib/avg_num_step_conf": 6.19921875,
|
|
"calib/ece": 0.05103999999999999,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.116,
|
|
"calib/gap": 0.17471527777777762,
|
|
"calib/mean_conf": 0.69104,
|
|
"calib/mu_c": 0.7539374999999999,
|
|
"calib/mu_w": 0.5792222222222223,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.05103999999999999,
|
|
"calib/std_conf": 0.20370497882967908,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4516629464285714,
|
|
"calib/step_q_c_n": 896.0,
|
|
"calib/step_q_gap": 0.08234312008993178,
|
|
"calib/step_q_w": 0.36931982633863963,
|
|
"calib/step_q_w_n": 691.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 3032.0,
|
|
"completions/max_terminated_length": 3032.0,
|
|
"completions/mean_length": 477.81640625,
|
|
"completions/mean_terminated_length": 479.6902160644531,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 106.0,
|
|
"epoch": 0.13866666666666666,
|
|
"grad_norm": 0.16025367379188538,
|
|
"learning_rate": 1.944444444444445e-06,
|
|
"loss": 0.0115,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.03768278285861015,
|
|
"mask/share_reasoning": 0.8125240802764893,
|
|
"mask/share_step_conf": 0.1458868533372879,
|
|
"num_tokens": 32692218.0,
|
|
"reward": 1.1927818059921265,
|
|
"reward_std": 0.12730564177036285,
|
|
"rewards/accuracy_reward_step": 0.62890625,
|
|
"rewards/final_brier_reward_step": 0.7871171832084656,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.851568341255188,
|
|
"step": 130
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6797161102294922,
|
|
"adv/mean_abs_reasoning": 0.3434407413005829,
|
|
"adv/mean_abs_step_conf": 0.7750881910324097,
|
|
"adv/ratio_final_to_reasoning": 1.9791365102912983,
|
|
"adv/ratio_step_to_reasoning": 2.256832395880617,
|
|
"adv/std_final_conf": 0.8826343417167664,
|
|
"adv/std_reasoning": 0.6402589678764343,
|
|
"adv/std_step_conf": 0.9340447187423706,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.7046836589468236,
|
|
"calib/avg_num_step_conf": 6.55078125,
|
|
"calib/ece": 0.1591967871485945,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.060240963855421686,
|
|
"calib/gap": 0.1846002070125501,
|
|
"calib/mean_conf": 0.6292369477911647,
|
|
"calib/mu_c": 0.7263559322033898,
|
|
"calib/mu_w": 0.5417557251908397,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.157269076305221,
|
|
"calib/std_conf": 0.2165441886147318,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.42487394957983193,
|
|
"calib/step_q_c_n": 714.0,
|
|
"calib/step_q_gap": 0.04866418841679976,
|
|
"calib/step_q_w": 0.3762097611630322,
|
|
"calib/step_q_w_n": 963.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2812.0,
|
|
"completions/max_terminated_length": 2812.0,
|
|
"completions/mean_length": 470.01171875,
|
|
"completions/mean_terminated_length": 475.5849914550781,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 161.0,
|
|
"epoch": 0.13973333333333332,
|
|
"grad_norm": 0.11347653716802597,
|
|
"learning_rate": 1.916666666666667e-06,
|
|
"loss": -0.0434,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.035636741667985916,
|
|
"mask/share_reasoning": 0.8070310950279236,
|
|
"mask/share_step_conf": 0.1456134170293808,
|
|
"num_tokens": 32918749.0,
|
|
"reward": 1.1516947746276855,
|
|
"reward_std": 0.12257316708564758,
|
|
"rewards/accuracy_reward_step": 0.4609375,
|
|
"rewards/final_brier_reward_step": 0.7506062388420105,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/step_l2_reward": 0.8440431356430054,
|
|
"step": 131
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7347065806388855,
|
|
"adv/mean_abs_reasoning": 0.5124211311340332,
|
|
"adv/mean_abs_step_conf": 0.7653183937072754,
|
|
"adv/ratio_final_to_reasoning": 1.4337944631848318,
|
|
"adv/ratio_step_to_reasoning": 1.4935340234965686,
|
|
"adv/std_final_conf": 0.889552116394043,
|
|
"adv/std_reasoning": 0.7576735615730286,
|
|
"adv/std_step_conf": 0.9337661266326904,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.7162384598869249,
|
|
"calib/avg_num_step_conf": 7.77734375,
|
|
"calib/ece": 0.06260162601626003,
|
|
"calib/final_conf_rate": 0.9609375,
|
|
"calib/format_rate": 0.95703125,
|
|
"calib/frac_conf_gt_0.9": 0.14227642276422764,
|
|
"calib/gap": 0.20626207686252068,
|
|
"calib/mean_conf": 0.6051219512195123,
|
|
"calib/mu_c": 0.6797452229299363,
|
|
"calib/mu_w": 0.47348314606741565,
|
|
"calib/nonempty_final_conf_rate": 0.9609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.01475609756097548,
|
|
"calib/std_conf": 0.2543487326699838,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4245586708203531,
|
|
"calib/step_q_c_n": 963.0,
|
|
"calib/step_q_gap": 0.10524026770448164,
|
|
"calib/step_q_w": 0.31931840311587145,
|
|
"calib/step_q_w_n": 1027.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 3029.0,
|
|
"completions/max_terminated_length": 3029.0,
|
|
"completions/mean_length": 580.7734375,
|
|
"completions/mean_terminated_length": 589.9921264648438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 151.0,
|
|
"epoch": 0.1408,
|
|
"grad_norm": 0.1195579469203949,
|
|
"learning_rate": 1.888888888888889e-06,
|
|
"loss": -0.0584,
|
|
"mask/has_final_conf_rate": 0.9609375,
|
|
"mask/share_final_conf": 0.03593384847044945,
|
|
"mask/share_reasoning": 0.7987890243530273,
|
|
"mask/share_step_conf": 0.1496521532535553,
|
|
"num_tokens": 33173019.0,
|
|
"reward": 1.1788052320480347,
|
|
"reward_std": 0.18831761181354523,
|
|
"rewards/accuracy_reward_step": 0.6171875,
|
|
"rewards/final_brier_reward_step": 0.7660914063453674,
|
|
"rewards/format_reward_step": 0.95703125,
|
|
"rewards/step_l2_reward": 0.8511168360710144,
|
|
"step": 132
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7405315041542053,
|
|
"adv/mean_abs_reasoning": 0.45831504464149475,
|
|
"adv/mean_abs_step_conf": 0.751035749912262,
|
|
"adv/ratio_final_to_reasoning": 1.6157695733803965,
|
|
"adv/ratio_step_to_reasoning": 1.6386888423000394,
|
|
"adv/std_final_conf": 0.9202378988265991,
|
|
"adv/std_reasoning": 0.7393971681594849,
|
|
"adv/std_step_conf": 0.9354689121246338,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.6978641456582634,
|
|
"calib/avg_num_step_conf": 7.25390625,
|
|
"calib/ece": 0.09942148760330577,
|
|
"calib/final_conf_rate": 0.9453125,
|
|
"calib/format_rate": 0.94140625,
|
|
"calib/frac_conf_gt_0.9": 0.06611570247933884,
|
|
"calib/gap": 0.15078011204481806,
|
|
"calib/mean_conf": 0.520909090909091,
|
|
"calib/mu_c": 0.6081372549019609,
|
|
"calib/mu_w": 0.45735714285714285,
|
|
"calib/nonempty_final_conf_rate": 0.9453125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.09942148760330577,
|
|
"calib/std_conf": 0.22624129589211192,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4150064516129032,
|
|
"calib/step_q_c_n": 775.0,
|
|
"calib/step_q_gap": 0.04018205235227473,
|
|
"calib/step_q_w": 0.3748243992606285,
|
|
"calib/step_q_w_n": 1082.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.03515625,
|
|
"completions/max_length": 2730.0,
|
|
"completions/max_terminated_length": 2730.0,
|
|
"completions/mean_length": 574.1484375,
|
|
"completions/mean_terminated_length": 595.06884765625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 147.0,
|
|
"epoch": 0.14186666666666667,
|
|
"grad_norm": 0.12294993549585342,
|
|
"learning_rate": 1.8611111111111113e-06,
|
|
"loss": -0.219,
|
|
"mask/has_final_conf_rate": 0.9453125,
|
|
"mask/share_final_conf": 0.03028670698404312,
|
|
"mask/share_reasoning": 0.8093153238296509,
|
|
"mask/share_step_conf": 0.12524168193340302,
|
|
"num_tokens": 33426345.0,
|
|
"reward": 1.1139624118804932,
|
|
"reward_std": 0.2063348889350891,
|
|
"rewards/accuracy_reward_step": 0.41015625,
|
|
"rewards/final_brier_reward_step": 0.7242355346679688,
|
|
"rewards/format_reward_step": 0.94140625,
|
|
"rewards/step_l2_reward": 0.8222510814666748,
|
|
"step": 133
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.719078779220581,
|
|
"adv/mean_abs_reasoning": 0.5342764854431152,
|
|
"adv/mean_abs_step_conf": 0.7093563675880432,
|
|
"adv/ratio_final_to_reasoning": 1.3458926207920148,
|
|
"adv/ratio_step_to_reasoning": 1.3276952793453398,
|
|
"adv/std_final_conf": 0.9208303093910217,
|
|
"adv/std_reasoning": 0.7929035425186157,
|
|
"adv/std_step_conf": 0.9340146780014038,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.6706476649270708,
|
|
"calib/avg_num_step_conf": 7.1875,
|
|
"calib/ece": 0.11578947368421054,
|
|
"calib/final_conf_rate": 0.96484375,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 0.1214574898785425,
|
|
"calib/gap": 0.16008229626655962,
|
|
"calib/mean_conf": 0.5161943319838057,
|
|
"calib/mu_c": 0.5848936170212766,
|
|
"calib/mu_w": 0.424811320754717,
|
|
"calib/nonempty_final_conf_rate": 0.96484375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.030566801619433225,
|
|
"calib/std_conf": 0.25029717294785286,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4097724810400866,
|
|
"calib/step_q_c_n": 923.0,
|
|
"calib/step_q_gap": 0.09069941669984671,
|
|
"calib/step_q_w": 0.3190730643402399,
|
|
"calib/step_q_w_n": 917.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2678.0,
|
|
"completions/max_terminated_length": 2678.0,
|
|
"completions/mean_length": 618.80859375,
|
|
"completions/mean_terminated_length": 626.146240234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 80.0,
|
|
"epoch": 0.14293333333333333,
|
|
"grad_norm": 0.1309865415096283,
|
|
"learning_rate": 1.8333333333333333e-06,
|
|
"loss": -0.1192,
|
|
"mask/has_final_conf_rate": 0.96484375,
|
|
"mask/share_final_conf": 0.029283631592988968,
|
|
"mask/share_reasoning": 0.8341566324234009,
|
|
"mask/share_step_conf": 0.12484101951122284,
|
|
"num_tokens": 33693712.0,
|
|
"reward": 1.156282901763916,
|
|
"reward_std": 0.2100716233253479,
|
|
"rewards/accuracy_reward_step": 0.5546875,
|
|
"rewards/final_brier_reward_step": 0.7374527454376221,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"rewards/step_l2_reward": 0.8479920625686646,
|
|
"step": 134
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7240814566612244,
|
|
"adv/mean_abs_reasoning": 0.4685104787349701,
|
|
"adv/mean_abs_step_conf": 0.7005765438079834,
|
|
"adv/ratio_final_to_reasoning": 1.5454968234997093,
|
|
"adv/ratio_step_to_reasoning": 1.495327373892719,
|
|
"adv/std_final_conf": 0.905745804309845,
|
|
"adv/std_reasoning": 0.739366888999939,
|
|
"adv/std_step_conf": 0.9335731863975525,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.6283891547049442,
|
|
"calib/avg_num_step_conf": 7.11328125,
|
|
"calib/ece": 0.16039840637450203,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.23107569721115537,
|
|
"calib/gap": 0.12990829346092503,
|
|
"calib/mean_conf": 0.6083665338645419,
|
|
"calib/mu_c": 0.6596052631578947,
|
|
"calib/mu_w": 0.5296969696969697,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.08159362549800796,
|
|
"calib/std_conf": 0.2731039692196431,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4204934541792548,
|
|
"calib/step_q_c_n": 993.0,
|
|
"calib/step_q_gap": 0.08036060393770894,
|
|
"calib/step_q_w": 0.3401328502415459,
|
|
"calib/step_q_w_n": 828.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2583.0,
|
|
"completions/max_terminated_length": 2583.0,
|
|
"completions/mean_length": 566.87890625,
|
|
"completions/mean_terminated_length": 571.342529296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 185.0,
|
|
"epoch": 0.144,
|
|
"grad_norm": 0.16953939199447632,
|
|
"learning_rate": 1.8055555555555557e-06,
|
|
"loss": -0.0455,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.033975474536418915,
|
|
"mask/share_reasoning": 0.8183913230895996,
|
|
"mask/share_step_conf": 0.13982072472572327,
|
|
"num_tokens": 33944713.0,
|
|
"reward": 1.1777615547180176,
|
|
"reward_std": 0.16259250044822693,
|
|
"rewards/accuracy_reward_step": 0.59375,
|
|
"rewards/final_brier_reward_step": 0.732399582862854,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.8727073073387146,
|
|
"step": 135
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6876845955848694,
|
|
"adv/mean_abs_reasoning": 0.4227200746536255,
|
|
"adv/mean_abs_step_conf": 0.7711788415908813,
|
|
"adv/ratio_final_to_reasoning": 1.6268084645574363,
|
|
"adv/ratio_step_to_reasoning": 1.8243250979333807,
|
|
"adv/std_final_conf": 0.890141487121582,
|
|
"adv/std_reasoning": 0.7012588381767273,
|
|
"adv/std_step_conf": 0.9338822960853577,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.7928475033738192,
|
|
"calib/avg_num_step_conf": 7.93359375,
|
|
"calib/ece": 0.12668000000000007,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.324,
|
|
"calib/gap": 0.3159302101407366,
|
|
"calib/mean_conf": 0.59372,
|
|
"calib/mu_c": 0.7617948717948719,
|
|
"calib/mu_w": 0.44586466165413535,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.1262000000000001,
|
|
"calib/std_conf": 0.3059218226933149,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4130848329048843,
|
|
"calib/step_q_c_n": 778.0,
|
|
"calib/step_q_gap": 0.10942162460480448,
|
|
"calib/step_q_w": 0.3036632083000798,
|
|
"calib/step_q_w_n": 1253.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2555.0,
|
|
"completions/max_terminated_length": 2555.0,
|
|
"completions/mean_length": 546.58203125,
|
|
"completions/mean_terminated_length": 553.0632934570312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 134.0,
|
|
"epoch": 0.14506666666666668,
|
|
"grad_norm": 0.17194612324237823,
|
|
"learning_rate": 1.777777777777778e-06,
|
|
"loss": -0.0445,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.03519256412982941,
|
|
"mask/share_reasoning": 0.8008227348327637,
|
|
"mask/share_step_conf": 0.15226593613624573,
|
|
"num_tokens": 34193126.0,
|
|
"reward": 1.1875487565994263,
|
|
"reward_std": 0.15522706508636475,
|
|
"rewards/accuracy_reward_step": 0.45703125,
|
|
"rewards/final_brier_reward_step": 0.7767167687416077,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/step_l2_reward": 0.8749620914459229,
|
|
"step": 136
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6520270109176636,
|
|
"adv/mean_abs_reasoning": 0.3962523937225342,
|
|
"adv/mean_abs_step_conf": 0.7401241064071655,
|
|
"adv/ratio_final_to_reasoning": 1.6454840935906854,
|
|
"adv/ratio_step_to_reasoning": 1.8678098053974632,
|
|
"adv/std_final_conf": 0.8592166304588318,
|
|
"adv/std_reasoning": 0.7013719081878662,
|
|
"adv/std_step_conf": 0.9343339800834656,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.6180776989653153,
|
|
"calib/avg_num_step_conf": 7.67578125,
|
|
"calib/ece": 0.2571370967741937,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.4959677419354839,
|
|
"calib/gap": 0.11666493134639155,
|
|
"calib/mean_conf": 0.7086693548387096,
|
|
"calib/mu_c": 0.7655905511811025,
|
|
"calib/mu_w": 0.648925619834711,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.22685483870967757,
|
|
"calib/std_conf": 0.3003322144906771,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4082,
|
|
"calib/step_q_c_n": 900.0,
|
|
"calib/step_q_gap": 0.062425352112676136,
|
|
"calib/step_q_w": 0.34577464788732387,
|
|
"calib/step_q_w_n": 1065.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2719.0,
|
|
"completions/max_terminated_length": 2719.0,
|
|
"completions/mean_length": 543.8359375,
|
|
"completions/mean_terminated_length": 550.2846069335938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 143.0,
|
|
"epoch": 0.14613333333333334,
|
|
"grad_norm": 0.11026394367218018,
|
|
"learning_rate": 1.75e-06,
|
|
"loss": -0.082,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.033572569489479065,
|
|
"mask/share_reasoning": 0.7995332479476929,
|
|
"mask/share_step_conf": 0.15517544746398926,
|
|
"num_tokens": 34439332.0,
|
|
"reward": 1.1067839860916138,
|
|
"reward_std": 0.22292152047157288,
|
|
"rewards/accuracy_reward_step": 0.5,
|
|
"rewards/final_brier_reward_step": 0.6548605561256409,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"rewards/step_l2_reward": 0.8438256978988647,
|
|
"step": 137
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5672576427459717,
|
|
"adv/mean_abs_reasoning": 0.46197739243507385,
|
|
"adv/mean_abs_step_conf": 0.7603617310523987,
|
|
"adv/ratio_final_to_reasoning": 1.2278904812981597,
|
|
"adv/ratio_step_to_reasoning": 1.6458851526143883,
|
|
"adv/std_final_conf": 0.799165666103363,
|
|
"adv/std_reasoning": 0.7206169366836548,
|
|
"adv/std_step_conf": 0.933541476726532,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.7085352622061483,
|
|
"calib/avg_num_step_conf": 7.29296875,
|
|
"calib/ece": 0.14988188976377964,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.5748031496062992,
|
|
"calib/gap": 0.2578669077757687,
|
|
"calib/mean_conf": 0.733740157480315,
|
|
"calib/mu_c": 0.8139428571428573,
|
|
"calib/mu_w": 0.5560759493670886,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.09732283464566938,
|
|
"calib/std_conf": 0.3059408288371813,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.39558082859463856,
|
|
"calib/step_q_c_n": 1231.0,
|
|
"calib/step_q_gap": 0.06086384746256307,
|
|
"calib/step_q_w": 0.3347169811320755,
|
|
"calib/step_q_w_n": 636.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1635.0,
|
|
"completions/max_terminated_length": 1635.0,
|
|
"completions/mean_length": 515.046875,
|
|
"completions/mean_terminated_length": 517.0667114257812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 111.0,
|
|
"epoch": 0.1472,
|
|
"grad_norm": 0.07487356662750244,
|
|
"learning_rate": 1.7222222222222224e-06,
|
|
"loss": -0.1116,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.036223817616701126,
|
|
"mask/share_reasoning": 0.8065900206565857,
|
|
"mask/share_step_conf": 0.15327994525432587,
|
|
"num_tokens": 34675520.0,
|
|
"reward": 1.2233290672302246,
|
|
"reward_std": 0.16680964827537537,
|
|
"rewards/accuracy_reward_step": 0.68359375,
|
|
"rewards/final_brier_reward_step": 0.7919644117355347,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8802123665809631,
|
|
"step": 138
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6250104904174805,
|
|
"adv/mean_abs_reasoning": 0.4324566721916199,
|
|
"adv/mean_abs_step_conf": 0.7376917004585266,
|
|
"adv/ratio_final_to_reasoning": 1.4452557460844095,
|
|
"adv/ratio_step_to_reasoning": 1.7058164387198038,
|
|
"adv/std_final_conf": 0.8434977531433105,
|
|
"adv/std_reasoning": 0.7205010652542114,
|
|
"adv/std_step_conf": 0.9194177985191345,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.6968005952380952,
|
|
"calib/avg_num_step_conf": 5.95703125,
|
|
"calib/ece": 0.20024000000000017,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.704,
|
|
"calib/gap": 0.2039502164502166,
|
|
"calib/mean_conf": 0.8148000000000001,
|
|
"calib/mu_c": 0.8931168831168832,
|
|
"calib/mu_w": 0.6891666666666666,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.19952000000000017,
|
|
"calib/std_conf": 0.26922585314192987,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.436105651105651,
|
|
"calib/step_q_c_n": 814.0,
|
|
"calib/step_q_gap": 0.08655572142913909,
|
|
"calib/step_q_w": 0.34954992967651194,
|
|
"calib/step_q_w_n": 711.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2971.0,
|
|
"completions/max_terminated_length": 2971.0,
|
|
"completions/mean_length": 475.13671875,
|
|
"completions/mean_terminated_length": 478.8779602050781,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 119.0,
|
|
"epoch": 0.14826666666666666,
|
|
"grad_norm": 0.14141923189163208,
|
|
"learning_rate": 1.6944444444444446e-06,
|
|
"loss": 0.0495,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.03738235682249069,
|
|
"mask/share_reasoning": 0.8101898431777954,
|
|
"mask/share_step_conf": 0.1446153074502945,
|
|
"num_tokens": 34900251.0,
|
|
"reward": 1.1667213439941406,
|
|
"reward_std": 0.20426005125045776,
|
|
"rewards/accuracy_reward_step": 0.6015625,
|
|
"rewards/final_brier_reward_step": 0.7265039086341858,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/step_l2_reward": 0.8613967895507812,
|
|
"step": 139
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4288480281829834,
|
|
"adv/mean_abs_reasoning": 0.33103907108306885,
|
|
"adv/mean_abs_step_conf": 0.7590134143829346,
|
|
"adv/ratio_final_to_reasoning": 1.2954604626575061,
|
|
"adv/ratio_step_to_reasoning": 2.2928212428208288,
|
|
"adv/std_final_conf": 0.6866632699966431,
|
|
"adv/std_reasoning": 0.6185633540153503,
|
|
"adv/std_step_conf": 0.9310550689697266,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.7131553708439897,
|
|
"calib/avg_num_step_conf": 5.73828125,
|
|
"calib/ece": 0.15658730158730189,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.7936507936507936,
|
|
"calib/gap": 0.2169980818414322,
|
|
"calib/mean_conf": 0.8618253968253968,
|
|
"calib/mu_c": 0.9203804347826088,
|
|
"calib/mu_w": 0.7033823529411766,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.14412698412698444,
|
|
"calib/std_conf": 0.24140264985082757,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4601015228426396,
|
|
"calib/step_q_c_n": 985.0,
|
|
"calib/step_q_gap": 0.12627920879305282,
|
|
"calib/step_q_w": 0.3338223140495868,
|
|
"calib/step_q_w_n": 484.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2959.0,
|
|
"completions/max_terminated_length": 2959.0,
|
|
"completions/mean_length": 485.9453125,
|
|
"completions/mean_terminated_length": 487.85101318359375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 101.0,
|
|
"epoch": 0.14933333333333335,
|
|
"grad_norm": 0.1173972561955452,
|
|
"learning_rate": 1.6666666666666667e-06,
|
|
"loss": 0.0124,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.03842870146036148,
|
|
"mask/share_reasoning": 0.8230201005935669,
|
|
"mask/share_step_conf": 0.13464492559432983,
|
|
"num_tokens": 35129669.0,
|
|
"reward": 1.235023021697998,
|
|
"reward_std": 0.13625219464302063,
|
|
"rewards/accuracy_reward_step": 0.71875,
|
|
"rewards/final_brier_reward_step": 0.8001695275306702,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/step_l2_reward": 0.8861677646636963,
|
|
"step": 140
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5950652956962585,
|
|
"adv/mean_abs_reasoning": 0.48388898372650146,
|
|
"adv/mean_abs_step_conf": 0.7792325019836426,
|
|
"adv/ratio_final_to_reasoning": 1.229755823564264,
|
|
"adv/ratio_step_to_reasoning": 1.6103538790708904,
|
|
"adv/std_final_conf": 0.8279574513435364,
|
|
"adv/std_reasoning": 0.7575324177742004,
|
|
"adv/std_step_conf": 0.9340521097183228,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.6933891612200437,
|
|
"calib/avg_num_step_conf": 6.671875,
|
|
"calib/ece": 0.26662650602409677,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.8232931726907631,
|
|
"calib/gap": 0.2202246732026144,
|
|
"calib/mean_conf": 0.8759437751004017,
|
|
"calib/mu_c": 0.9608496732026145,
|
|
"calib/mu_w": 0.7406250000000001,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.2640562248995988,
|
|
"calib/std_conf": 0.23709082878886054,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.41911298838437167,
|
|
"calib/step_q_c_n": 947.0,
|
|
"calib/step_q_gap": 0.09636660205060033,
|
|
"calib/step_q_w": 0.32274638633377134,
|
|
"calib/step_q_w_n": 761.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2538.0,
|
|
"completions/max_terminated_length": 2538.0,
|
|
"completions/mean_length": 569.62109375,
|
|
"completions/mean_terminated_length": 574.1063232421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 137.0,
|
|
"epoch": 0.1504,
|
|
"grad_norm": 0.08798349648714066,
|
|
"learning_rate": 1.638888888888889e-06,
|
|
"loss": 0.0634,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03277936577796936,
|
|
"mask/share_reasoning": 0.8236855864524841,
|
|
"mask/share_step_conf": 0.1357225626707077,
|
|
"num_tokens": 35382588.0,
|
|
"reward": 1.165555715560913,
|
|
"reward_std": 0.2309819459915161,
|
|
"rewards/accuracy_reward_step": 0.59765625,
|
|
"rewards/final_brier_reward_step": 0.7223124504089355,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"rewards/step_l2_reward": 0.8636784553527832,
|
|
"step": 141
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5308142900466919,
|
|
"adv/mean_abs_reasoning": 0.3850702941417694,
|
|
"adv/mean_abs_step_conf": 0.7345419526100159,
|
|
"adv/ratio_final_to_reasoning": 1.3784867285848454,
|
|
"adv/ratio_step_to_reasoning": 1.907552890432995,
|
|
"adv/std_final_conf": 0.7917589545249939,
|
|
"adv/std_reasoning": 0.6814852356910706,
|
|
"adv/std_step_conf": 0.9330111742019653,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5695026587425712,
|
|
"calib/avg_num_step_conf": 6.98828125,
|
|
"calib/ece": 0.4083464566929137,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.9330708661417323,
|
|
"calib/gap": 0.02395871129183602,
|
|
"calib/mean_conf": 0.9468503937007876,
|
|
"calib/mu_c": 0.957697841726619,
|
|
"calib/mu_w": 0.933739130434783,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4039763779527562,
|
|
"calib/std_conf": 0.13166230238729587,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4570890410958904,
|
|
"calib/step_q_c_n": 876.0,
|
|
"calib/step_q_gap": 0.1062337779379956,
|
|
"calib/step_q_w": 0.3508552631578948,
|
|
"calib/step_q_w_n": 912.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2279.0,
|
|
"completions/max_terminated_length": 2279.0,
|
|
"completions/mean_length": 514.02734375,
|
|
"completions/mean_terminated_length": 518.0748291015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 143.0,
|
|
"epoch": 0.15146666666666667,
|
|
"grad_norm": 0.07767757773399353,
|
|
"learning_rate": 1.6111111111111113e-06,
|
|
"loss": -0.0422,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.034924302250146866,
|
|
"mask/share_reasoning": 0.8092361688613892,
|
|
"mask/share_step_conf": 0.14802706241607666,
|
|
"num_tokens": 35619339.0,
|
|
"reward": 1.0883498191833496,
|
|
"reward_std": 0.17449073493480682,
|
|
"rewards/accuracy_reward_step": 0.54296875,
|
|
"rewards/final_brier_reward_step": 0.5785929560661316,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8612378835678101,
|
|
"step": 142
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4907079339027405,
|
|
"adv/mean_abs_reasoning": 0.43104416131973267,
|
|
"adv/mean_abs_step_conf": 0.7607996463775635,
|
|
"adv/ratio_final_to_reasoning": 1.1384168443445204,
|
|
"adv/ratio_step_to_reasoning": 1.7650155474747988,
|
|
"adv/std_final_conf": 0.75885409116745,
|
|
"adv/std_reasoning": 0.7205471992492676,
|
|
"adv/std_step_conf": 0.9344484806060791,
|
|
"calib/answer_extract_rate": 0.96484375,
|
|
"calib/auroc": 0.6228807812288079,
|
|
"calib/avg_num_step_conf": 7.0703125,
|
|
"calib/ece": 0.35720647773279385,
|
|
"calib/final_conf_rate": 0.96484375,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.9392712550607287,
|
|
"calib/gap": 0.07718974637189746,
|
|
"calib/mean_conf": 0.9482995951417007,
|
|
"calib/mu_c": 0.9798630136986304,
|
|
"calib/mu_w": 0.9026732673267329,
|
|
"calib/nonempty_final_conf_rate": 0.96484375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.35720647773279385,
|
|
"calib/std_conf": 0.13272427097218972,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4336813441483198,
|
|
"calib/step_q_c_n": 863.0,
|
|
"calib/step_q_gap": 0.1462367823742966,
|
|
"calib/step_q_w": 0.2874445617740232,
|
|
"calib/step_q_w_n": 947.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2668.0,
|
|
"completions/max_terminated_length": 2668.0,
|
|
"completions/mean_length": 551.10546875,
|
|
"completions/mean_terminated_length": 555.4448852539062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 134.0,
|
|
"epoch": 0.15253333333333333,
|
|
"grad_norm": 0.06190464645624161,
|
|
"learning_rate": 1.5833333333333333e-06,
|
|
"loss": 0.0387,
|
|
"mask/has_final_conf_rate": 0.96484375,
|
|
"mask/share_final_conf": 0.033856652677059174,
|
|
"mask/share_reasoning": 0.8064756989479065,
|
|
"mask/share_step_conf": 0.15185511112213135,
|
|
"num_tokens": 35867758.0,
|
|
"reward": 1.1040698289871216,
|
|
"reward_std": 0.2155992090702057,
|
|
"rewards/accuracy_reward_step": 0.5703125,
|
|
"rewards/final_brier_reward_step": 0.6275339722633362,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"rewards/step_l2_reward": 0.8490495681762695,
|
|
"step": 143
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5087292194366455,
|
|
"adv/mean_abs_reasoning": 0.4547651410102844,
|
|
"adv/mean_abs_step_conf": 0.7651829123497009,
|
|
"adv/ratio_final_to_reasoning": 1.1186636211968162,
|
|
"adv/ratio_step_to_reasoning": 1.682589194611107,
|
|
"adv/std_final_conf": 0.7767125964164734,
|
|
"adv/std_reasoning": 0.739241898059845,
|
|
"adv/std_step_conf": 0.934445858001709,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5587776517300056,
|
|
"calib/avg_num_step_conf": 6.9609375,
|
|
"calib/ece": 0.2990944881889768,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.9645669291338582,
|
|
"calib/gap": 0.015146057855927375,
|
|
"calib/mean_conf": 0.9558661417322838,
|
|
"calib/mu_c": 0.9607558139534887,
|
|
"calib/mu_w": 0.9456097560975614,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.288897637795276,
|
|
"calib/std_conf": 0.12398500431327406,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4573327222731439,
|
|
"calib/step_q_c_n": 1091.0,
|
|
"calib/step_q_gap": 0.11548033442943917,
|
|
"calib/step_q_w": 0.34185238784370475,
|
|
"calib/step_q_w_n": 691.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2965.0,
|
|
"completions/max_terminated_length": 2965.0,
|
|
"completions/mean_length": 510.98046875,
|
|
"completions/mean_terminated_length": 512.9843139648438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 91.0,
|
|
"epoch": 0.1536,
|
|
"grad_norm": 0.06918393820524216,
|
|
"learning_rate": 1.5555555555555558e-06,
|
|
"loss": 0.038,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03654374182224274,
|
|
"mask/share_reasoning": 0.8113521337509155,
|
|
"mask/share_step_conf": 0.14819784462451935,
|
|
"num_tokens": 36102697.0,
|
|
"reward": 1.1596009731292725,
|
|
"reward_std": 0.20381799340248108,
|
|
"rewards/accuracy_reward_step": 0.671875,
|
|
"rewards/final_brier_reward_step": 0.6893793344497681,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8651941418647766,
|
|
"step": 144
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6027562618255615,
|
|
"adv/mean_abs_reasoning": 0.6014331579208374,
|
|
"adv/mean_abs_step_conf": 0.7539108991622925,
|
|
"adv/ratio_final_to_reasoning": 1.0021999184569372,
|
|
"adv/ratio_step_to_reasoning": 1.2535240021826743,
|
|
"adv/std_final_conf": 0.8105014562606812,
|
|
"adv/std_reasoning": 0.8099634647369385,
|
|
"adv/std_step_conf": 0.92684006690979,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.52,
|
|
"calib/avg_num_step_conf": 6.75,
|
|
"calib/ece": 0.3752589641434265,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.9920318725099602,
|
|
"calib/gap": 0.00790000000000013,
|
|
"calib/mean_conf": 0.9768525896414345,
|
|
"calib/mu_c": 0.9800000000000005,
|
|
"calib/mu_w": 0.9721000000000004,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3752589641434265,
|
|
"calib/std_conf": 0.039690713928804806,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4916683937823835,
|
|
"calib/step_q_c_n": 965.0,
|
|
"calib/step_q_gap": 0.10609827582694448,
|
|
"calib/step_q_w": 0.385570117955439,
|
|
"calib/step_q_w_n": 763.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 2448.0,
|
|
"completions/max_terminated_length": 2448.0,
|
|
"completions/mean_length": 474.296875,
|
|
"completions/mean_terminated_length": 481.825439453125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 121.0,
|
|
"epoch": 0.15466666666666667,
|
|
"grad_norm": 0.06234937906265259,
|
|
"learning_rate": 1.527777777777778e-06,
|
|
"loss": 0.0024,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.0364404022693634,
|
|
"mask/share_reasoning": 0.7930680513381958,
|
|
"mask/share_step_conf": 0.1548665463924408,
|
|
"num_tokens": 36326821.0,
|
|
"reward": 1.0898528099060059,
|
|
"reward_std": 0.2445923089981079,
|
|
"rewards/accuracy_reward_step": 0.58984375,
|
|
"rewards/final_brier_reward_step": 0.6094160079956055,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.8380056619644165,
|
|
"step": 145
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5295411348342896,
|
|
"adv/mean_abs_reasoning": 0.4761459529399872,
|
|
"adv/mean_abs_step_conf": 0.7413994073867798,
|
|
"adv/ratio_final_to_reasoning": 1.112140366970697,
|
|
"adv/ratio_step_to_reasoning": 1.5570843410701525,
|
|
"adv/std_final_conf": 0.7750407457351685,
|
|
"adv/std_reasoning": 0.7393215894699097,
|
|
"adv/std_step_conf": 0.9353768229484558,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5346930995043843,
|
|
"calib/avg_num_step_conf": 6.53515625,
|
|
"calib/ece": 0.49091633466135476,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.9960159362549801,
|
|
"calib/gap": 0.005731986275257617,
|
|
"calib/mean_conf": 0.9769721115537852,
|
|
"calib/mu_c": 0.9799180327868856,
|
|
"calib/mu_w": 0.974186046511628,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.49091633466135476,
|
|
"calib/std_conf": 0.04159656383203995,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4806713286713286,
|
|
"calib/step_q_c_n": 715.0,
|
|
"calib/step_q_gap": 0.09888281092602591,
|
|
"calib/step_q_w": 0.3817885177453027,
|
|
"calib/step_q_w_n": 958.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2716.0,
|
|
"completions/max_terminated_length": 2716.0,
|
|
"completions/mean_length": 520.83203125,
|
|
"completions/mean_terminated_length": 527.0079345703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 143.0,
|
|
"epoch": 0.15573333333333333,
|
|
"grad_norm": 0.06934022903442383,
|
|
"learning_rate": 1.5e-06,
|
|
"loss": -0.1303,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03379040211439133,
|
|
"mask/share_reasoning": 0.8066377639770508,
|
|
"mask/share_step_conf": 0.1478530764579773,
|
|
"num_tokens": 36567370.0,
|
|
"reward": 1.0298337936401367,
|
|
"reward_std": 0.22394514083862305,
|
|
"rewards/accuracy_reward_step": 0.48046875,
|
|
"rewards/final_brier_reward_step": 0.5003616809844971,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/step_l2_reward": 0.844745397567749,
|
|
"step": 146
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.38683611154556274,
|
|
"adv/mean_abs_reasoning": 0.3548612594604492,
|
|
"adv/mean_abs_step_conf": 0.7753262519836426,
|
|
"adv/ratio_final_to_reasoning": 1.0901052206536432,
|
|
"adv/ratio_step_to_reasoning": 2.1848714992515434,
|
|
"adv/std_final_conf": 0.6618477702140808,
|
|
"adv/std_reasoning": 0.6403102874755859,
|
|
"adv/std_step_conf": 0.9351269602775574,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.5350567959263611,
|
|
"calib/avg_num_step_conf": 6.9921875,
|
|
"calib/ece": 0.5202811244979922,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.9718875502008032,
|
|
"calib/gap": 0.02156090873482197,
|
|
"calib/mean_conf": 0.9608433734939761,
|
|
"calib/mu_c": 0.9727927927927933,
|
|
"calib/mu_w": 0.9512318840579713,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.517670682730924,
|
|
"calib/std_conf": 0.1125841566811578,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.5180636237897648,
|
|
"calib/step_q_c_n": 723.0,
|
|
"calib/step_q_gap": 0.1042023304439354,
|
|
"calib/step_q_w": 0.4138612933458294,
|
|
"calib/step_q_w_n": 1067.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2475.0,
|
|
"completions/max_terminated_length": 2475.0,
|
|
"completions/mean_length": 521.671875,
|
|
"completions/mean_terminated_length": 527.8577270507812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 93.0,
|
|
"epoch": 0.1568,
|
|
"grad_norm": 0.07205287367105484,
|
|
"learning_rate": 1.4722222222222225e-06,
|
|
"loss": -0.1356,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03373868763446808,
|
|
"mask/share_reasoning": 0.8053416013717651,
|
|
"mask/share_step_conf": 0.14920096099376678,
|
|
"num_tokens": 36804598.0,
|
|
"reward": 0.9906857013702393,
|
|
"reward_std": 0.195381760597229,
|
|
"rewards/accuracy_reward_step": 0.43359375,
|
|
"rewards/final_brier_reward_step": 0.47235190868377686,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/step_l2_reward": 0.8185129165649414,
|
|
"step": 147
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.42417389154434204,
|
|
"adv/mean_abs_reasoning": 0.36322999000549316,
|
|
"adv/mean_abs_step_conf": 0.7607502937316895,
|
|
"adv/ratio_final_to_reasoning": 1.167783231604657,
|
|
"adv/ratio_step_to_reasoning": 2.0944038616419984,
|
|
"adv/std_final_conf": 0.7210102081298828,
|
|
"adv/std_reasoning": 0.6610972285270691,
|
|
"adv/std_step_conf": 0.9344878196716309,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5526422764227642,
|
|
"calib/avg_num_step_conf": 6.359375,
|
|
"calib/ece": 0.3232677165354333,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.984251968503937,
|
|
"calib/gap": 0.031050135501354936,
|
|
"calib/mean_conf": 0.968937007874016,
|
|
"calib/mu_c": 0.9799390243902442,
|
|
"calib/mu_w": 0.9488888888888892,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3232677165354333,
|
|
"calib/std_conf": 0.0875972573575166,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.49106045589692765,
|
|
"calib/step_q_c_n": 1009.0,
|
|
"calib/step_q_gap": 0.11026885654313123,
|
|
"calib/step_q_w": 0.3807915993537964,
|
|
"calib/step_q_w_n": 619.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2382.0,
|
|
"completions/max_terminated_length": 2382.0,
|
|
"completions/mean_length": 484.01953125,
|
|
"completions/mean_terminated_length": 485.91766357421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 95.0,
|
|
"epoch": 0.15786666666666666,
|
|
"grad_norm": 0.08672936260700226,
|
|
"learning_rate": 1.4444444444444445e-06,
|
|
"loss": -0.0994,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.039496589452028275,
|
|
"mask/share_reasoning": 0.7975192070007324,
|
|
"mask/share_step_conf": 0.1590779423713684,
|
|
"num_tokens": 37033619.0,
|
|
"reward": 1.135124683380127,
|
|
"reward_std": 0.17193685472011566,
|
|
"rewards/accuracy_reward_step": 0.640625,
|
|
"rewards/final_brier_reward_step": 0.6679917573928833,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/step_l2_reward": 0.8504635095596313,
|
|
"step": 148
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5512717366218567,
|
|
"adv/mean_abs_reasoning": 0.5307047367095947,
|
|
"adv/mean_abs_step_conf": 0.7301586866378784,
|
|
"adv/ratio_final_to_reasoning": 1.0387541291599898,
|
|
"adv/ratio_step_to_reasoning": 1.3758284713358913,
|
|
"adv/std_final_conf": 0.8100273013114929,
|
|
"adv/std_reasoning": 0.7927494645118713,
|
|
"adv/std_step_conf": 0.9347800612449646,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.531609756097561,
|
|
"calib/avg_num_step_conf": 6.79296875,
|
|
"calib/ece": 0.47762096774193585,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.9879032258064516,
|
|
"calib/gap": 0.012074796747967409,
|
|
"calib/mean_conf": 0.9735887096774196,
|
|
"calib/mu_c": 0.9796747967479679,
|
|
"calib/mu_w": 0.9676000000000005,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.47762096774193585,
|
|
"calib/std_conf": 0.05830627035620779,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.5213084112149532,
|
|
"calib/step_q_c_n": 749.0,
|
|
"calib/step_q_gap": 0.09424780515434716,
|
|
"calib/step_q_w": 0.42706060606060603,
|
|
"calib/step_q_w_n": 990.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2659.0,
|
|
"completions/max_terminated_length": 2659.0,
|
|
"completions/mean_length": 543.52734375,
|
|
"completions/mean_terminated_length": 549.9723510742188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 27.0,
|
|
"epoch": 0.15893333333333334,
|
|
"grad_norm": 0.08807147294282913,
|
|
"learning_rate": 1.4166666666666667e-06,
|
|
"loss": -0.0886,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.03507719561457634,
|
|
"mask/share_reasoning": 0.8086420297622681,
|
|
"mask/share_step_conf": 0.1445620059967041,
|
|
"num_tokens": 37277218.0,
|
|
"reward": 1.0231540203094482,
|
|
"reward_std": 0.24548467993736267,
|
|
"rewards/accuracy_reward_step": 0.48046875,
|
|
"rewards/final_brier_reward_step": 0.50790935754776,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"rewards/step_l2_reward": 0.8328907489776611,
|
|
"step": 149
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4628121554851532,
|
|
"adv/mean_abs_reasoning": 0.4400729835033417,
|
|
"adv/mean_abs_step_conf": 0.750774621963501,
|
|
"adv/ratio_final_to_reasoning": 1.051671365510305,
|
|
"adv/ratio_step_to_reasoning": 1.7060229782494702,
|
|
"adv/std_final_conf": 0.7392811179161072,
|
|
"adv/std_reasoning": 0.7205407023429871,
|
|
"adv/std_step_conf": 0.9340653419494629,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.5326130586031577,
|
|
"calib/avg_num_step_conf": 6.53515625,
|
|
"calib/ece": 0.3857831325301207,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.9959839357429718,
|
|
"calib/gap": 0.00014784586566762847,
|
|
"calib/mean_conf": 0.9791967871485946,
|
|
"calib/mu_c": 0.9792567567567572,
|
|
"calib/mu_w": 0.9791089108910895,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.3853012048192773,
|
|
"calib/std_conf": 0.006658435911988018,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.5091600454029511,
|
|
"calib/step_q_c_n": 881.0,
|
|
"calib/step_q_gap": 0.12332671206961776,
|
|
"calib/step_q_w": 0.38583333333333336,
|
|
"calib/step_q_w_n": 792.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 2896.0,
|
|
"completions/max_terminated_length": 2896.0,
|
|
"completions/mean_length": 465.40625,
|
|
"completions/mean_terminated_length": 472.7936706542969,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 125.0,
|
|
"epoch": 0.16,
|
|
"grad_norm": 0.07247061282396317,
|
|
"learning_rate": 1.3888888888888892e-06,
|
|
"loss": -0.0224,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.0417606383562088,
|
|
"mask/share_reasoning": 0.7887001037597656,
|
|
"mask/share_step_conf": 0.15391427278518677,
|
|
"num_tokens": 37501322.0,
|
|
"reward": 1.0706796646118164,
|
|
"reward_std": 0.1999371200799942,
|
|
"rewards/accuracy_reward_step": 0.578125,
|
|
"rewards/final_brier_reward_step": 0.5901405811309814,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"rewards/step_l2_reward": 0.8284165859222412,
|
|
"step": 150
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5551609396934509,
|
|
"adv/mean_abs_reasoning": 0.4884258508682251,
|
|
"adv/mean_abs_step_conf": 0.7733472585678101,
|
|
"adv/ratio_final_to_reasoning": 1.1366329990654622,
|
|
"adv/ratio_step_to_reasoning": 1.5833462892946986,
|
|
"adv/std_final_conf": 0.7760764360427856,
|
|
"adv/std_reasoning": 0.7393485307693481,
|
|
"adv/std_step_conf": 0.9337877631187439,
|
|
"calib/answer_extract_rate": 0.96484375,
|
|
"calib/auroc": 0.5478505291005291,
|
|
"calib/avg_num_step_conf": 6.484375,
|
|
"calib/ece": 0.5027125506072876,
|
|
"calib/final_conf_rate": 0.96484375,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.9392712550607287,
|
|
"calib/gap": 0.016022486772486966,
|
|
"calib/mean_conf": 0.9396356275303648,
|
|
"calib/mu_c": 0.9483928571428574,
|
|
"calib/mu_w": 0.9323703703703704,
|
|
"calib/nonempty_final_conf_rate": 0.96484375,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.49445344129554675,
|
|
"calib/std_conf": 0.1601702940721179,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4606629055007052,
|
|
"calib/step_q_c_n": 709.0,
|
|
"calib/step_q_gap": 0.021914219906593757,
|
|
"calib/step_q_w": 0.43874868559411145,
|
|
"calib/step_q_w_n": 951.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2617.0,
|
|
"completions/max_terminated_length": 2617.0,
|
|
"completions/mean_length": 550.63671875,
|
|
"completions/mean_terminated_length": 550.63671875,
|
|
"completions/min_length": 154.0,
|
|
"completions/min_terminated_length": 154.0,
|
|
"epoch": 0.16106666666666666,
|
|
"grad_norm": 0.08831620961427689,
|
|
"learning_rate": 1.3611111111111112e-06,
|
|
"loss": -0.0575,
|
|
"mask/has_final_conf_rate": 0.96484375,
|
|
"mask/share_final_conf": 0.034764066338539124,
|
|
"mask/share_reasoning": 0.824368953704834,
|
|
"mask/share_step_conf": 0.1408669650554657,
|
|
"num_tokens": 37749309.0,
|
|
"reward": 1.002524733543396,
|
|
"reward_std": 0.22087067365646362,
|
|
"rewards/accuracy_reward_step": 0.4375,
|
|
"rewards/final_brier_reward_step": 0.480559766292572,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"rewards/step_l2_reward": 0.8293472528457642,
|
|
"step": 151
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5274214744567871,
|
|
"adv/mean_abs_reasoning": 0.4665879011154175,
|
|
"adv/mean_abs_step_conf": 0.7688575983047485,
|
|
"adv/ratio_final_to_reasoning": 1.130379663073007,
|
|
"adv/ratio_step_to_reasoning": 1.647830122612974,
|
|
"adv/std_final_conf": 0.7581343054771423,
|
|
"adv/std_reasoning": 0.7206973433494568,
|
|
"adv/std_step_conf": 0.9336856603622437,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.5688613231552163,
|
|
"calib/avg_num_step_conf": 6.40625,
|
|
"calib/ece": 0.4482868525896417,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.9721115537848606,
|
|
"calib/gap": 0.016660305343511372,
|
|
"calib/mean_conf": 0.9611952191235061,
|
|
"calib/mu_c": 0.9691603053435115,
|
|
"calib/mu_w": 0.9525000000000001,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.44378486055776917,
|
|
"calib/std_conf": 0.10977778050535121,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.44863247863247857,
|
|
"calib/step_q_c_n": 819.0,
|
|
"calib/step_q_gap": 0.06633040798692436,
|
|
"calib/step_q_w": 0.3823020706455542,
|
|
"calib/step_q_w_n": 821.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2761.0,
|
|
"completions/max_terminated_length": 2761.0,
|
|
"completions/mean_length": 512.44140625,
|
|
"completions/mean_terminated_length": 514.4509887695312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 93.0,
|
|
"epoch": 0.16213333333333332,
|
|
"grad_norm": 0.07020580768585205,
|
|
"learning_rate": 1.3333333333333334e-06,
|
|
"loss": -0.007,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03603512793779373,
|
|
"mask/share_reasoning": 0.8120272755622864,
|
|
"mask/share_step_conf": 0.1480313539505005,
|
|
"num_tokens": 37985886.0,
|
|
"reward": 1.0656317472457886,
|
|
"reward_std": 0.22288453578948975,
|
|
"rewards/accuracy_reward_step": 0.51171875,
|
|
"rewards/final_brier_reward_step": 0.5391280651092529,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.8629859685897827,
|
|
"step": 152
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5444529056549072,
|
|
"adv/mean_abs_reasoning": 0.45993316173553467,
|
|
"adv/mean_abs_step_conf": 0.7532547116279602,
|
|
"adv/ratio_final_to_reasoning": 1.183765274937866,
|
|
"adv/ratio_step_to_reasoning": 1.6377482084257449,
|
|
"adv/std_final_conf": 0.7684758305549622,
|
|
"adv/std_reasoning": 0.7205182313919067,
|
|
"adv/std_step_conf": 0.9347703456878662,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5891861761426979,
|
|
"calib/avg_num_step_conf": 6.32421875,
|
|
"calib/ece": 0.4164313725490199,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.9529411764705882,
|
|
"calib/gap": 0.04690821256038624,
|
|
"calib/mean_conf": 0.9576078431372551,
|
|
"calib/mu_c": 0.9791304347826087,
|
|
"calib/mu_w": 0.9322222222222225,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.4164313725490199,
|
|
"calib/std_conf": 0.10660612468727179,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.4672058823529412,
|
|
"calib/step_q_c_n": 748.0,
|
|
"calib/step_q_gap": 0.0961381441210239,
|
|
"calib/step_q_w": 0.3710677382319173,
|
|
"calib/step_q_w_n": 871.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 3015.0,
|
|
"completions/max_terminated_length": 3015.0,
|
|
"completions/mean_length": 512.5,
|
|
"completions/mean_terminated_length": 514.5098266601562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 138.0,
|
|
"epoch": 0.1632,
|
|
"grad_norm": 0.07010144740343094,
|
|
"learning_rate": 1.3055555555555556e-06,
|
|
"loss": -0.0974,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.034779004752635956,
|
|
"mask/share_reasoning": 0.8166590929031372,
|
|
"mask/share_step_conf": 0.14465561509132385,
|
|
"num_tokens": 38224406.0,
|
|
"reward": 1.087091088294983,
|
|
"reward_std": 0.20078277587890625,
|
|
"rewards/accuracy_reward_step": 0.5390625,
|
|
"rewards/final_brier_reward_step": 0.5799410343170166,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/step_l2_reward": 0.8597024083137512,
|
|
"step": 153
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6241574287414551,
|
|
"adv/mean_abs_reasoning": 0.5177608132362366,
|
|
"adv/mean_abs_step_conf": 0.7719071507453918,
|
|
"adv/ratio_final_to_reasoning": 1.2054937584793104,
|
|
"adv/ratio_step_to_reasoning": 1.490856648498806,
|
|
"adv/std_final_conf": 0.8262401819229126,
|
|
"adv/std_reasoning": 0.7575730681419373,
|
|
"adv/std_step_conf": 0.9345754981040955,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.6089230371900827,
|
|
"calib/avg_num_step_conf": 6.2734375,
|
|
"calib/ece": 0.4795983935742974,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.9839357429718876,
|
|
"calib/gap": 0.025874225206611645,
|
|
"calib/mean_conf": 0.9655421686746989,
|
|
"calib/mu_c": 0.9788429752066119,
|
|
"calib/mu_w": 0.9529687500000003,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.4795983935742974,
|
|
"calib/std_conf": 0.09680336147099437,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.462365747460087,
|
|
"calib/step_q_c_n": 689.0,
|
|
"calib/step_q_gap": 0.0889088227054523,
|
|
"calib/step_q_w": 0.3734569247546347,
|
|
"calib/step_q_w_n": 917.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0234375,
|
|
"completions/max_length": 1487.0,
|
|
"completions/max_terminated_length": 1487.0,
|
|
"completions/mean_length": 450.4453125,
|
|
"completions/mean_terminated_length": 461.2560119628906,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 178.0,
|
|
"epoch": 0.16426666666666667,
|
|
"grad_norm": 0.09528306126594543,
|
|
"learning_rate": 1.2777777777777779e-06,
|
|
"loss": -0.0861,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03584619611501694,
|
|
"mask/share_reasoning": 0.8004294633865356,
|
|
"mask/share_step_conf": 0.14028680324554443,
|
|
"num_tokens": 38444160.0,
|
|
"reward": 1.0437710285186768,
|
|
"reward_std": 0.22803018987178802,
|
|
"rewards/accuracy_reward_step": 0.47265625,
|
|
"rewards/final_brier_reward_step": 0.5094179511070251,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/step_l2_reward": 0.8593744039535522,
|
|
"step": 154
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5692579746246338,
|
|
"adv/mean_abs_reasoning": 0.4835493862628937,
|
|
"adv/mean_abs_step_conf": 0.7541655898094177,
|
|
"adv/ratio_final_to_reasoning": 1.1772488825270528,
|
|
"adv/ratio_step_to_reasoning": 1.5596454286458277,
|
|
"adv/std_final_conf": 0.8165688514709473,
|
|
"adv/std_reasoning": 0.7574070692062378,
|
|
"adv/std_step_conf": 0.9348653554916382,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.6760942760942762,
|
|
"calib/avg_num_step_conf": 6.16796875,
|
|
"calib/ece": 0.4878906250000002,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.93359375,
|
|
"calib/gap": 0.02003183348637927,
|
|
"calib/mean_conf": 0.9311718750000002,
|
|
"calib/mu_c": 0.9417355371900828,
|
|
"calib/mu_w": 0.9217037037037036,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.47320312500000017,
|
|
"calib/std_conf": 0.1767595519596731,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4213231481481481,
|
|
"calib/step_q_c_n": 756.0,
|
|
"calib/step_q_gap": 0.08427576054182978,
|
|
"calib/step_q_w": 0.3370473876063183,
|
|
"calib/step_q_w_n": 823.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1159.0,
|
|
"completions/max_terminated_length": 1159.0,
|
|
"completions/mean_length": 436.0625,
|
|
"completions/mean_terminated_length": 437.7725830078125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 160.0,
|
|
"epoch": 0.16533333333333333,
|
|
"grad_norm": 0.10584117472171783,
|
|
"learning_rate": 1.25e-06,
|
|
"loss": -0.0045,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.038839589804410934,
|
|
"mask/share_reasoning": 0.7988035082817078,
|
|
"mask/share_step_conf": 0.15845061838626862,
|
|
"num_tokens": 38663008.0,
|
|
"reward": 1.0421983003616333,
|
|
"reward_std": 0.19966325163841248,
|
|
"rewards/accuracy_reward_step": 0.47265625,
|
|
"rewards/final_brier_reward_step": 0.515350341796875,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/step_l2_reward": 0.8501973748207092,
|
|
"step": 155
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.630531907081604,
|
|
"adv/mean_abs_reasoning": 0.46726131439208984,
|
|
"adv/mean_abs_step_conf": 0.724333643913269,
|
|
"adv/ratio_final_to_reasoning": 1.3494203086380696,
|
|
"adv/ratio_step_to_reasoning": 1.5501682283619649,
|
|
"adv/std_final_conf": 0.8626736402511597,
|
|
"adv/std_reasoning": 0.7391840219497681,
|
|
"adv/std_step_conf": 0.9196922183036804,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5732221583285414,
|
|
"calib/avg_num_step_conf": 6.41015625,
|
|
"calib/ece": 0.49123015873015896,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.8968253968253969,
|
|
"calib/gap": 0.0300651715545337,
|
|
"calib/mean_conf": 0.9061507936507938,
|
|
"calib/mu_c": 0.9229729729729734,
|
|
"calib/mu_w": 0.8929078014184397,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.4784523809523812,
|
|
"calib/std_conf": 0.21891295535709995,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.43821766561514186,
|
|
"calib/step_q_c_n": 634.0,
|
|
"calib/step_q_gap": 0.07983236273530075,
|
|
"calib/step_q_w": 0.3583853028798411,
|
|
"calib/step_q_w_n": 1007.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2838.0,
|
|
"completions/max_terminated_length": 2838.0,
|
|
"completions/mean_length": 507.0234375,
|
|
"completions/mean_terminated_length": 509.01177978515625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 110.0,
|
|
"epoch": 0.1664,
|
|
"grad_norm": 0.09417561441659927,
|
|
"learning_rate": 1.2222222222222223e-06,
|
|
"loss": -0.0404,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.03862348943948746,
|
|
"mask/share_reasoning": 0.8037759065628052,
|
|
"mask/share_step_conf": 0.15369439125061035,
|
|
"num_tokens": 38897566.0,
|
|
"reward": 1.0278263092041016,
|
|
"reward_std": 0.2014266550540924,
|
|
"rewards/accuracy_reward_step": 0.43359375,
|
|
"rewards/final_brier_reward_step": 0.4957183599472046,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/step_l2_reward": 0.8508935570716858,
|
|
"step": 156
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.594262957572937,
|
|
"adv/mean_abs_reasoning": 0.48525696992874146,
|
|
"adv/mean_abs_step_conf": 0.7258875966072083,
|
|
"adv/ratio_final_to_reasoning": 1.2246355939208926,
|
|
"adv/ratio_step_to_reasoning": 1.4958828859558735,
|
|
"adv/std_final_conf": 0.8026941418647766,
|
|
"adv/std_reasoning": 0.7393622994422913,
|
|
"adv/std_step_conf": 0.9335121512413025,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.766922471467926,
|
|
"calib/avg_num_step_conf": 6.21484375,
|
|
"calib/ece": 0.29592885375494093,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.8853754940711462,
|
|
"calib/gap": 0.1753607503607505,
|
|
"calib/mean_conf": 0.9020948616600791,
|
|
"calib/mu_c": 0.9707142857142858,
|
|
"calib/mu_w": 0.7953535353535353,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.2946640316205536,
|
|
"calib/std_conf": 0.22021719177808333,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.447638737758433,
|
|
"calib/step_q_c_n": 919.0,
|
|
"calib/step_q_gap": 0.12689469013938537,
|
|
"calib/step_q_w": 0.3207440476190476,
|
|
"calib/step_q_w_n": 672.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 1375.0,
|
|
"completions/max_terminated_length": 1375.0,
|
|
"completions/mean_length": 466.95703125,
|
|
"completions/mean_terminated_length": 470.63385009765625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 90.0,
|
|
"epoch": 0.16746666666666668,
|
|
"grad_norm": 0.09445346146821976,
|
|
"learning_rate": 1.1944444444444446e-06,
|
|
"loss": -0.0623,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03903867304325104,
|
|
"mask/share_reasoning": 0.8008126616477966,
|
|
"mask/share_step_conf": 0.15233619511127472,
|
|
"num_tokens": 39120835.0,
|
|
"reward": 1.1484313011169434,
|
|
"reward_std": 0.22829920053482056,
|
|
"rewards/accuracy_reward_step": 0.6015625,
|
|
"rewards/final_brier_reward_step": 0.6980769038200378,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.8554403781890869,
|
|
"step": 157
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5869907140731812,
|
|
"adv/mean_abs_reasoning": 0.48486799001693726,
|
|
"adv/mean_abs_step_conf": 0.7452125549316406,
|
|
"adv/ratio_final_to_reasoning": 1.2106196452619538,
|
|
"adv/ratio_step_to_reasoning": 1.536939064394845,
|
|
"adv/std_final_conf": 0.7773375511169434,
|
|
"adv/std_reasoning": 0.7394952774047852,
|
|
"adv/std_step_conf": 0.9342270493507385,
|
|
"calib/answer_extract_rate": 0.96484375,
|
|
"calib/auroc": 0.6505353620738236,
|
|
"calib/avg_num_step_conf": 6.44921875,
|
|
"calib/ece": 0.32323886639676147,
|
|
"calib/final_conf_rate": 0.96484375,
|
|
"calib/format_rate": 0.95703125,
|
|
"calib/frac_conf_gt_0.9": 0.8704453441295547,
|
|
"calib/gap": 0.037802197802197623,
|
|
"calib/mean_conf": 0.8999190283400811,
|
|
"calib/mu_c": 0.9138461538461539,
|
|
"calib/mu_w": 0.8760439560439562,
|
|
"calib/nonempty_final_conf_rate": 0.96484375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.2957894736842109,
|
|
"calib/std_conf": 0.21804057844458355,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4356462585034014,
|
|
"calib/step_q_c_n": 882.0,
|
|
"calib/step_q_gap": 0.08148501012888898,
|
|
"calib/step_q_w": 0.3541612483745124,
|
|
"calib/step_q_w_n": 769.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01953125,
|
|
"completions/max_length": 2389.0,
|
|
"completions/max_terminated_length": 2389.0,
|
|
"completions/mean_length": 500.8515625,
|
|
"completions/mean_terminated_length": 510.8287048339844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 130.0,
|
|
"epoch": 0.16853333333333334,
|
|
"grad_norm": 0.07168283313512802,
|
|
"learning_rate": 1.1666666666666668e-06,
|
|
"loss": -0.0864,
|
|
"mask/has_final_conf_rate": 0.96484375,
|
|
"mask/share_final_conf": 0.038769736886024475,
|
|
"mask/share_reasoning": 0.7923343777656555,
|
|
"mask/share_step_conf": 0.1493646502494812,
|
|
"num_tokens": 39354293.0,
|
|
"reward": 1.0865575075149536,
|
|
"reward_std": 0.24829015135765076,
|
|
"rewards/accuracy_reward_step": 0.609375,
|
|
"rewards/final_brier_reward_step": 0.6341601610183716,
|
|
"rewards/format_reward_step": 0.95703125,
|
|
"rewards/step_l2_reward": 0.8171157240867615,
|
|
"step": 158
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6913203001022339,
|
|
"adv/mean_abs_reasoning": 0.502968430519104,
|
|
"adv/mean_abs_step_conf": 0.7115085124969482,
|
|
"adv/ratio_final_to_reasoning": 1.3744805004734304,
|
|
"adv/ratio_step_to_reasoning": 1.4146186307610082,
|
|
"adv/std_final_conf": 0.867906928062439,
|
|
"adv/std_reasoning": 0.7575962543487549,
|
|
"adv/std_step_conf": 0.934805154800415,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.6377729674667346,
|
|
"calib/avg_num_step_conf": 6.41015625,
|
|
"calib/ece": 0.366468253968254,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.8214285714285714,
|
|
"calib/gap": 0.07891449672120698,
|
|
"calib/mean_conf": 0.8520238095238094,
|
|
"calib/mu_c": 0.887410071942446,
|
|
"calib/mu_w": 0.808495575221239,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.33345238095238094,
|
|
"calib/std_conf": 0.27575918192733595,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4125581395348837,
|
|
"calib/step_q_c_n": 860.0,
|
|
"calib/step_q_gap": 0.09972843402912185,
|
|
"calib/step_q_w": 0.31282970550576183,
|
|
"calib/step_q_w_n": 781.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2720.0,
|
|
"completions/max_terminated_length": 2720.0,
|
|
"completions/mean_length": 459.76171875,
|
|
"completions/mean_terminated_length": 463.38189697265625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 131.0,
|
|
"epoch": 0.1696,
|
|
"grad_norm": 0.07267262041568756,
|
|
"learning_rate": 1.138888888888889e-06,
|
|
"loss": -0.1434,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.03936787322163582,
|
|
"mask/share_reasoning": 0.788456916809082,
|
|
"mask/share_step_conf": 0.16436274349689484,
|
|
"num_tokens": 39576776.0,
|
|
"reward": 1.1042141914367676,
|
|
"reward_std": 0.23494693636894226,
|
|
"rewards/accuracy_reward_step": 0.54296875,
|
|
"rewards/final_brier_reward_step": 0.6156214475631714,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/step_l2_reward": 0.8582253456115723,
|
|
"step": 159
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6293020248413086,
|
|
"adv/mean_abs_reasoning": 0.4387770891189575,
|
|
"adv/mean_abs_step_conf": 0.7438819408416748,
|
|
"adv/ratio_final_to_reasoning": 1.4342180584335331,
|
|
"adv/ratio_step_to_reasoning": 1.6953527412640268,
|
|
"adv/std_final_conf": 0.8369735479354858,
|
|
"adv/std_reasoning": 0.7014278769493103,
|
|
"adv/std_step_conf": 0.9351868629455566,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.67088370933566,
|
|
"calib/avg_num_step_conf": 5.85546875,
|
|
"calib/ece": 0.3426274509803922,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.7803921568627451,
|
|
"calib/gap": 0.09647949644771281,
|
|
"calib/mean_conf": 0.8202745098039215,
|
|
"calib/mu_c": 0.8630281690140845,
|
|
"calib/mu_w": 0.7665486725663717,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.3030196078431373,
|
|
"calib/std_conf": 0.30170028488430284,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.3733294392523364,
|
|
"calib/step_q_c_n": 856.0,
|
|
"calib/step_q_gap": 0.04816614220723536,
|
|
"calib/step_q_w": 0.32516329704510105,
|
|
"calib/step_q_w_n": 643.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2301.0,
|
|
"completions/max_terminated_length": 2301.0,
|
|
"completions/mean_length": 462.1171875,
|
|
"completions/mean_terminated_length": 463.929443359375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 92.0,
|
|
"epoch": 0.17066666666666666,
|
|
"grad_norm": 0.08778801560401917,
|
|
"learning_rate": 1.111111111111111e-06,
|
|
"loss": -0.0843,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.037485793232917786,
|
|
"mask/share_reasoning": 0.8103216886520386,
|
|
"mask/share_step_conf": 0.14828626811504364,
|
|
"num_tokens": 39799918.0,
|
|
"reward": 1.1009936332702637,
|
|
"reward_std": 0.1922154426574707,
|
|
"rewards/accuracy_reward_step": 0.5546875,
|
|
"rewards/final_brier_reward_step": 0.6375530958175659,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/step_l2_reward": 0.8372268676757812,
|
|
"step": 160
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5192180275917053,
|
|
"adv/mean_abs_reasoning": 0.38679128885269165,
|
|
"adv/mean_abs_step_conf": 0.7360755205154419,
|
|
"adv/ratio_final_to_reasoning": 1.3423725987516952,
|
|
"adv/ratio_step_to_reasoning": 1.9030302432580743,
|
|
"adv/std_final_conf": 0.759878396987915,
|
|
"adv/std_reasoning": 0.6815119385719299,
|
|
"adv/std_step_conf": 0.9340335726737976,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.7095197783592425,
|
|
"calib/avg_num_step_conf": 5.546875,
|
|
"calib/ece": 0.18051792828685265,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.7370517928286853,
|
|
"calib/gap": 0.2445090041557645,
|
|
"calib/mean_conf": 0.7917529880478087,
|
|
"calib/mu_c": 0.862865168539326,
|
|
"calib/mu_w": 0.6183561643835616,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.13155378486055783,
|
|
"calib/std_conf": 0.32005370991773774,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3799685863874346,
|
|
"calib/step_q_c_n": 955.0,
|
|
"calib/step_q_gap": 0.05801159714012277,
|
|
"calib/step_q_w": 0.3219569892473118,
|
|
"calib/step_q_w_n": 465.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 2551.0,
|
|
"completions/max_terminated_length": 2551.0,
|
|
"completions/mean_length": 433.796875,
|
|
"completions/mean_terminated_length": 440.68255615234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 124.0,
|
|
"epoch": 0.17173333333333332,
|
|
"grad_norm": 0.10147473961114883,
|
|
"learning_rate": 1.0833333333333335e-06,
|
|
"loss": -0.0665,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.04036349803209305,
|
|
"mask/share_reasoning": 0.7908325791358948,
|
|
"mask/share_step_conf": 0.15317894518375397,
|
|
"num_tokens": 40014890.0,
|
|
"reward": 1.1826231479644775,
|
|
"reward_std": 0.16954448819160461,
|
|
"rewards/accuracy_reward_step": 0.6953125,
|
|
"rewards/final_brier_reward_step": 0.7700152397155762,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/step_l2_reward": 0.8400498628616333,
|
|
"step": 161
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.555371880531311,
|
|
"adv/mean_abs_reasoning": 0.40420442819595337,
|
|
"adv/mean_abs_step_conf": 0.7501338720321655,
|
|
"adv/ratio_final_to_reasoning": 1.3739876205959662,
|
|
"adv/ratio_step_to_reasoning": 1.8558279417674013,
|
|
"adv/std_final_conf": 0.7655318379402161,
|
|
"adv/std_reasoning": 0.6816686391830444,
|
|
"adv/std_step_conf": 0.9353340268135071,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.6940372575965796,
|
|
"calib/avg_num_step_conf": 5.36328125,
|
|
"calib/ece": 0.22521912350597617,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.7211155378486056,
|
|
"calib/gap": 0.14669033440219892,
|
|
"calib/mean_conf": 0.7970916334661354,
|
|
"calib/mu_c": 0.8403389830508475,
|
|
"calib/mu_w": 0.6936486486486486,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.15856573705179292,
|
|
"calib/std_conf": 0.3047968227476998,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.39499999999999996,
|
|
"calib/step_q_c_n": 888.0,
|
|
"calib/step_q_gap": 0.07203092783505155,
|
|
"calib/step_q_w": 0.3229690721649484,
|
|
"calib/step_q_w_n": 485.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01953125,
|
|
"completions/max_length": 2077.0,
|
|
"completions/max_terminated_length": 2077.0,
|
|
"completions/mean_length": 420.1171875,
|
|
"completions/mean_terminated_length": 428.4860534667969,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 124.0,
|
|
"epoch": 0.1728,
|
|
"grad_norm": 0.0986003577709198,
|
|
"learning_rate": 1.0555555555555557e-06,
|
|
"loss": -0.1312,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.039783887565135956,
|
|
"mask/share_reasoning": 0.7854666113853455,
|
|
"mask/share_step_conf": 0.15521827340126038,
|
|
"num_tokens": 40226584.0,
|
|
"reward": 1.153937578201294,
|
|
"reward_std": 0.1762813925743103,
|
|
"rewards/accuracy_reward_step": 0.69140625,
|
|
"rewards/final_brier_reward_step": 0.736830472946167,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/step_l2_reward": 0.8249673843383789,
|
|
"step": 162
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6111153364181519,
|
|
"adv/mean_abs_reasoning": 0.3719629943370819,
|
|
"adv/mean_abs_step_conf": 0.7672944068908691,
|
|
"adv/ratio_final_to_reasoning": 1.6429465987800504,
|
|
"adv/ratio_step_to_reasoning": 2.0628245781770653,
|
|
"adv/std_final_conf": 0.8349888920783997,
|
|
"adv/std_reasoning": 0.6403541564941406,
|
|
"adv/std_step_conf": 0.9314863085746765,
|
|
"calib/answer_extract_rate": 0.95703125,
|
|
"calib/auroc": 0.7884820594904628,
|
|
"calib/avg_num_step_conf": 6.12890625,
|
|
"calib/ece": 0.15734693877551031,
|
|
"calib/final_conf_rate": 0.95703125,
|
|
"calib/format_rate": 0.95703125,
|
|
"calib/frac_conf_gt_0.9": 0.5061224489795918,
|
|
"calib/gap": 0.36323062558356706,
|
|
"calib/mean_conf": 0.632938775510204,
|
|
"calib/mu_c": 0.8093650793650796,
|
|
"calib/mu_w": 0.44613445378151256,
|
|
"calib/nonempty_final_conf_rate": 0.95703125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.13800000000000012,
|
|
"calib/std_conf": 0.3673606029204716,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4011027190332327,
|
|
"calib/step_q_c_n": 662.0,
|
|
"calib/step_q_gap": 0.13657129676200885,
|
|
"calib/step_q_w": 0.26453142227122384,
|
|
"calib/step_q_w_n": 907.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0390625,
|
|
"completions/max_length": 2335.0,
|
|
"completions/max_terminated_length": 2335.0,
|
|
"completions/mean_length": 450.796875,
|
|
"completions/mean_terminated_length": 469.1219482421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 126.0,
|
|
"epoch": 0.17386666666666667,
|
|
"grad_norm": 0.08291774988174438,
|
|
"learning_rate": 1.0277777777777777e-06,
|
|
"loss": -0.182,
|
|
"mask/has_final_conf_rate": 0.95703125,
|
|
"mask/share_final_conf": 0.037739530205726624,
|
|
"mask/share_reasoning": 0.7714070677757263,
|
|
"mask/share_step_conf": 0.15179085731506348,
|
|
"num_tokens": 40446820.0,
|
|
"reward": 1.1517189741134644,
|
|
"reward_std": 0.1930740922689438,
|
|
"rewards/accuracy_reward_step": 0.4921875,
|
|
"rewards/final_brier_reward_step": 0.7490097880363464,
|
|
"rewards/format_reward_step": 0.95703125,
|
|
"rewards/step_l2_reward": 0.8430562019348145,
|
|
"step": 163
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6865717172622681,
|
|
"adv/mean_abs_reasoning": 0.536221444606781,
|
|
"adv/mean_abs_step_conf": 0.7406268119812012,
|
|
"adv/ratio_final_to_reasoning": 1.2803883995459024,
|
|
"adv/ratio_step_to_reasoning": 1.381195808989537,
|
|
"adv/std_final_conf": 0.8637810349464417,
|
|
"adv/std_reasoning": 0.8099890351295471,
|
|
"adv/std_step_conf": 0.9336626529693604,
|
|
"calib/answer_extract_rate": 0.9453125,
|
|
"calib/auroc": 0.761631481861037,
|
|
"calib/avg_num_step_conf": 6.7578125,
|
|
"calib/ece": 0.1879338842975207,
|
|
"calib/final_conf_rate": 0.9453125,
|
|
"calib/format_rate": 0.94140625,
|
|
"calib/frac_conf_gt_0.9": 0.4090909090909091,
|
|
"calib/gap": 0.3282325613172099,
|
|
"calib/mean_conf": 0.5486776859504132,
|
|
"calib/mu_c": 0.7100813008130082,
|
|
"calib/mu_w": 0.38184873949579834,
|
|
"calib/nonempty_final_conf_rate": 0.9453125,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.1141735537190083,
|
|
"calib/std_conf": 0.3860082689748488,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.35082677165354337,
|
|
"calib/step_q_c_n": 762.0,
|
|
"calib/step_q_gap": 0.08844040801717973,
|
|
"calib/step_q_w": 0.26238636363636364,
|
|
"calib/step_q_w_n": 968.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.04296875,
|
|
"completions/max_length": 2979.0,
|
|
"completions/max_terminated_length": 2979.0,
|
|
"completions/mean_length": 535.20703125,
|
|
"completions/mean_terminated_length": 559.2366943359375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 121.0,
|
|
"epoch": 0.17493333333333333,
|
|
"grad_norm": 0.07999632507562637,
|
|
"learning_rate": 1.0000000000000002e-06,
|
|
"loss": -0.2353,
|
|
"mask/has_final_conf_rate": 0.9453125,
|
|
"mask/share_final_conf": 0.031994570046663284,
|
|
"mask/share_reasoning": 0.7777503728866577,
|
|
"mask/share_step_conf": 0.1472862958908081,
|
|
"num_tokens": 40689969.0,
|
|
"reward": 1.1251270771026611,
|
|
"reward_std": 0.2629609704017639,
|
|
"rewards/accuracy_reward_step": 0.48046875,
|
|
"rewards/final_brier_reward_step": 0.7215191125869751,
|
|
"rewards/format_reward_step": 0.94140625,
|
|
"rewards/step_l2_reward": 0.829573392868042,
|
|
"step": 164
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6166242957115173,
|
|
"adv/mean_abs_reasoning": 0.3994438350200653,
|
|
"adv/mean_abs_step_conf": 0.7586116790771484,
|
|
"adv/ratio_final_to_reasoning": 1.5437071288897033,
|
|
"adv/ratio_step_to_reasoning": 1.8991698270647763,
|
|
"adv/std_final_conf": 0.8585282564163208,
|
|
"adv/std_reasoning": 0.7014773488044739,
|
|
"adv/std_step_conf": 0.9335530400276184,
|
|
"calib/answer_extract_rate": 0.921875,
|
|
"calib/auroc": 0.7362260919212104,
|
|
"calib/avg_num_step_conf": 5.94921875,
|
|
"calib/ece": 0.17637130801687773,
|
|
"calib/final_conf_rate": 0.92578125,
|
|
"calib/format_rate": 0.921875,
|
|
"calib/frac_conf_gt_0.9": 0.43037974683544306,
|
|
"calib/gap": 0.2973786754210678,
|
|
"calib/mean_conf": 0.5712236286919832,
|
|
"calib/mu_c": 0.7268141592920355,
|
|
"calib/mu_w": 0.42943548387096775,
|
|
"calib/nonempty_final_conf_rate": 0.92578125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.1354008438818566,
|
|
"calib/std_conf": 0.37505074282668666,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.374524959742351,
|
|
"calib/step_q_c_n": 621.0,
|
|
"calib/step_q_gap": 0.08076664488647523,
|
|
"calib/step_q_w": 0.2937583148558758,
|
|
"calib/step_q_w_n": 902.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.05859375,
|
|
"completions/max_length": 3068.0,
|
|
"completions/max_terminated_length": 3068.0,
|
|
"completions/mean_length": 469.37890625,
|
|
"completions/mean_terminated_length": 498.5933837890625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 153.0,
|
|
"epoch": 0.176,
|
|
"grad_norm": 0.1938386857509613,
|
|
"learning_rate": 9.722222222222224e-07,
|
|
"loss": -0.1938,
|
|
"mask/has_final_conf_rate": 0.92578125,
|
|
"mask/share_final_conf": 0.03441419079899788,
|
|
"mask/share_reasoning": 0.7516778707504272,
|
|
"mask/share_step_conf": 0.15531416237354279,
|
|
"num_tokens": 40915706.0,
|
|
"reward": 1.0904395580291748,
|
|
"reward_std": 0.24750910699367523,
|
|
"rewards/accuracy_reward_step": 0.44140625,
|
|
"rewards/final_brier_reward_step": 0.6902323961257935,
|
|
"rewards/format_reward_step": 0.921875,
|
|
"rewards/step_l2_reward": 0.8119936585426331,
|
|
"step": 165
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7183117866516113,
|
|
"adv/mean_abs_reasoning": 0.47158336639404297,
|
|
"adv/mean_abs_step_conf": 0.7550788521766663,
|
|
"adv/ratio_final_to_reasoning": 1.5231915242137875,
|
|
"adv/ratio_step_to_reasoning": 1.6011566691810366,
|
|
"adv/std_final_conf": 0.8728970885276794,
|
|
"adv/std_reasoning": 0.7396736741065979,
|
|
"adv/std_step_conf": 0.9338595867156982,
|
|
"calib/answer_extract_rate": 0.91796875,
|
|
"calib/auroc": 0.7622749231444884,
|
|
"calib/avg_num_step_conf": 6.0703125,
|
|
"calib/ece": 0.22136752136752139,
|
|
"calib/final_conf_rate": 0.9140625,
|
|
"calib/format_rate": 0.90625,
|
|
"calib/frac_conf_gt_0.9": 0.38461538461538464,
|
|
"calib/gap": 0.3347351778656126,
|
|
"calib/mean_conf": 0.5348717948717948,
|
|
"calib/mu_c": 0.6335757575757576,
|
|
"calib/mu_w": 0.298840579710145,
|
|
"calib/nonempty_final_conf_rate": 0.9140625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.025555555555555574,
|
|
"calib/std_conf": 0.3772813577155442,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.35048582995951416,
|
|
"calib/step_q_c_n": 988.0,
|
|
"calib/step_q_gap": 0.0885246992174647,
|
|
"calib/step_q_w": 0.26196113074204946,
|
|
"calib/step_q_w_n": 566.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.07421875,
|
|
"completions/max_length": 2883.0,
|
|
"completions/max_terminated_length": 2883.0,
|
|
"completions/mean_length": 488.7109375,
|
|
"completions/mean_terminated_length": 527.8902587890625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 116.0,
|
|
"epoch": 0.17706666666666668,
|
|
"grad_norm": 0.10186661779880524,
|
|
"learning_rate": 9.444444444444445e-07,
|
|
"loss": -0.3896,
|
|
"mask/has_final_conf_rate": 0.9140625,
|
|
"mask/share_final_conf": 0.03354319930076599,
|
|
"mask/share_reasoning": 0.7347061634063721,
|
|
"mask/share_step_conf": 0.1575319468975067,
|
|
"num_tokens": 41147000.0,
|
|
"reward": 1.089632272720337,
|
|
"reward_std": 0.3075636029243469,
|
|
"rewards/accuracy_reward_step": 0.64453125,
|
|
"rewards/final_brier_reward_step": 0.6920523643493652,
|
|
"rewards/format_reward_step": 0.90625,
|
|
"rewards/step_l2_reward": 0.7847039699554443,
|
|
"step": 166
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7088315486907959,
|
|
"adv/mean_abs_reasoning": 0.5106958150863647,
|
|
"adv/mean_abs_step_conf": 0.7436408996582031,
|
|
"adv/ratio_final_to_reasoning": 1.3879721112868804,
|
|
"adv/ratio_step_to_reasoning": 1.4561327461288176,
|
|
"adv/std_final_conf": 0.894888162612915,
|
|
"adv/std_reasoning": 0.7757686972618103,
|
|
"adv/std_step_conf": 0.9358707070350647,
|
|
"calib/answer_extract_rate": 0.8984375,
|
|
"calib/auroc": 0.5773578811369509,
|
|
"calib/avg_num_step_conf": 5.984375,
|
|
"calib/ece": 0.2913913043478261,
|
|
"calib/final_conf_rate": 0.8984375,
|
|
"calib/format_rate": 0.89453125,
|
|
"calib/frac_conf_gt_0.9": 0.391304347826087,
|
|
"calib/gap": 0.1000952842377264,
|
|
"calib/mean_conf": 0.572086956521739,
|
|
"calib/mu_c": 0.6095138888888889,
|
|
"calib/mu_w": 0.5094186046511625,
|
|
"calib/nonempty_final_conf_rate": 0.8984375,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.11869565217391312,
|
|
"calib/std_conf": 0.35580770491003416,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.363421926910299,
|
|
"calib/step_q_c_n": 903.0,
|
|
"calib/step_q_gap": 0.04022637842063287,
|
|
"calib/step_q_w": 0.32319554848966614,
|
|
"calib/step_q_w_n": 629.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.08984375,
|
|
"completions/max_length": 2545.0,
|
|
"completions/max_terminated_length": 2545.0,
|
|
"completions/mean_length": 443.8515625,
|
|
"completions/mean_terminated_length": 487.66522216796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 135.0,
|
|
"epoch": 0.17813333333333334,
|
|
"grad_norm": 0.08605474978685379,
|
|
"learning_rate": 9.166666666666666e-07,
|
|
"loss": -0.5211,
|
|
"mask/has_final_conf_rate": 0.8984375,
|
|
"mask/share_final_conf": 0.03470229357481003,
|
|
"mask/share_reasoning": 0.7189175486564636,
|
|
"mask/share_step_conf": 0.15653637051582336,
|
|
"num_tokens": 41366234.0,
|
|
"reward": 1.0280582904815674,
|
|
"reward_std": 0.3161855936050415,
|
|
"rewards/accuracy_reward_step": 0.5625,
|
|
"rewards/final_brier_reward_step": 0.6122652292251587,
|
|
"rewards/format_reward_step": 0.89453125,
|
|
"rewards/step_l2_reward": 0.7682965993881226,
|
|
"step": 167
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7873885631561279,
|
|
"adv/mean_abs_reasoning": 0.6657405495643616,
|
|
"adv/mean_abs_step_conf": 0.7577903866767883,
|
|
"adv/ratio_final_to_reasoning": 1.1827258587018152,
|
|
"adv/ratio_step_to_reasoning": 1.1382668325861496,
|
|
"adv/std_final_conf": 0.9266418814659119,
|
|
"adv/std_reasoning": 0.8594803810119629,
|
|
"adv/std_step_conf": 0.9359583258628845,
|
|
"calib/answer_extract_rate": 0.8671875,
|
|
"calib/auroc": 0.7620819397993311,
|
|
"calib/avg_num_step_conf": 5.62109375,
|
|
"calib/ece": 0.18878378378378377,
|
|
"calib/final_conf_rate": 0.8671875,
|
|
"calib/format_rate": 0.859375,
|
|
"calib/frac_conf_gt_0.9": 0.21621621621621623,
|
|
"calib/gap": 0.31751839464882925,
|
|
"calib/mean_conf": 0.40310810810810815,
|
|
"calib/mu_c": 0.5346923076923076,
|
|
"calib/mu_w": 0.2171739130434783,
|
|
"calib/nonempty_final_conf_rate": 0.8671875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.003153153153153153,
|
|
"calib/std_conf": 0.3323947737404039,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.34687407407407406,
|
|
"calib/step_q_c_n": 675.0,
|
|
"calib/step_q_gap": 0.0813766918751212,
|
|
"calib/step_q_w": 0.26549738219895286,
|
|
"calib/step_q_w_n": 764.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.12890625,
|
|
"completions/max_length": 2851.0,
|
|
"completions/max_terminated_length": 2851.0,
|
|
"completions/mean_length": 477.03125,
|
|
"completions/mean_terminated_length": 547.6233520507812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 149.0,
|
|
"epoch": 0.1792,
|
|
"grad_norm": 0.0872519463300705,
|
|
"learning_rate": 8.88888888888889e-07,
|
|
"loss": -0.6337,
|
|
"mask/has_final_conf_rate": 0.8671875,
|
|
"mask/share_final_conf": 0.029452074319124222,
|
|
"mask/share_reasoning": 0.6938341856002808,
|
|
"mask/share_step_conf": 0.1478075236082077,
|
|
"num_tokens": 41593026.0,
|
|
"reward": 1.0312435626983643,
|
|
"reward_std": 0.3793541491031647,
|
|
"rewards/accuracy_reward_step": 0.5078125,
|
|
"rewards/final_brier_reward_step": 0.660910964012146,
|
|
"rewards/format_reward_step": 0.859375,
|
|
"rewards/step_l2_reward": 0.7520923614501953,
|
|
"step": 168
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7701253890991211,
|
|
"adv/mean_abs_reasoning": 0.6043051481246948,
|
|
"adv/mean_abs_step_conf": 0.784436821937561,
|
|
"adv/ratio_final_to_reasoning": 1.2743981935103592,
|
|
"adv/ratio_step_to_reasoning": 1.2980806540732912,
|
|
"adv/std_final_conf": 0.9355708360671997,
|
|
"adv/std_reasoning": 0.8268880248069763,
|
|
"adv/std_step_conf": 0.9356573820114136,
|
|
"calib/answer_extract_rate": 0.83203125,
|
|
"calib/auroc": 0.6584119496855345,
|
|
"calib/avg_num_step_conf": 4.75390625,
|
|
"calib/ece": 0.22218457943925218,
|
|
"calib/final_conf_rate": 0.8359375,
|
|
"calib/format_rate": 0.82421875,
|
|
"calib/frac_conf_gt_0.9": 0.21962616822429906,
|
|
"calib/gap": 0.178536862334032,
|
|
"calib/mean_conf": 0.4128621495327102,
|
|
"calib/mu_c": 0.5012962962962961,
|
|
"calib/mu_w": 0.32275943396226414,
|
|
"calib/nonempty_final_conf_rate": 0.8359375,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.06518691588785036,
|
|
"calib/std_conf": 0.32377449161076505,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.35374999999999995,
|
|
"calib/step_q_c_n": 520.0,
|
|
"calib/step_q_gap": 0.05394727403156385,
|
|
"calib/step_q_w": 0.2998027259684361,
|
|
"calib/step_q_w_n": 697.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1484375,
|
|
"completions/max_length": 2865.0,
|
|
"completions/max_terminated_length": 2865.0,
|
|
"completions/mean_length": 430.98828125,
|
|
"completions/mean_terminated_length": 506.1146545410156,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 74.0,
|
|
"epoch": 0.18026666666666666,
|
|
"grad_norm": 0.10154373943805695,
|
|
"learning_rate": 8.611111111111112e-07,
|
|
"loss": -0.5491,
|
|
"mask/has_final_conf_rate": 0.8359375,
|
|
"mask/share_final_conf": 0.0335194393992424,
|
|
"mask/share_reasoning": 0.6624668836593628,
|
|
"mask/share_step_conf": 0.15557613968849182,
|
|
"num_tokens": 41807543.0,
|
|
"reward": 0.9480201005935669,
|
|
"reward_std": 0.3434249460697174,
|
|
"rewards/accuracy_reward_step": 0.421875,
|
|
"rewards/final_brier_reward_step": 0.5995436906814575,
|
|
"rewards/format_reward_step": 0.82421875,
|
|
"rewards/step_l2_reward": 0.69818514585495,
|
|
"step": 169
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7581398487091064,
|
|
"adv/mean_abs_reasoning": 0.6952020525932312,
|
|
"adv/mean_abs_step_conf": 0.7796958684921265,
|
|
"adv/ratio_final_to_reasoning": 1.0905316603728452,
|
|
"adv/ratio_step_to_reasoning": 1.1215385017689719,
|
|
"adv/std_final_conf": 0.9218901991844177,
|
|
"adv/std_reasoning": 0.8908805251121521,
|
|
"adv/std_step_conf": 0.9365332126617432,
|
|
"calib/answer_extract_rate": 0.7578125,
|
|
"calib/auroc": 0.6416815942678011,
|
|
"calib/avg_num_step_conf": 5.51953125,
|
|
"calib/ece": 0.323160621761658,
|
|
"calib/final_conf_rate": 0.75390625,
|
|
"calib/format_rate": 0.73828125,
|
|
"calib/frac_conf_gt_0.9": 0.21243523316062177,
|
|
"calib/gap": 0.12132781012091348,
|
|
"calib/mean_conf": 0.41487046632124347,
|
|
"calib/mu_c": 0.4632758620689654,
|
|
"calib/mu_w": 0.34194805194805195,
|
|
"calib/nonempty_final_conf_rate": 0.75390625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
|
"calib/pce": 0.06849740932642484,
|
|
"calib/std_conf": 0.33441172353493587,
|
|
"calib/step_conf_rate": 0.98046875,
|
|
"calib/step_q_c": 0.34593893129770986,
|
|
"calib/step_q_c_n": 655.0,
|
|
"calib/step_q_gap": 0.04616320570404231,
|
|
"calib/step_q_w": 0.29977572559366755,
|
|
"calib/step_q_w_n": 758.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.22265625,
|
|
"completions/max_length": 3062.0,
|
|
"completions/max_terminated_length": 3062.0,
|
|
"completions/mean_length": 455.69921875,
|
|
"completions/mean_terminated_length": 586.2261352539062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 158.0,
|
|
"epoch": 0.18133333333333335,
|
|
"grad_norm": 0.09000255912542343,
|
|
"learning_rate": 8.333333333333333e-07,
|
|
"loss": -0.9007,
|
|
"mask/has_final_conf_rate": 0.75390625,
|
|
"mask/share_final_conf": 0.024297218769788742,
|
|
"mask/share_reasoning": 0.594523012638092,
|
|
"mask/share_step_conf": 0.1585235595703125,
|
|
"num_tokens": 42028354.0,
|
|
"reward": 0.8514015078544617,
|
|
"reward_std": 0.4654655456542969,
|
|
"rewards/accuracy_reward_step": 0.45703125,
|
|
"rewards/final_brier_reward_step": 0.49094337224960327,
|
|
"rewards/format_reward_step": 0.73828125,
|
|
"rewards/step_l2_reward": 0.648531436920166,
|
|
"step": 170
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7609789967536926,
|
|
"adv/mean_abs_reasoning": 0.6566393375396729,
|
|
"adv/mean_abs_step_conf": 0.7822959423065186,
|
|
"adv/ratio_final_to_reasoning": 1.1588994951246212,
|
|
"adv/ratio_step_to_reasoning": 1.1913631998315266,
|
|
"adv/std_final_conf": 0.9215835332870483,
|
|
"adv/std_reasoning": 0.843531608581543,
|
|
"adv/std_step_conf": 0.921733021736145,
|
|
"calib/answer_extract_rate": 0.7109375,
|
|
"calib/auroc": 0.5436290617013508,
|
|
"calib/avg_num_step_conf": 5.06640625,
|
|
"calib/ece": 0.35318681318681316,
|
|
"calib/final_conf_rate": 0.7109375,
|
|
"calib/format_rate": 0.703125,
|
|
"calib/frac_conf_gt_0.9": 0.17032967032967034,
|
|
"calib/gap": 0.018617500304247303,
|
|
"calib/mean_conf": 0.3986813186813187,
|
|
"calib/mu_c": 0.4071717171717172,
|
|
"calib/mu_w": 0.3885542168674699,
|
|
"calib/nonempty_final_conf_rate": 0.7109375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.10395604395604394,
|
|
"calib/std_conf": 0.32226434031028584,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.3401717557251908,
|
|
"calib/step_q_c_n": 524.0,
|
|
"calib/step_q_gap": 0.04644601187008085,
|
|
"calib/step_q_w": 0.29372574385510997,
|
|
"calib/step_q_w_n": 773.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.26953125,
|
|
"completions/max_length": 2899.0,
|
|
"completions/max_terminated_length": 2899.0,
|
|
"completions/mean_length": 387.21484375,
|
|
"completions/mean_terminated_length": 530.0909423828125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 104.0,
|
|
"epoch": 0.1824,
|
|
"grad_norm": 0.13014192879199982,
|
|
"learning_rate": 8.055555555555557e-07,
|
|
"loss": -1.0885,
|
|
"mask/has_final_conf_rate": 0.7109375,
|
|
"mask/share_final_conf": 0.02836516499519348,
|
|
"mask/share_reasoning": 0.5377077460289001,
|
|
"mask/share_step_conf": 0.16439583897590637,
|
|
"num_tokens": 42234377.0,
|
|
"reward": 0.7764548063278198,
|
|
"reward_std": 0.4468100070953369,
|
|
"rewards/accuracy_reward_step": 0.38671875,
|
|
"rewards/final_brier_reward_step": 0.4449746012687683,
|
|
"rewards/format_reward_step": 0.703125,
|
|
"rewards/step_l2_reward": 0.5933108329772949,
|
|
"step": 171
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7410951852798462,
|
|
"adv/mean_abs_reasoning": 0.6668893694877625,
|
|
"adv/mean_abs_step_conf": 0.7876383066177368,
|
|
"adv/ratio_final_to_reasoning": 1.1112715529549995,
|
|
"adv/ratio_step_to_reasoning": 1.1810629208600545,
|
|
"adv/std_final_conf": 0.9066214561462402,
|
|
"adv/std_reasoning": 0.8595982193946838,
|
|
"adv/std_step_conf": 0.9360899329185486,
|
|
"calib/answer_extract_rate": 0.7734375,
|
|
"calib/auroc": 0.7890518596123625,
|
|
"calib/avg_num_step_conf": 4.828125,
|
|
"calib/ece": 0.1681818181818183,
|
|
"calib/final_conf_rate": 0.7734375,
|
|
"calib/format_rate": 0.765625,
|
|
"calib/frac_conf_gt_0.9": 0.1919191919191919,
|
|
"calib/gap": 0.30592771084337345,
|
|
"calib/mean_conf": 0.4157575757575757,
|
|
"calib/mu_c": 0.544,
|
|
"calib/mu_w": 0.23807228915662657,
|
|
"calib/nonempty_final_conf_rate": 0.7734375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0015656565656565666,
|
|
"calib/std_conf": 0.30808988229722994,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.3722596964586847,
|
|
"calib/step_q_c_n": 593.0,
|
|
"calib/step_q_gap": 0.07675425322384799,
|
|
"calib/step_q_w": 0.2955054432348367,
|
|
"calib/step_q_w_n": 643.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.19921875,
|
|
"completions/max_length": 2931.0,
|
|
"completions/max_terminated_length": 2931.0,
|
|
"completions/mean_length": 396.65625,
|
|
"completions/mean_terminated_length": 495.3365783691406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 173.0,
|
|
"epoch": 0.18346666666666667,
|
|
"grad_norm": 0.11630342900753021,
|
|
"learning_rate": 7.777777777777779e-07,
|
|
"loss": -0.8021,
|
|
"mask/has_final_conf_rate": 0.7734375,
|
|
"mask/share_final_conf": 0.03203282505273819,
|
|
"mask/share_reasoning": 0.5971311926841736,
|
|
"mask/share_step_conf": 0.1716172695159912,
|
|
"num_tokens": 42439273.0,
|
|
"reward": 0.9423291683197021,
|
|
"reward_std": 0.44360941648483276,
|
|
"rewards/accuracy_reward_step": 0.44921875,
|
|
"rewards/final_brier_reward_step": 0.600367546081543,
|
|
"rewards/format_reward_step": 0.765625,
|
|
"rewards/step_l2_reward": 0.6942147016525269,
|
|
"step": 172
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7796030044555664,
|
|
"adv/mean_abs_reasoning": 0.6838363409042358,
|
|
"adv/mean_abs_step_conf": 0.7759481072425842,
|
|
"adv/ratio_final_to_reasoning": 1.1400432498581436,
|
|
"adv/ratio_step_to_reasoning": 1.1346985540671166,
|
|
"adv/std_final_conf": 0.9365084767341614,
|
|
"adv/std_reasoning": 0.875312328338623,
|
|
"adv/std_step_conf": 0.9361023902893066,
|
|
"calib/answer_extract_rate": 0.69140625,
|
|
"calib/auroc": 0.6069118579581484,
|
|
"calib/avg_num_step_conf": 3.99609375,
|
|
"calib/ece": 0.28174157303370784,
|
|
"calib/final_conf_rate": 0.6953125,
|
|
"calib/format_rate": 0.68359375,
|
|
"calib/frac_conf_gt_0.9": 0.12359550561797752,
|
|
"calib/gap": 0.07989220038046912,
|
|
"calib/mean_conf": 0.37432584269662916,
|
|
"calib/mu_c": 0.41157894736842093,
|
|
"calib/mu_w": 0.3316867469879518,
|
|
"calib/nonempty_final_conf_rate": 0.6953125,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
|
"calib/pce": 0.06117977528089885,
|
|
"calib/std_conf": 0.27864206330795527,
|
|
"calib/step_conf_rate": 0.9765625,
|
|
"calib/step_q_c": 0.37427160493827155,
|
|
"calib/step_q_c_n": 405.0,
|
|
"calib/step_q_gap": 0.04116480882176671,
|
|
"calib/step_q_w": 0.33310679611650484,
|
|
"calib/step_q_w_n": 618.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.265625,
|
|
"completions/max_length": 2977.0,
|
|
"completions/max_terminated_length": 2977.0,
|
|
"completions/mean_length": 402.9765625,
|
|
"completions/mean_terminated_length": 548.7340087890625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 88.0,
|
|
"epoch": 0.18453333333333333,
|
|
"grad_norm": 0.08679164946079254,
|
|
"learning_rate": 7.5e-07,
|
|
"loss": -1.104,
|
|
"mask/has_final_conf_rate": 0.6953125,
|
|
"mask/share_final_conf": 0.03351902961730957,
|
|
"mask/share_reasoning": 0.5200715065002441,
|
|
"mask/share_step_conf": 0.1807844340801239,
|
|
"num_tokens": 42645595.0,
|
|
"reward": 0.7727449536323547,
|
|
"reward_std": 0.44814908504486084,
|
|
"rewards/accuracy_reward_step": 0.37109375,
|
|
"rewards/final_brier_reward_step": 0.46951329708099365,
|
|
"rewards/format_reward_step": 0.68359375,
|
|
"rewards/step_l2_reward": 0.5766927003860474,
|
|
"step": 173
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7140973210334778,
|
|
"adv/mean_abs_reasoning": 0.7074859142303467,
|
|
"adv/mean_abs_step_conf": 0.7206076383590698,
|
|
"adv/ratio_final_to_reasoning": 1.009344930648299,
|
|
"adv/ratio_step_to_reasoning": 1.0185469757980947,
|
|
"adv/std_final_conf": 0.8916718363761902,
|
|
"adv/std_reasoning": 0.8908472061157227,
|
|
"adv/std_step_conf": 0.8918491005897522,
|
|
"calib/answer_extract_rate": 0.484375,
|
|
"calib/auroc": 0.5900680272108844,
|
|
"calib/avg_num_step_conf": 4.32421875,
|
|
"calib/ece": 0.1684677419354839,
|
|
"calib/final_conf_rate": 0.484375,
|
|
"calib/format_rate": 0.484375,
|
|
"calib/frac_conf_gt_0.9": 0.04032258064516129,
|
|
"calib/gap": 0.08675646258503394,
|
|
"calib/mean_conf": 0.26201612903225807,
|
|
"calib/mu_c": 0.31448979591836734,
|
|
"calib/mu_w": 0.2277333333333334,
|
|
"calib/nonempty_final_conf_rate": 0.484375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.017661290322580646,
|
|
"calib/std_conf": 0.20782661755814813,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.34782178217821785,
|
|
"calib/step_q_c_n": 202.0,
|
|
"calib/step_q_gap": 0.04724719654285875,
|
|
"calib/step_q_w": 0.3005745856353591,
|
|
"calib/step_q_w_n": 905.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.45703125,
|
|
"completions/max_length": 3027.0,
|
|
"completions/max_terminated_length": 3027.0,
|
|
"completions/mean_length": 381.796875,
|
|
"completions/mean_terminated_length": 703.1654663085938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 152.0,
|
|
"epoch": 0.1856,
|
|
"grad_norm": 0.2889764606952667,
|
|
"learning_rate": 7.222222222222222e-07,
|
|
"loss": -1.5071,
|
|
"mask/has_final_conf_rate": 0.484375,
|
|
"mask/share_final_conf": 0.018657710403203964,
|
|
"mask/share_reasoning": 0.3527323603630066,
|
|
"mask/share_step_conf": 0.17157870531082153,
|
|
"num_tokens": 42847567.0,
|
|
"reward": 0.567007303237915,
|
|
"reward_std": 0.4658864140510559,
|
|
"rewards/accuracy_reward_step": 0.19140625,
|
|
"rewards/final_brier_reward_step": 0.35918477177619934,
|
|
"rewards/format_reward_step": 0.484375,
|
|
"rewards/step_l2_reward": 0.42644909024238586,
|
|
"step": 174
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7631533741950989,
|
|
"adv/mean_abs_reasoning": 0.751017153263092,
|
|
"adv/mean_abs_step_conf": 0.7918944358825684,
|
|
"adv/ratio_final_to_reasoning": 1.0161597120375696,
|
|
"adv/ratio_step_to_reasoning": 1.054429226339064,
|
|
"adv/std_final_conf": 0.9218874573707581,
|
|
"adv/std_reasoning": 0.9059832692146301,
|
|
"adv/std_step_conf": 0.9221577048301697,
|
|
"calib/answer_extract_rate": 0.4296875,
|
|
"calib/auroc": 0.712914769030579,
|
|
"calib/avg_num_step_conf": 3.86328125,
|
|
"calib/ece": 0.20828828828828827,
|
|
"calib/final_conf_rate": 0.43359375,
|
|
"calib/format_rate": 0.41796875,
|
|
"calib/frac_conf_gt_0.9": 0.09009009009009009,
|
|
"calib/gap": 0.21828887443070916,
|
|
"calib/mean_conf": 0.2900900900900901,
|
|
"calib/mu_c": 0.4041509433962264,
|
|
"calib/mu_w": 0.18586206896551727,
|
|
"calib/nonempty_final_conf_rate": 0.43359375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.010450450450450456,
|
|
"calib/std_conf": 0.2692983726378555,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.3528125,
|
|
"calib/step_q_c_n": 192.0,
|
|
"calib/step_q_gap": 0.059181383312421565,
|
|
"calib/step_q_w": 0.2936311166875784,
|
|
"calib/step_q_w_n": 797.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4921875,
|
|
"completions/max_length": 3061.0,
|
|
"completions/max_terminated_length": 3061.0,
|
|
"completions/mean_length": 434.8046875,
|
|
"completions/mean_terminated_length": 856.2307739257812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 74.0,
|
|
"epoch": 0.18666666666666668,
|
|
"grad_norm": 0.16033883392810822,
|
|
"learning_rate": 6.944444444444446e-07,
|
|
"loss": -1.7116,
|
|
"mask/has_final_conf_rate": 0.43359375,
|
|
"mask/share_final_conf": 0.01755821704864502,
|
|
"mask/share_reasoning": 0.30797672271728516,
|
|
"mask/share_step_conf": 0.18227756023406982,
|
|
"num_tokens": 43064701.0,
|
|
"reward": 0.4913473129272461,
|
|
"reward_std": 0.5304877758026123,
|
|
"rewards/accuracy_reward_step": 0.20703125,
|
|
"rewards/final_brier_reward_step": 0.311006635427475,
|
|
"rewards/format_reward_step": 0.41796875,
|
|
"rewards/step_l2_reward": 0.36445868015289307,
|
|
"step": 175
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7785801291465759,
|
|
"adv/mean_abs_reasoning": 0.7697429656982422,
|
|
"adv/mean_abs_step_conf": 0.8165347576141357,
|
|
"adv/ratio_final_to_reasoning": 1.01148066801795,
|
|
"adv/ratio_step_to_reasoning": 1.0607888529042266,
|
|
"adv/std_final_conf": 0.9365408420562744,
|
|
"adv/std_reasoning": 0.9209882616996765,
|
|
"adv/std_step_conf": 0.936777651309967,
|
|
"calib/answer_extract_rate": 0.4765625,
|
|
"calib/auroc": 0.6894117647058824,
|
|
"calib/avg_num_step_conf": 3.37890625,
|
|
"calib/ece": 0.16142857142857137,
|
|
"calib/final_conf_rate": 0.4921875,
|
|
"calib/format_rate": 0.4765625,
|
|
"calib/frac_conf_gt_0.9": 0.03968253968253968,
|
|
"calib/gap": 0.16075294117647057,
|
|
"calib/mean_conf": 0.24333333333333337,
|
|
"calib/mu_c": 0.3390196078431373,
|
|
"calib/mu_w": 0.1782666666666667,
|
|
"calib/nonempty_final_conf_rate": 0.4921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.21761696362449057,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.3471938775510203,
|
|
"calib/step_q_c_n": 196.0,
|
|
"calib/step_q_gap": 0.04832990146731331,
|
|
"calib/step_q_w": 0.298863976083707,
|
|
"calib/step_q_w_n": 669.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.453125,
|
|
"completions/max_length": 3064.0,
|
|
"completions/max_terminated_length": 3064.0,
|
|
"completions/mean_length": 388.859375,
|
|
"completions/mean_terminated_length": 711.05712890625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 108.0,
|
|
"epoch": 0.18773333333333334,
|
|
"grad_norm": 0.20968042314052582,
|
|
"learning_rate": 6.666666666666667e-07,
|
|
"loss": -1.6118,
|
|
"mask/has_final_conf_rate": 0.4921875,
|
|
"mask/share_final_conf": 0.020962495356798172,
|
|
"mask/share_reasoning": 0.3428986966609955,
|
|
"mask/share_step_conf": 0.18301381170749664,
|
|
"num_tokens": 43268313.0,
|
|
"reward": 0.555877149105072,
|
|
"reward_std": 0.538494348526001,
|
|
"rewards/accuracy_reward_step": 0.19921875,
|
|
"rewards/final_brier_reward_step": 0.36044570803642273,
|
|
"rewards/format_reward_step": 0.4765625,
|
|
"rewards/step_l2_reward": 0.4107682406902313,
|
|
"step": 176
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7055525779724121,
|
|
"adv/mean_abs_reasoning": 0.6864043474197388,
|
|
"adv/mean_abs_step_conf": 0.7357356548309326,
|
|
"adv/ratio_final_to_reasoning": 1.0278964295967143,
|
|
"adv/ratio_step_to_reasoning": 1.0718691651599164,
|
|
"adv/std_final_conf": 0.8761857748031616,
|
|
"adv/std_reasoning": 0.8595938086509705,
|
|
"adv/std_step_conf": 0.8763537406921387,
|
|
"calib/answer_extract_rate": 0.41796875,
|
|
"calib/auroc": 0.5895833333333333,
|
|
"calib/avg_num_step_conf": 3.015625,
|
|
"calib/ece": 0.23651376146788994,
|
|
"calib/final_conf_rate": 0.42578125,
|
|
"calib/format_rate": 0.41796875,
|
|
"calib/frac_conf_gt_0.9": 0.009174311926605505,
|
|
"calib/gap": 0.05122916666666663,
|
|
"calib/mean_conf": 0.20458715596330276,
|
|
"calib/mu_c": 0.23466666666666663,
|
|
"calib/mu_w": 0.1834375,
|
|
"calib/nonempty_final_conf_rate": 0.42578125,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.014128440366972478,
|
|
"calib/std_conf": 0.17430086879460374,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.34139860139860134,
|
|
"calib/step_q_c_n": 143.0,
|
|
"calib/step_q_gap": 0.0337356443238796,
|
|
"calib/step_q_w": 0.30766295707472174,
|
|
"calib/step_q_w_n": 629.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.53515625,
|
|
"completions/max_length": 3065.0,
|
|
"completions/max_terminated_length": 3065.0,
|
|
"completions/mean_length": 327.7890625,
|
|
"completions/mean_terminated_length": 705.1597290039062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 106.0,
|
|
"epoch": 0.1888,
|
|
"grad_norm": 0.4212135970592499,
|
|
"learning_rate": 6.388888888888889e-07,
|
|
"loss": -1.9844,
|
|
"mask/has_final_conf_rate": 0.42578125,
|
|
"mask/share_final_conf": 0.016804736107587814,
|
|
"mask/share_reasoning": 0.26395851373672485,
|
|
"mask/share_step_conf": 0.18408048152923584,
|
|
"num_tokens": 43456059.0,
|
|
"reward": 0.48361408710479736,
|
|
"reward_std": 0.47823816537857056,
|
|
"rewards/accuracy_reward_step": 0.17578125,
|
|
"rewards/final_brier_reward_step": 0.2939867079257965,
|
|
"rewards/format_reward_step": 0.41796875,
|
|
"rewards/step_l2_reward": 0.3696609139442444,
|
|
"step": 177
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7811000943183899,
|
|
"adv/mean_abs_reasoning": 0.8019535541534424,
|
|
"adv/mean_abs_step_conf": 0.8349236249923706,
|
|
"adv/ratio_final_to_reasoning": 0.9739966738384671,
|
|
"adv/ratio_step_to_reasoning": 1.04111219492472,
|
|
"adv/std_final_conf": 0.9219565987586975,
|
|
"adv/std_reasoning": 0.9210929274559021,
|
|
"adv/std_step_conf": 0.9368710517883301,
|
|
"calib/answer_extract_rate": 0.4375,
|
|
"calib/auroc": 0.7688524590163934,
|
|
"calib/avg_num_step_conf": 2.91015625,
|
|
"calib/ece": 0.3042342342342341,
|
|
"calib/final_conf_rate": 0.43359375,
|
|
"calib/format_rate": 0.4296875,
|
|
"calib/frac_conf_gt_0.9": 0.07207207207207207,
|
|
"calib/gap": 0.21966229508196722,
|
|
"calib/mean_conf": 0.2453153153153153,
|
|
"calib/mu_c": 0.3442622950819672,
|
|
"calib/mu_w": 0.12459999999999999,
|
|
"calib/nonempty_final_conf_rate": 0.43359375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.25174640400133486,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.40408284023668634,
|
|
"calib/step_q_c_n": 169.0,
|
|
"calib/step_q_gap": 0.12107954162557522,
|
|
"calib/step_q_w": 0.2830032986111111,
|
|
"calib/step_q_w_n": 576.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.50390625,
|
|
"completions/max_length": 3056.0,
|
|
"completions/max_terminated_length": 3056.0,
|
|
"completions/mean_length": 344.79296875,
|
|
"completions/mean_terminated_length": 695.0157470703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 101.0,
|
|
"epoch": 0.18986666666666666,
|
|
"grad_norm": 0.15451431274414062,
|
|
"learning_rate": 6.111111111111112e-07,
|
|
"loss": -2.3624,
|
|
"mask/has_final_conf_rate": 0.43359375,
|
|
"mask/share_final_conf": 0.01961149275302887,
|
|
"mask/share_reasoning": 0.29202020168304443,
|
|
"mask/share_step_conf": 0.1844620406627655,
|
|
"num_tokens": 43650398.0,
|
|
"reward": 0.5029100775718689,
|
|
"reward_std": 0.5544787049293518,
|
|
"rewards/accuracy_reward_step": 0.23828125,
|
|
"rewards/final_brier_reward_step": 0.3045867085456848,
|
|
"rewards/format_reward_step": 0.4296875,
|
|
"rewards/step_l2_reward": 0.3784264326095581,
|
|
"step": 178
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7245436906814575,
|
|
"adv/mean_abs_reasoning": 0.7352625131607056,
|
|
"adv/mean_abs_step_conf": 0.7421489953994751,
|
|
"adv/ratio_final_to_reasoning": 0.9854217748254693,
|
|
"adv/ratio_step_to_reasoning": 1.0093660184158801,
|
|
"adv/std_final_conf": 0.9060723781585693,
|
|
"adv/std_reasoning": 0.9059785008430481,
|
|
"adv/std_step_conf": 0.9071795344352722,
|
|
"calib/answer_extract_rate": 0.3203125,
|
|
"calib/auroc": 0.7464285714285713,
|
|
"calib/avg_num_step_conf": 2.484375,
|
|
"calib/ece": 0.22743975903614458,
|
|
"calib/final_conf_rate": 0.32421875,
|
|
"calib/format_rate": 0.30078125,
|
|
"calib/frac_conf_gt_0.9": 0.07228915662650602,
|
|
"calib/gap": 0.1914181547619048,
|
|
"calib/mean_conf": 0.21472891566265062,
|
|
"calib/mu_c": 0.32542857142857146,
|
|
"calib/mu_w": 0.13401041666666666,
|
|
"calib/nonempty_final_conf_rate": 0.32421875,
|
|
"calib/nonempty_reasoning_rate": 0.98046875,
|
|
"calib/nonempty_step_conf_rate": 0.96875,
|
|
"calib/pce": 0.010240963855421682,
|
|
"calib/std_conf": 0.24911651482602507,
|
|
"calib/step_conf_rate": 0.96875,
|
|
"calib/step_q_c": 0.3837313432835821,
|
|
"calib/step_q_c_n": 67.0,
|
|
"calib/step_q_gap": 0.07870058757180703,
|
|
"calib/step_q_w": 0.30503075571177507,
|
|
"calib/step_q_w_n": 569.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.56640625,
|
|
"completions/max_length": 3044.0,
|
|
"completions/max_terminated_length": 3044.0,
|
|
"completions/mean_length": 429.3515625,
|
|
"completions/mean_terminated_length": 990.2162475585938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 1.0,
|
|
"epoch": 0.19093333333333334,
|
|
"grad_norm": 0.2442111372947693,
|
|
"learning_rate": 5.833333333333334e-07,
|
|
"loss": -1.9978,
|
|
"mask/has_final_conf_rate": 0.32421875,
|
|
"mask/share_final_conf": 0.011269412003457546,
|
|
"mask/share_reasoning": 0.24272221326828003,
|
|
"mask/share_step_conf": 0.17960213124752045,
|
|
"num_tokens": 43866576.0,
|
|
"reward": 0.355862557888031,
|
|
"reward_std": 0.5103425979614258,
|
|
"rewards/accuracy_reward_step": 0.140625,
|
|
"rewards/final_brier_reward_step": 0.22041286528110504,
|
|
"rewards/format_reward_step": 0.30078125,
|
|
"rewards/step_l2_reward": 0.26868730783462524,
|
|
"step": 179
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.655032753944397,
|
|
"adv/mean_abs_reasoning": 0.6900363564491272,
|
|
"adv/mean_abs_step_conf": 0.6993515491485596,
|
|
"adv/ratio_final_to_reasoning": 0.9492728141385824,
|
|
"adv/ratio_step_to_reasoning": 1.0134995679754726,
|
|
"adv/std_final_conf": 0.844301164150238,
|
|
"adv/std_reasoning": 0.85964435338974,
|
|
"adv/std_step_conf": 0.8606016039848328,
|
|
"calib/answer_extract_rate": 0.37109375,
|
|
"calib/auroc": 0.6911637931034482,
|
|
"calib/avg_num_step_conf": 2.578125,
|
|
"calib/ece": 0.34959183673469385,
|
|
"calib/final_conf_rate": 0.3828125,
|
|
"calib/format_rate": 0.37109375,
|
|
"calib/frac_conf_gt_0.9": 0.02040816326530612,
|
|
"calib/gap": 0.1368534482758621,
|
|
"calib/mean_conf": 0.2422448979591837,
|
|
"calib/mu_c": 0.2981034482758621,
|
|
"calib/mu_w": 0.16125,
|
|
"calib/nonempty_final_conf_rate": 0.3828125,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.19933834743892975,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.3713533834586466,
|
|
"calib/step_q_c_n": 133.0,
|
|
"calib/step_q_gap": 0.05163801343967128,
|
|
"calib/step_q_w": 0.3197153700189753,
|
|
"calib/step_q_w_n": 527.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 3065.0,
|
|
"completions/max_terminated_length": 3065.0,
|
|
"completions/mean_length": 559.1796875,
|
|
"completions/mean_terminated_length": 1118.359375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 124.0,
|
|
"epoch": 0.192,
|
|
"grad_norm": 0.13445325195789337,
|
|
"learning_rate": 5.555555555555555e-07,
|
|
"loss": -1.4242,
|
|
"mask/has_final_conf_rate": 0.3828125,
|
|
"mask/share_final_conf": 0.012486796826124191,
|
|
"mask/share_reasoning": 0.2714724540710449,
|
|
"mask/share_step_conf": 0.2160407453775406,
|
|
"num_tokens": 44113582.0,
|
|
"reward": 0.42552027106285095,
|
|
"reward_std": 0.4537741541862488,
|
|
"rewards/accuracy_reward_step": 0.2265625,
|
|
"rewards/final_brier_reward_step": 0.24204804003238678,
|
|
"rewards/format_reward_step": 0.37109375,
|
|
"rewards/step_l2_reward": 0.3263075053691864,
|
|
"step": 180
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6773039102554321,
|
|
"adv/mean_abs_reasoning": 0.6787272691726685,
|
|
"adv/mean_abs_step_conf": 0.7220475673675537,
|
|
"adv/ratio_final_to_reasoning": 0.9979029000573805,
|
|
"adv/ratio_step_to_reasoning": 1.0638257812268106,
|
|
"adv/std_final_conf": 0.875947117805481,
|
|
"adv/std_reasoning": 0.8751358389854431,
|
|
"adv/std_step_conf": 0.891865611076355,
|
|
"calib/answer_extract_rate": 0.37890625,
|
|
"calib/auroc": 0.7386877828054299,
|
|
"calib/avg_num_step_conf": 2.2421875,
|
|
"calib/ece": 0.18888888888888886,
|
|
"calib/final_conf_rate": 0.38671875,
|
|
"calib/format_rate": 0.375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.12079638009049776,
|
|
"calib/mean_conf": 0.17010101010101009,
|
|
"calib/mu_c": 0.24941176470588236,
|
|
"calib/mu_w": 0.1286153846153846,
|
|
"calib/nonempty_final_conf_rate": 0.38671875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.007777777777777778,
|
|
"calib/std_conf": 0.15428882105327657,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.39999999999999997,
|
|
"calib/step_q_c_n": 67.0,
|
|
"calib/step_q_gap": 0.08163708086785015,
|
|
"calib/step_q_w": 0.3183629191321498,
|
|
"calib/step_q_w_n": 507.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.5546875,
|
|
"completions/max_length": 3043.0,
|
|
"completions/max_terminated_length": 3043.0,
|
|
"completions/mean_length": 389.5234375,
|
|
"completions/mean_terminated_length": 874.7192993164062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 132.0,
|
|
"epoch": 0.19306666666666666,
|
|
"grad_norm": 0.1355988085269928,
|
|
"learning_rate": 5.277777777777779e-07,
|
|
"loss": -1.8989,
|
|
"mask/has_final_conf_rate": 0.38671875,
|
|
"mask/share_final_conf": 0.01411872822791338,
|
|
"mask/share_reasoning": 0.25829386711120605,
|
|
"mask/share_step_conf": 0.17289991676807404,
|
|
"num_tokens": 44319564.0,
|
|
"reward": 0.44823896884918213,
|
|
"reward_std": 0.4739459753036499,
|
|
"rewards/accuracy_reward_step": 0.1328125,
|
|
"rewards/final_brier_reward_step": 0.2897332012653351,
|
|
"rewards/format_reward_step": 0.375,
|
|
"rewards/step_l2_reward": 0.3367881774902344,
|
|
"step": 181
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7794720530509949,
|
|
"adv/mean_abs_reasoning": 0.7892525792121887,
|
|
"adv/mean_abs_step_conf": 0.7970051765441895,
|
|
"adv/ratio_final_to_reasoning": 0.9876078629087832,
|
|
"adv/ratio_step_to_reasoning": 1.009822707630223,
|
|
"adv/std_final_conf": 0.9069880843162537,
|
|
"adv/std_reasoning": 0.9061655402183533,
|
|
"adv/std_step_conf": 0.8967230916023254,
|
|
"calib/answer_extract_rate": 0.4140625,
|
|
"calib/auroc": 0.672941590429275,
|
|
"calib/avg_num_step_conf": 1.984375,
|
|
"calib/ece": 0.2689719626168224,
|
|
"calib/final_conf_rate": 0.41796875,
|
|
"calib/format_rate": 0.4140625,
|
|
"calib/frac_conf_gt_0.9": 0.018691588785046728,
|
|
"calib/gap": 0.13818789584799437,
|
|
"calib/mean_conf": 0.18897196261682248,
|
|
"calib/mu_c": 0.26387755102040816,
|
|
"calib/mu_w": 0.1256896551724138,
|
|
"calib/nonempty_final_conf_rate": 0.41796875,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.2071206004702062,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.33185567010309275,
|
|
"calib/step_q_c_n": 97.0,
|
|
"calib/step_q_gap": 0.01767075526124362,
|
|
"calib/step_q_w": 0.31418491484184913,
|
|
"calib/step_q_w_n": 411.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.54296875,
|
|
"completions/max_length": 3038.0,
|
|
"completions/max_terminated_length": 3038.0,
|
|
"completions/mean_length": 346.546875,
|
|
"completions/mean_terminated_length": 758.2564697265625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 87.0,
|
|
"epoch": 0.19413333333333332,
|
|
"grad_norm": 0.17529059946537018,
|
|
"learning_rate": 5.000000000000001e-07,
|
|
"loss": -2.0152,
|
|
"mask/has_final_conf_rate": 0.41796875,
|
|
"mask/share_final_conf": 0.015305116772651672,
|
|
"mask/share_reasoning": 0.2717783451080322,
|
|
"mask/share_step_conf": 0.1699477881193161,
|
|
"num_tokens": 44514440.0,
|
|
"reward": 0.49129849672317505,
|
|
"reward_std": 0.5451165437698364,
|
|
"rewards/accuracy_reward_step": 0.19140625,
|
|
"rewards/final_brier_reward_step": 0.29107969999313354,
|
|
"rewards/format_reward_step": 0.4140625,
|
|
"rewards/step_l2_reward": 0.38028234243392944,
|
|
"step": 182
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5986714363098145,
|
|
"adv/mean_abs_reasoning": 0.5951822996139526,
|
|
"adv/mean_abs_step_conf": 0.5982235670089722,
|
|
"adv/ratio_final_to_reasoning": 1.0058622991613242,
|
|
"adv/ratio_step_to_reasoning": 1.0051098081999283,
|
|
"adv/std_final_conf": 0.8110201954841614,
|
|
"adv/std_reasoning": 0.8103538751602173,
|
|
"adv/std_step_conf": 0.8114042282104492,
|
|
"calib/answer_extract_rate": 0.33984375,
|
|
"calib/auroc": 0.6104525862068966,
|
|
"calib/avg_num_step_conf": 1.90234375,
|
|
"calib/ece": 0.2295555555555555,
|
|
"calib/final_conf_rate": 0.3515625,
|
|
"calib/format_rate": 0.33984375,
|
|
"calib/frac_conf_gt_0.9": 0.05555555555555555,
|
|
"calib/gap": 0.0735668103448276,
|
|
"calib/mean_conf": 0.1897777777777778,
|
|
"calib/mu_c": 0.23718750000000002,
|
|
"calib/mu_w": 0.16362068965517243,
|
|
"calib/nonempty_final_conf_rate": 0.3515625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.03188888888888888,
|
|
"calib/std_conf": 0.23606485821475123,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3470370370370371,
|
|
"calib/step_q_c_n": 54.0,
|
|
"calib/step_q_gap": 0.021425027799161833,
|
|
"calib/step_q_w": 0.32561200923787526,
|
|
"calib/step_q_w_n": 433.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5859375,
|
|
"completions/max_length": 2882.0,
|
|
"completions/max_terminated_length": 2882.0,
|
|
"completions/mean_length": 383.5078125,
|
|
"completions/mean_terminated_length": 926.2075805664062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 97.0,
|
|
"epoch": 0.1952,
|
|
"grad_norm": 0.21741926670074463,
|
|
"learning_rate": 4.7222222222222226e-07,
|
|
"loss": -1.6925,
|
|
"mask/has_final_conf_rate": 0.3515625,
|
|
"mask/share_final_conf": 0.012612584047019482,
|
|
"mask/share_reasoning": 0.23839637637138367,
|
|
"mask/share_step_conf": 0.16305354237556458,
|
|
"num_tokens": 44719298.0,
|
|
"reward": 0.39942651987075806,
|
|
"reward_std": 0.408824622631073,
|
|
"rewards/accuracy_reward_step": 0.125,
|
|
"rewards/final_brier_reward_step": 0.24213281273841858,
|
|
"rewards/format_reward_step": 0.33984375,
|
|
"rewards/step_l2_reward": 0.30916762351989746,
|
|
"step": 183
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7714831829071045,
|
|
"adv/mean_abs_reasoning": 0.8002102971076965,
|
|
"adv/mean_abs_step_conf": 0.8143944144248962,
|
|
"adv/ratio_final_to_reasoning": 0.9641005441889161,
|
|
"adv/ratio_step_to_reasoning": 1.0177254871231565,
|
|
"adv/std_final_conf": 0.9366480708122253,
|
|
"adv/std_reasoning": 0.9359897375106812,
|
|
"adv/std_step_conf": 0.9369352459907532,
|
|
"calib/answer_extract_rate": 0.44140625,
|
|
"calib/auroc": 0.6713501291989665,
|
|
"calib/avg_num_step_conf": 1.93359375,
|
|
"calib/ece": 0.3282608695652174,
|
|
"calib/final_conf_rate": 0.44921875,
|
|
"calib/format_rate": 0.43359375,
|
|
"calib/frac_conf_gt_0.9": 0.14782608695652175,
|
|
"calib/gap": 0.19810400516795856,
|
|
"calib/mean_conf": 0.3435652173913044,
|
|
"calib/mu_c": 0.41763888888888884,
|
|
"calib/mu_w": 0.21953488372093027,
|
|
"calib/nonempty_final_conf_rate": 0.44921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.022869565217391315,
|
|
"calib/std_conf": 0.32192948540445754,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.3510743801652892,
|
|
"calib/step_q_c_n": 121.0,
|
|
"calib/step_q_gap": 0.019924647544968366,
|
|
"calib/step_q_w": 0.3311497326203208,
|
|
"calib/step_q_w_n": 374.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.47265625,
|
|
"completions/max_length": 3062.0,
|
|
"completions/max_terminated_length": 3062.0,
|
|
"completions/mean_length": 451.2265625,
|
|
"completions/mean_terminated_length": 855.6592407226562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 90.0,
|
|
"epoch": 0.19626666666666667,
|
|
"grad_norm": 0.12983690202236176,
|
|
"learning_rate": 4.444444444444445e-07,
|
|
"loss": -1.5325,
|
|
"mask/has_final_conf_rate": 0.44921875,
|
|
"mask/share_final_conf": 0.017030686140060425,
|
|
"mask/share_reasoning": 0.3019544780254364,
|
|
"mask/share_step_conf": 0.20835858583450317,
|
|
"num_tokens": 44940092.0,
|
|
"reward": 0.5032200813293457,
|
|
"reward_std": 0.5530544519424438,
|
|
"rewards/accuracy_reward_step": 0.28125,
|
|
"rewards/final_brier_reward_step": 0.29165664315223694,
|
|
"rewards/format_reward_step": 0.43359375,
|
|
"rewards/step_l2_reward": 0.3812098801136017,
|
|
"step": 184
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7000118494033813,
|
|
"adv/mean_abs_reasoning": 0.7548315525054932,
|
|
"adv/mean_abs_step_conf": 0.7643401622772217,
|
|
"adv/ratio_final_to_reasoning": 0.9273749183905334,
|
|
"adv/ratio_step_to_reasoning": 1.012596995634545,
|
|
"adv/std_final_conf": 0.8915876150131226,
|
|
"adv/std_reasoning": 0.9061233401298523,
|
|
"adv/std_step_conf": 0.907081127166748,
|
|
"calib/answer_extract_rate": 0.3828125,
|
|
"calib/auroc": 0.7020016339869282,
|
|
"calib/avg_num_step_conf": 1.68359375,
|
|
"calib/ece": 0.30151515151515146,
|
|
"calib/final_conf_rate": 0.38671875,
|
|
"calib/format_rate": 0.375,
|
|
"calib/frac_conf_gt_0.9": 0.050505050505050504,
|
|
"calib/gap": 0.16599264705882352,
|
|
"calib/mean_conf": 0.21363636363636365,
|
|
"calib/mu_c": 0.29411764705882354,
|
|
"calib/mu_w": 0.12812500000000002,
|
|
"calib/nonempty_final_conf_rate": 0.38671875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.2240375804760548,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.3832911392405063,
|
|
"calib/step_q_c_n": 79.0,
|
|
"calib/step_q_gap": 0.04158659378596086,
|
|
"calib/step_q_w": 0.3417045454545454,
|
|
"calib/step_q_w_n": 352.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.515625,
|
|
"completions/max_length": 3070.0,
|
|
"completions/max_terminated_length": 3070.0,
|
|
"completions/mean_length": 482.5,
|
|
"completions/mean_terminated_length": 996.1290283203125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 154.0,
|
|
"epoch": 0.19733333333333333,
|
|
"grad_norm": 0.1541745811700821,
|
|
"learning_rate": 4.1666666666666667e-07,
|
|
"loss": -1.5324,
|
|
"mask/has_final_conf_rate": 0.38671875,
|
|
"mask/share_final_conf": 0.011759690940380096,
|
|
"mask/share_reasoning": 0.2782679796218872,
|
|
"mask/share_step_conf": 0.1943473368883133,
|
|
"num_tokens": 45170532.0,
|
|
"reward": 0.4367647171020508,
|
|
"reward_std": 0.502619743347168,
|
|
"rewards/accuracy_reward_step": 0.19921875,
|
|
"rewards/final_brier_reward_step": 0.2577418088912964,
|
|
"rewards/format_reward_step": 0.375,
|
|
"rewards/step_l2_reward": 0.333962619304657,
|
|
"step": 185
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7071709632873535,
|
|
"adv/mean_abs_reasoning": 0.7387014627456665,
|
|
"adv/mean_abs_step_conf": 0.7503018379211426,
|
|
"adv/ratio_final_to_reasoning": 0.9573163164709085,
|
|
"adv/ratio_step_to_reasoning": 1.0157037392783261,
|
|
"adv/std_final_conf": 0.8916911482810974,
|
|
"adv/std_reasoning": 0.9060976505279541,
|
|
"adv/std_step_conf": 0.9071915745735168,
|
|
"calib/answer_extract_rate": 0.390625,
|
|
"calib/auroc": 0.7592592592592592,
|
|
"calib/avg_num_step_conf": 1.75,
|
|
"calib/ece": 0.22000000000000003,
|
|
"calib/final_conf_rate": 0.39453125,
|
|
"calib/format_rate": 0.390625,
|
|
"calib/frac_conf_gt_0.9": 0.12871287128712872,
|
|
"calib/gap": 0.2488416075650118,
|
|
"calib/mean_conf": 0.298019801980198,
|
|
"calib/mu_c": 0.43106382978723407,
|
|
"calib/mu_w": 0.18222222222222226,
|
|
"calib/nonempty_final_conf_rate": 0.39453125,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.026336633663366357,
|
|
"calib/std_conf": 0.3058052826473249,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.37469879518072285,
|
|
"calib/step_q_c_n": 83.0,
|
|
"calib/step_q_gap": 0.0377672883314078,
|
|
"calib/step_q_w": 0.33693150684931505,
|
|
"calib/step_q_w_n": 365.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.51953125,
|
|
"completions/max_length": 3048.0,
|
|
"completions/max_terminated_length": 3048.0,
|
|
"completions/mean_length": 479.26171875,
|
|
"completions/mean_terminated_length": 997.48779296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 141.0,
|
|
"epoch": 0.1984,
|
|
"grad_norm": 0.09268806129693985,
|
|
"learning_rate": 3.8888888888888895e-07,
|
|
"loss": -1.891,
|
|
"mask/has_final_conf_rate": 0.39453125,
|
|
"mask/share_final_conf": 0.012729963287711143,
|
|
"mask/share_reasoning": 0.2655085027217865,
|
|
"mask/share_step_conf": 0.2022302895784378,
|
|
"num_tokens": 45398263.0,
|
|
"reward": 0.47248899936676025,
|
|
"reward_std": 0.5240097045898438,
|
|
"rewards/accuracy_reward_step": 0.18359375,
|
|
"rewards/final_brier_reward_step": 0.29347658157348633,
|
|
"rewards/format_reward_step": 0.390625,
|
|
"rewards/step_l2_reward": 0.35777178406715393,
|
|
"step": 186
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7503759860992432,
|
|
"adv/mean_abs_reasoning": 0.7579132318496704,
|
|
"adv/mean_abs_step_conf": 0.7848829627037048,
|
|
"adv/ratio_final_to_reasoning": 0.990055265650881,
|
|
"adv/ratio_step_to_reasoning": 1.0355841931776484,
|
|
"adv/std_final_conf": 0.9218424558639526,
|
|
"adv/std_reasoning": 0.921066164970398,
|
|
"adv/std_step_conf": 0.9221832752227783,
|
|
"calib/answer_extract_rate": 0.375,
|
|
"calib/auroc": 0.6381551362683437,
|
|
"calib/avg_num_step_conf": 1.7109375,
|
|
"calib/ece": 0.2854081632653061,
|
|
"calib/final_conf_rate": 0.3828125,
|
|
"calib/format_rate": 0.3671875,
|
|
"calib/frac_conf_gt_0.9": 0.04081632653061224,
|
|
"calib/gap": 0.06703144654088047,
|
|
"calib/mean_conf": 0.22908163265306125,
|
|
"calib/mu_c": 0.2653333333333333,
|
|
"calib/mu_w": 0.19830188679245284,
|
|
"calib/nonempty_final_conf_rate": 0.3828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.027653061224489794,
|
|
"calib/std_conf": 0.21257453307397892,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.33675,
|
|
"calib/step_q_c_n": 80.0,
|
|
"calib/step_q_gap": 0.001498603351955341,
|
|
"calib/step_q_w": 0.33525139664804465,
|
|
"calib/step_q_w_n": 358.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.53515625,
|
|
"completions/max_length": 3058.0,
|
|
"completions/max_terminated_length": 3058.0,
|
|
"completions/mean_length": 446.62109375,
|
|
"completions/mean_terminated_length": 960.79833984375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 116.0,
|
|
"epoch": 0.19946666666666665,
|
|
"grad_norm": 0.15147480368614197,
|
|
"learning_rate": 3.611111111111111e-07,
|
|
"loss": -1.9245,
|
|
"mask/has_final_conf_rate": 0.3828125,
|
|
"mask/share_final_conf": 0.0132368765771389,
|
|
"mask/share_reasoning": 0.24744805693626404,
|
|
"mask/share_step_conf": 0.20415881276130676,
|
|
"num_tokens": 45614142.0,
|
|
"reward": 0.4245920777320862,
|
|
"reward_std": 0.5307902097702026,
|
|
"rewards/accuracy_reward_step": 0.17578125,
|
|
"rewards/final_brier_reward_step": 0.2493503987789154,
|
|
"rewards/format_reward_step": 0.3671875,
|
|
"rewards/step_l2_reward": 0.3274933695793152,
|
|
"step": 187
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7576410174369812,
|
|
"adv/mean_abs_reasoning": 0.728103756904602,
|
|
"adv/mean_abs_step_conf": 0.7716506123542786,
|
|
"adv/ratio_final_to_reasoning": 1.0405673782785456,
|
|
"adv/ratio_step_to_reasoning": 1.0598085850220138,
|
|
"adv/std_final_conf": 0.9068464636802673,
|
|
"adv/std_reasoning": 0.8908753991127014,
|
|
"adv/std_step_conf": 0.9070549607276917,
|
|
"calib/answer_extract_rate": 0.47265625,
|
|
"calib/auroc": 0.609681697612732,
|
|
"calib/avg_num_step_conf": 1.5625,
|
|
"calib/ece": 0.2597560975609756,
|
|
"calib/final_conf_rate": 0.48046875,
|
|
"calib/format_rate": 0.46875,
|
|
"calib/frac_conf_gt_0.9": 0.04065040650406504,
|
|
"calib/gap": 0.12151724137931033,
|
|
"calib/mean_conf": 0.23130081300813007,
|
|
"calib/mu_c": 0.29551724137931035,
|
|
"calib/mu_w": 0.17400000000000002,
|
|
"calib/nonempty_final_conf_rate": 0.48046875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.009756097560975606,
|
|
"calib/std_conf": 0.22595042031021628,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.3656321839080459,
|
|
"calib/step_q_c_n": 87.0,
|
|
"calib/step_q_gap": 0.021127391575777588,
|
|
"calib/step_q_w": 0.34450479233226833,
|
|
"calib/step_q_w_n": 313.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 3071.0,
|
|
"completions/max_terminated_length": 3071.0,
|
|
"completions/mean_length": 514.01171875,
|
|
"completions/mean_terminated_length": 913.7986450195312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 127.0,
|
|
"epoch": 0.20053333333333334,
|
|
"grad_norm": 0.14727431535720825,
|
|
"learning_rate": 3.3333333333333335e-07,
|
|
"loss": -1.3545,
|
|
"mask/has_final_conf_rate": 0.48046875,
|
|
"mask/share_final_conf": 0.01569814234972,
|
|
"mask/share_reasoning": 0.31940892338752747,
|
|
"mask/share_step_conf": 0.22739294171333313,
|
|
"num_tokens": 45849801.0,
|
|
"reward": 0.5597624778747559,
|
|
"reward_std": 0.5230478644371033,
|
|
"rewards/accuracy_reward_step": 0.2265625,
|
|
"rewards/final_brier_reward_step": 0.32674336433410645,
|
|
"rewards/format_reward_step": 0.46875,
|
|
"rewards/step_l2_reward": 0.435812771320343,
|
|
"step": 188
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7794215679168701,
|
|
"adv/mean_abs_reasoning": 0.8135305643081665,
|
|
"adv/mean_abs_step_conf": 0.8352118134498596,
|
|
"adv/ratio_final_to_reasoning": 0.9580728765608174,
|
|
"adv/ratio_step_to_reasoning": 1.0266508107905339,
|
|
"adv/std_final_conf": 0.9219034910202026,
|
|
"adv/std_reasoning": 0.935815155506134,
|
|
"adv/std_step_conf": 0.9369481205940247,
|
|
"calib/answer_extract_rate": 0.4140625,
|
|
"calib/auroc": 0.662152530292231,
|
|
"calib/avg_num_step_conf": 1.546875,
|
|
"calib/ece": 0.23214953271028038,
|
|
"calib/final_conf_rate": 0.41796875,
|
|
"calib/format_rate": 0.3984375,
|
|
"calib/frac_conf_gt_0.9": 0.08411214953271028,
|
|
"calib/gap": 0.1729116179615111,
|
|
"calib/mean_conf": 0.22859813084112152,
|
|
"calib/mu_c": 0.3271739130434783,
|
|
"calib/mu_w": 0.1542622950819672,
|
|
"calib/nonempty_final_conf_rate": 0.41796875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.015420560747663549,
|
|
"calib/std_conf": 0.2735760025210266,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.3747826086956521,
|
|
"calib/step_q_c_n": 69.0,
|
|
"calib/step_q_gap": 0.04169392368036162,
|
|
"calib/step_q_w": 0.3330886850152905,
|
|
"calib/step_q_w_n": 327.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.515625,
|
|
"completions/max_length": 3010.0,
|
|
"completions/max_terminated_length": 3010.0,
|
|
"completions/mean_length": 376.17578125,
|
|
"completions/mean_terminated_length": 776.6209716796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 98.0,
|
|
"epoch": 0.2016,
|
|
"grad_norm": 0.2416364550590515,
|
|
"learning_rate": 3.055555555555556e-07,
|
|
"loss": -1.9951,
|
|
"mask/has_final_conf_rate": 0.41796875,
|
|
"mask/share_final_conf": 0.01586807146668434,
|
|
"mask/share_reasoning": 0.28485164046287537,
|
|
"mask/share_step_conf": 0.1836552917957306,
|
|
"num_tokens": 46053870.0,
|
|
"reward": 0.46894943714141846,
|
|
"reward_std": 0.5665953755378723,
|
|
"rewards/accuracy_reward_step": 0.1796875,
|
|
"rewards/final_brier_reward_step": 0.2840324342250824,
|
|
"rewards/format_reward_step": 0.3984375,
|
|
"rewards/step_l2_reward": 0.3588276207447052,
|
|
"step": 189
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7758157253265381,
|
|
"adv/mean_abs_reasoning": 0.7713011503219604,
|
|
"adv/mean_abs_step_conf": 0.8179733753204346,
|
|
"adv/ratio_final_to_reasoning": 1.0058531936620256,
|
|
"adv/ratio_step_to_reasoning": 1.0605110273451452,
|
|
"adv/std_final_conf": 0.921683132648468,
|
|
"adv/std_reasoning": 0.9060755372047424,
|
|
"adv/std_step_conf": 0.922048807144165,
|
|
"calib/answer_extract_rate": 0.44140625,
|
|
"calib/auroc": 0.637952366223322,
|
|
"calib/avg_num_step_conf": 1.52734375,
|
|
"calib/ece": 0.26342105263157894,
|
|
"calib/final_conf_rate": 0.4453125,
|
|
"calib/format_rate": 0.4375,
|
|
"calib/frac_conf_gt_0.9": 0.02631578947368421,
|
|
"calib/gap": 0.0982523971543458,
|
|
"calib/mean_conf": 0.24535087719298246,
|
|
"calib/mu_c": 0.2979245283018868,
|
|
"calib/mu_w": 0.199672131147541,
|
|
"calib/nonempty_final_conf_rate": 0.4453125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.02192982456140351,
|
|
"calib/std_conf": 0.22736931787239203,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.37897435897435894,
|
|
"calib/step_q_c_n": 78.0,
|
|
"calib/step_q_gap": 0.04494879986892769,
|
|
"calib/step_q_w": 0.33402555910543125,
|
|
"calib/step_q_w_n": 313.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.47265625,
|
|
"completions/max_length": 3067.0,
|
|
"completions/max_terminated_length": 3067.0,
|
|
"completions/mean_length": 502.70703125,
|
|
"completions/mean_terminated_length": 953.281494140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 103.0,
|
|
"epoch": 0.20266666666666666,
|
|
"grad_norm": 0.1024363711476326,
|
|
"learning_rate": 2.7777777777777776e-07,
|
|
"loss": -1.7315,
|
|
"mask/has_final_conf_rate": 0.4453125,
|
|
"mask/share_final_conf": 0.014365598559379578,
|
|
"mask/share_reasoning": 0.27649250626564026,
|
|
"mask/share_step_conf": 0.23648564517498016,
|
|
"num_tokens": 46288171.0,
|
|
"reward": 0.5105445384979248,
|
|
"reward_std": 0.5440113544464111,
|
|
"rewards/accuracy_reward_step": 0.20703125,
|
|
"rewards/final_brier_reward_step": 0.30749452114105225,
|
|
"rewards/format_reward_step": 0.4375,
|
|
"rewards/step_l2_reward": 0.38979214429855347,
|
|
"step": 190
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7739953994750977,
|
|
"adv/mean_abs_reasoning": 0.753760814666748,
|
|
"adv/mean_abs_step_conf": 0.7896829843521118,
|
|
"adv/ratio_final_to_reasoning": 1.026844835144814,
|
|
"adv/ratio_step_to_reasoning": 1.0476572527868082,
|
|
"adv/std_final_conf": 0.9358944296836853,
|
|
"adv/std_reasoning": 0.9209585189819336,
|
|
"adv/std_step_conf": 0.9367083311080933,
|
|
"calib/answer_extract_rate": 0.4765625,
|
|
"calib/auroc": 0.594832251082251,
|
|
"calib/avg_num_step_conf": 1.3984375,
|
|
"calib/ece": 0.28459016393442627,
|
|
"calib/final_conf_rate": 0.4765625,
|
|
"calib/format_rate": 0.46875,
|
|
"calib/frac_conf_gt_0.9": 0.06557377049180328,
|
|
"calib/gap": 0.04901515151515151,
|
|
"calib/mean_conf": 0.27098360655737713,
|
|
"calib/mu_c": 0.2975,
|
|
"calib/mu_w": 0.24848484848484848,
|
|
"calib/nonempty_final_conf_rate": 0.4765625,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.04827868852459017,
|
|
"calib/std_conf": 0.25365980133482374,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.3499999999999999,
|
|
"calib/step_q_c_n": 65.0,
|
|
"calib/step_q_gap": -0.0038225255972696437,
|
|
"calib/step_q_w": 0.35382252559726957,
|
|
"calib/step_q_w_n": 293.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4609375,
|
|
"completions/max_length": 3060.0,
|
|
"completions/max_terminated_length": 3060.0,
|
|
"completions/mean_length": 459.91015625,
|
|
"completions/mean_terminated_length": 853.1666870117188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 132.0,
|
|
"epoch": 0.20373333333333332,
|
|
"grad_norm": 0.1299065798521042,
|
|
"learning_rate": 2.5000000000000004e-07,
|
|
"loss": -1.4379,
|
|
"mask/has_final_conf_rate": 0.4765625,
|
|
"mask/share_final_conf": 0.01533226203173399,
|
|
"mask/share_reasoning": 0.29629403352737427,
|
|
"mask/share_step_conf": 0.22743621468544006,
|
|
"num_tokens": 46510076.0,
|
|
"reward": 0.533978283405304,
|
|
"reward_std": 0.5094114542007446,
|
|
"rewards/accuracy_reward_step": 0.21875,
|
|
"rewards/final_brier_reward_step": 0.3145742118358612,
|
|
"rewards/format_reward_step": 0.46875,
|
|
"rewards/step_l2_reward": 0.41058823466300964,
|
|
"step": 191
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.732417643070221,
|
|
"adv/mean_abs_reasoning": 0.7503454685211182,
|
|
"adv/mean_abs_step_conf": 0.7655529975891113,
|
|
"adv/ratio_final_to_reasoning": 0.9761072383281906,
|
|
"adv/ratio_step_to_reasoning": 1.0202673697729743,
|
|
"adv/std_final_conf": 0.9219067692756653,
|
|
"adv/std_reasoning": 0.9209879040718079,
|
|
"adv/std_step_conf": 0.9221699237823486,
|
|
"calib/answer_extract_rate": 0.4375,
|
|
"calib/auroc": 0.5488588878174221,
|
|
"calib/avg_num_step_conf": 1.41796875,
|
|
"calib/ece": 0.3138839285714286,
|
|
"calib/final_conf_rate": 0.4375,
|
|
"calib/format_rate": 0.421875,
|
|
"calib/frac_conf_gt_0.9": 0.08035714285714286,
|
|
"calib/gap": 0.016311475409836074,
|
|
"calib/mean_conf": 0.26111607142857146,
|
|
"calib/mu_c": 0.27,
|
|
"calib/mu_w": 0.25368852459016394,
|
|
"calib/nonempty_final_conf_rate": 0.4375,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.05982142857142858,
|
|
"calib/std_conf": 0.26199245986749975,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.34548387096774186,
|
|
"calib/step_q_c_n": 62.0,
|
|
"calib/step_q_gap": 0.0072778909012966575,
|
|
"calib/step_q_w": 0.3382059800664452,
|
|
"calib/step_q_w_n": 301.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.45703125,
|
|
"completions/max_length": 3042.0,
|
|
"completions/max_terminated_length": 3042.0,
|
|
"completions/mean_length": 508.93359375,
|
|
"completions/mean_terminated_length": 937.3165893554688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 97.0,
|
|
"epoch": 0.2048,
|
|
"grad_norm": 0.21384847164154053,
|
|
"learning_rate": 2.2222222222222224e-07,
|
|
"loss": -1.3701,
|
|
"mask/has_final_conf_rate": 0.4375,
|
|
"mask/share_final_conf": 0.013786101713776588,
|
|
"mask/share_reasoning": 0.33207717537879944,
|
|
"mask/share_step_conf": 0.19710546731948853,
|
|
"num_tokens": 46745339.0,
|
|
"reward": 0.48342451453208923,
|
|
"reward_std": 0.5122061371803284,
|
|
"rewards/accuracy_reward_step": 0.19921875,
|
|
"rewards/final_brier_reward_step": 0.27475929260253906,
|
|
"rewards/format_reward_step": 0.421875,
|
|
"rewards/step_l2_reward": 0.37858062982559204,
|
|
"step": 192
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7556190490722656,
|
|
"adv/mean_abs_reasoning": 0.7361305952072144,
|
|
"adv/mean_abs_step_conf": 0.7891688346862793,
|
|
"adv/ratio_final_to_reasoning": 1.0264741799783033,
|
|
"adv/ratio_step_to_reasoning": 1.0720500408818563,
|
|
"adv/std_final_conf": 0.9069178700447083,
|
|
"adv/std_reasoning": 0.8907724618911743,
|
|
"adv/std_step_conf": 0.9070672988891602,
|
|
"calib/answer_extract_rate": 0.49609375,
|
|
"calib/auroc": 0.6909090909090909,
|
|
"calib/avg_num_step_conf": 1.55078125,
|
|
"calib/ece": 0.24330708661417322,
|
|
"calib/final_conf_rate": 0.49609375,
|
|
"calib/format_rate": 0.48828125,
|
|
"calib/frac_conf_gt_0.9": 0.06299212598425197,
|
|
"calib/gap": 0.13722222222222233,
|
|
"calib/mean_conf": 0.24220472440944882,
|
|
"calib/mu_c": 0.3200000000000001,
|
|
"calib/mu_w": 0.1827777777777778,
|
|
"calib/nonempty_final_conf_rate": 0.49609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.02622047244094488,
|
|
"calib/std_conf": 0.2458855549222285,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.34407894736842104,
|
|
"calib/step_q_c_n": 76.0,
|
|
"calib/step_q_gap": 0.009686424003935101,
|
|
"calib/step_q_w": 0.33439252336448594,
|
|
"calib/step_q_w_n": 321.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.453125,
|
|
"completions/max_length": 3049.0,
|
|
"completions/max_terminated_length": 3049.0,
|
|
"completions/mean_length": 446.3515625,
|
|
"completions/mean_terminated_length": 816.1857299804688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 91.0,
|
|
"epoch": 0.20586666666666667,
|
|
"grad_norm": 0.12955591082572937,
|
|
"learning_rate": 1.9444444444444447e-07,
|
|
"loss": -1.5353,
|
|
"mask/has_final_conf_rate": 0.49609375,
|
|
"mask/share_final_conf": 0.0178175400942564,
|
|
"mask/share_reasoning": 0.31756535172462463,
|
|
"mask/share_step_conf": 0.21149210631847382,
|
|
"num_tokens": 46965317.0,
|
|
"reward": 0.5772705674171448,
|
|
"reward_std": 0.5131789445877075,
|
|
"rewards/accuracy_reward_step": 0.21484375,
|
|
"rewards/final_brier_reward_step": 0.3521054685115814,
|
|
"rewards/format_reward_step": 0.48828125,
|
|
"rewards/step_l2_reward": 0.4412071108818054,
|
|
"step": 193
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.8192306160926819,
|
|
"adv/mean_abs_reasoning": 0.819528341293335,
|
|
"adv/mean_abs_step_conf": 0.8271200656890869,
|
|
"adv/ratio_final_to_reasoning": 0.9996367115258232,
|
|
"adv/ratio_step_to_reasoning": 1.009263528804594,
|
|
"adv/std_final_conf": 0.9367586374282837,
|
|
"adv/std_reasoning": 0.9357778429985046,
|
|
"adv/std_step_conf": 0.9369503855705261,
|
|
"calib/answer_extract_rate": 0.453125,
|
|
"calib/auroc": 0.7305976806422836,
|
|
"calib/avg_num_step_conf": 1.4296875,
|
|
"calib/ece": 0.2682758620689655,
|
|
"calib/final_conf_rate": 0.453125,
|
|
"calib/format_rate": 0.453125,
|
|
"calib/frac_conf_gt_0.9": 0.06896551724137931,
|
|
"calib/gap": 0.20591733571216173,
|
|
"calib/mean_conf": 0.25017241379310345,
|
|
"calib/mu_c": 0.3513559322033898,
|
|
"calib/mu_w": 0.14543859649122806,
|
|
"calib/nonempty_final_conf_rate": 0.453125,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.004913793103448276,
|
|
"calib/std_conf": 0.2617579256733104,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.3655294117647058,
|
|
"calib/step_q_c_n": 85.0,
|
|
"calib/step_q_gap": 0.017522294326983434,
|
|
"calib/step_q_w": 0.3480071174377224,
|
|
"calib/step_q_w_n": 281.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.50390625,
|
|
"completions/max_length": 2978.0,
|
|
"completions/max_terminated_length": 2978.0,
|
|
"completions/mean_length": 342.58984375,
|
|
"completions/mean_terminated_length": 690.5748291015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 116.0,
|
|
"epoch": 0.20693333333333333,
|
|
"grad_norm": 0.19686636328697205,
|
|
"learning_rate": 1.6666666666666668e-07,
|
|
"loss": -2.0312,
|
|
"mask/has_final_conf_rate": 0.453125,
|
|
"mask/share_final_conf": 0.020617563277482986,
|
|
"mask/share_reasoning": 0.26393723487854004,
|
|
"mask/share_step_conf": 0.21153897047042847,
|
|
"num_tokens": 47158964.0,
|
|
"reward": 0.53669673204422,
|
|
"reward_std": 0.5804100036621094,
|
|
"rewards/accuracy_reward_step": 0.23046875,
|
|
"rewards/final_brier_reward_step": 0.3252031207084656,
|
|
"rewards/format_reward_step": 0.453125,
|
|
"rewards/step_l2_reward": 0.4076477289199829,
|
|
"step": 194
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.8255274295806885,
|
|
"adv/mean_abs_reasoning": 0.8241317868232727,
|
|
"adv/mean_abs_step_conf": 0.818082869052887,
|
|
"adv/ratio_final_to_reasoning": 1.0016934703644855,
|
|
"adv/ratio_step_to_reasoning": 0.9926602542613941,
|
|
"adv/std_final_conf": 0.9363267421722412,
|
|
"adv/std_reasoning": 0.9357849359512329,
|
|
"adv/std_step_conf": 0.9369533658027649,
|
|
"calib/answer_extract_rate": 0.52734375,
|
|
"calib/auroc": 0.6524175824175824,
|
|
"calib/avg_num_step_conf": 1.3046875,
|
|
"calib/ece": 0.2911851851851852,
|
|
"calib/final_conf_rate": 0.52734375,
|
|
"calib/format_rate": 0.515625,
|
|
"calib/frac_conf_gt_0.9": 0.02962962962962963,
|
|
"calib/gap": 0.10741758241758251,
|
|
"calib/mean_conf": 0.23414814814814813,
|
|
"calib/mu_c": 0.2898461538461539,
|
|
"calib/mu_w": 0.1824285714285714,
|
|
"calib/nonempty_final_conf_rate": 0.52734375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.021925925925925925,
|
|
"calib/std_conf": 0.23289923169946325,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.3679761904761904,
|
|
"calib/step_q_c_n": 84.0,
|
|
"calib/step_q_gap": 0.026256190476190433,
|
|
"calib/step_q_w": 0.34171999999999997,
|
|
"calib/step_q_w_n": 250.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.40625,
|
|
"completions/max_length": 3054.0,
|
|
"completions/max_terminated_length": 3054.0,
|
|
"completions/mean_length": 458.34765625,
|
|
"completions/mean_terminated_length": 771.9539794921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 110.0,
|
|
"epoch": 0.208,
|
|
"grad_norm": 0.09223850816488266,
|
|
"learning_rate": 1.3888888888888888e-07,
|
|
"loss": -1.6689,
|
|
"mask/has_final_conf_rate": 0.52734375,
|
|
"mask/share_final_conf": 0.02058326080441475,
|
|
"mask/share_reasoning": 0.3318544030189514,
|
|
"mask/share_step_conf": 0.24131232500076294,
|
|
"num_tokens": 47382285.0,
|
|
"reward": 0.6079497337341309,
|
|
"reward_std": 0.5632410049438477,
|
|
"rewards/accuracy_reward_step": 0.25390625,
|
|
"rewards/final_brier_reward_step": 0.35428789258003235,
|
|
"rewards/format_reward_step": 0.515625,
|
|
"rewards/step_l2_reward": 0.47180354595184326,
|
|
"step": 195
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.8046687841415405,
|
|
"adv/mean_abs_reasoning": 0.7653719186782837,
|
|
"adv/mean_abs_step_conf": 0.8247447609901428,
|
|
"adv/ratio_final_to_reasoning": 1.0513434900134804,
|
|
"adv/ratio_step_to_reasoning": 1.0775738446406418,
|
|
"adv/std_final_conf": 0.9367269277572632,
|
|
"adv/std_reasoning": 0.9060116410255432,
|
|
"adv/std_step_conf": 0.9361777305603027,
|
|
"calib/answer_extract_rate": 0.49609375,
|
|
"calib/auroc": 0.7307135969141756,
|
|
"calib/avg_num_step_conf": 1.3203125,
|
|
"calib/ece": 0.2507751937984496,
|
|
"calib/final_conf_rate": 0.50390625,
|
|
"calib/format_rate": 0.484375,
|
|
"calib/frac_conf_gt_0.9": 0.13953488372093023,
|
|
"calib/gap": 0.21747830279652844,
|
|
"calib/mean_conf": 0.3462790697674419,
|
|
"calib/mu_c": 0.4491176470588235,
|
|
"calib/mu_w": 0.23163934426229507,
|
|
"calib/nonempty_final_conf_rate": 0.50390625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.03496124031007749,
|
|
"calib/std_conf": 0.3126557156660814,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.4024675324675324,
|
|
"calib/step_q_c_n": 77.0,
|
|
"calib/step_q_gap": 0.057831517141861966,
|
|
"calib/step_q_w": 0.34463601532567045,
|
|
"calib/step_q_w_n": 261.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.453125,
|
|
"completions/max_length": 2871.0,
|
|
"completions/max_terminated_length": 2871.0,
|
|
"completions/mean_length": 368.390625,
|
|
"completions/mean_terminated_length": 673.6286010742188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 90.0,
|
|
"epoch": 0.20906666666666668,
|
|
"grad_norm": 0.1020633801817894,
|
|
"learning_rate": 1.1111111111111112e-07,
|
|
"loss": -1.5135,
|
|
"mask/has_final_conf_rate": 0.50390625,
|
|
"mask/share_final_conf": 0.021770047023892403,
|
|
"mask/share_reasoning": 0.3151125907897949,
|
|
"mask/share_step_conf": 0.20999234914779663,
|
|
"num_tokens": 47579137.0,
|
|
"reward": 0.5831013321876526,
|
|
"reward_std": 0.5391672849655151,
|
|
"rewards/accuracy_reward_step": 0.265625,
|
|
"rewards/final_brier_reward_step": 0.3524019718170166,
|
|
"rewards/format_reward_step": 0.484375,
|
|
"rewards/step_l2_reward": 0.44253382086753845,
|
|
"step": 196
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7114823460578918,
|
|
"adv/mean_abs_reasoning": 0.7125142812728882,
|
|
"adv/mean_abs_step_conf": 0.7193067073822021,
|
|
"adv/ratio_final_to_reasoning": 0.9985516989032798,
|
|
"adv/ratio_step_to_reasoning": 1.0095330385479144,
|
|
"adv/std_final_conf": 0.8917362093925476,
|
|
"adv/std_reasoning": 0.8907146453857422,
|
|
"adv/std_step_conf": 0.8919350504875183,
|
|
"calib/answer_extract_rate": 0.33984375,
|
|
"calib/auroc": 0.6805555555555556,
|
|
"calib/avg_num_step_conf": 1.375,
|
|
"calib/ece": 0.18109890109890112,
|
|
"calib/final_conf_rate": 0.35546875,
|
|
"calib/format_rate": 0.33203125,
|
|
"calib/frac_conf_gt_0.9": 0.13186813186813187,
|
|
"calib/gap": 0.21496969696969695,
|
|
"calib/mean_conf": 0.2934065934065934,
|
|
"calib/mu_c": 0.42333333333333334,
|
|
"calib/mu_w": 0.2083636363636364,
|
|
"calib/nonempty_final_conf_rate": 0.35546875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.039450549450549474,
|
|
"calib/std_conf": 0.30445129811442734,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.39638297872340433,
|
|
"calib/step_q_c_n": 47.0,
|
|
"calib/step_q_gap": 0.048415765608650296,
|
|
"calib/step_q_w": 0.34796721311475404,
|
|
"calib/step_q_w_n": 305.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.59375,
|
|
"completions/max_length": 2987.0,
|
|
"completions/max_terminated_length": 2987.0,
|
|
"completions/mean_length": 340.95703125,
|
|
"completions/mean_terminated_length": 839.2788696289062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 144.0,
|
|
"epoch": 0.21013333333333334,
|
|
"grad_norm": 0.22904542088508606,
|
|
"learning_rate": 8.333333333333334e-08,
|
|
"loss": -2.1411,
|
|
"mask/has_final_conf_rate": 0.35546875,
|
|
"mask/share_final_conf": 0.011857477948069572,
|
|
"mask/share_reasoning": 0.23438555002212524,
|
|
"mask/share_step_conf": 0.16000698506832123,
|
|
"num_tokens": 47771478.0,
|
|
"reward": 0.40049824118614197,
|
|
"reward_std": 0.5027368664741516,
|
|
"rewards/accuracy_reward_step": 0.140625,
|
|
"rewards/final_brier_reward_step": 0.25186797976493835,
|
|
"rewards/format_reward_step": 0.33203125,
|
|
"rewards/step_l2_reward": 0.30306482315063477,
|
|
"step": 197
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7501786351203918,
|
|
"adv/mean_abs_reasoning": 0.7775917053222656,
|
|
"adv/mean_abs_step_conf": 0.7811833620071411,
|
|
"adv/ratio_final_to_reasoning": 0.9647461900451823,
|
|
"adv/ratio_step_to_reasoning": 1.0046189493281528,
|
|
"adv/std_final_conf": 0.9219053983688354,
|
|
"adv/std_reasoning": 0.921049952507019,
|
|
"adv/std_step_conf": 0.9221871495246887,
|
|
"calib/answer_extract_rate": 0.46484375,
|
|
"calib/auroc": 0.6527815468113976,
|
|
"calib/avg_num_step_conf": 1.265625,
|
|
"calib/ece": 0.2981967213114755,
|
|
"calib/final_conf_rate": 0.4765625,
|
|
"calib/format_rate": 0.45703125,
|
|
"calib/frac_conf_gt_0.9": 0.08196721311475409,
|
|
"calib/gap": 0.13176119402985073,
|
|
"calib/mean_conf": 0.27836065573770485,
|
|
"calib/mu_c": 0.3377611940298507,
|
|
"calib/mu_w": 0.206,
|
|
"calib/nonempty_final_conf_rate": 0.4765625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.013688524590163946,
|
|
"calib/std_conf": 0.24966653384897258,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.3542857142857142,
|
|
"calib/step_q_c_n": 70.0,
|
|
"calib/step_q_gap": 0.005860517435320511,
|
|
"calib/step_q_w": 0.3484251968503937,
|
|
"calib/step_q_w_n": 254.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4609375,
|
|
"completions/max_length": 3072.0,
|
|
"completions/max_terminated_length": 3072.0,
|
|
"completions/mean_length": 443.6171875,
|
|
"completions/mean_terminated_length": 822.9420166015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 120.0,
|
|
"epoch": 0.2112,
|
|
"grad_norm": 0.13247403502464294,
|
|
"learning_rate": 5.555555555555556e-08,
|
|
"loss": -1.6816,
|
|
"mask/has_final_conf_rate": 0.4765625,
|
|
"mask/share_final_conf": 0.018223222345113754,
|
|
"mask/share_reasoning": 0.29040613770484924,
|
|
"mask/share_step_conf": 0.2304331362247467,
|
|
"num_tokens": 47990428.0,
|
|
"reward": 0.5378419160842896,
|
|
"reward_std": 0.5349186062812805,
|
|
"rewards/accuracy_reward_step": 0.26171875,
|
|
"rewards/final_brier_reward_step": 0.31049844622612,
|
|
"rewards/format_reward_step": 0.45703125,
|
|
"rewards/step_l2_reward": 0.41429033875465393,
|
|
"step": 198
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7660308480262756,
|
|
"adv/mean_abs_reasoning": 0.7764954566955566,
|
|
"adv/mean_abs_step_conf": 0.793433666229248,
|
|
"adv/ratio_final_to_reasoning": 0.9865232840977408,
|
|
"adv/ratio_step_to_reasoning": 1.0218136621246612,
|
|
"adv/std_final_conf": 0.9069642424583435,
|
|
"adv/std_reasoning": 0.9061921238899231,
|
|
"adv/std_step_conf": 0.9068816304206848,
|
|
"calib/answer_extract_rate": 0.57421875,
|
|
"calib/auroc": 0.6813743218806512,
|
|
"calib/avg_num_step_conf": 1.2890625,
|
|
"calib/ece": 0.23738255033557049,
|
|
"calib/final_conf_rate": 0.58203125,
|
|
"calib/format_rate": 0.5625,
|
|
"calib/frac_conf_gt_0.9": 0.11409395973154363,
|
|
"calib/gap": 0.19223508137432188,
|
|
"calib/mean_conf": 0.33563758389261744,
|
|
"calib/mu_c": 0.4259493670886076,
|
|
"calib/mu_w": 0.2337142857142857,
|
|
"calib/nonempty_final_conf_rate": 0.58203125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.02140939597315436,
|
|
"calib/std_conf": 0.2951448788765415,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.3555670103092783,
|
|
"calib/step_q_c_n": 97.0,
|
|
"calib/step_q_gap": 0.00912924206893495,
|
|
"calib/step_q_w": 0.34643776824034334,
|
|
"calib/step_q_w_n": 233.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.37109375,
|
|
"completions/max_length": 3061.0,
|
|
"completions/max_terminated_length": 3061.0,
|
|
"completions/mean_length": 508.765625,
|
|
"completions/mean_terminated_length": 808.9689331054688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 93.0,
|
|
"epoch": 0.21226666666666666,
|
|
"grad_norm": 0.1422998607158661,
|
|
"learning_rate": 2.777777777777778e-08,
|
|
"loss": -1.4611,
|
|
"mask/has_final_conf_rate": 0.58203125,
|
|
"mask/share_final_conf": 0.018554989248514175,
|
|
"mask/share_reasoning": 0.3966650366783142,
|
|
"mask/share_step_conf": 0.21368621289730072,
|
|
"num_tokens": 48224872.0,
|
|
"reward": 0.6758462190628052,
|
|
"reward_std": 0.5250706672668457,
|
|
"rewards/accuracy_reward_step": 0.3125,
|
|
"rewards/final_brier_reward_step": 0.40119296312332153,
|
|
"rewards/format_reward_step": 0.5625,
|
|
"rewards/step_l2_reward": 0.5169996619224548,
|
|
"step": 199
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7240675687789917,
|
|
"adv/mean_abs_reasoning": 0.7000408172607422,
|
|
"adv/mean_abs_step_conf": 0.7183365821838379,
|
|
"adv/ratio_final_to_reasoning": 1.0343219294158676,
|
|
"adv/ratio_step_to_reasoning": 1.0261352830749026,
|
|
"adv/std_final_conf": 0.8912106156349182,
|
|
"adv/std_reasoning": 0.8754467368125916,
|
|
"adv/std_step_conf": 0.8915499448776245,
|
|
"calib/answer_extract_rate": 0.515625,
|
|
"calib/auroc": 0.75,
|
|
"calib/avg_num_step_conf": 1.1796875,
|
|
"calib/ece": 0.2829323308270677,
|
|
"calib/final_conf_rate": 0.51953125,
|
|
"calib/format_rate": 0.5078125,
|
|
"calib/frac_conf_gt_0.9": 0.12781954887218044,
|
|
"calib/gap": 0.19675438596491232,
|
|
"calib/mean_conf": 0.3343609022556391,
|
|
"calib/mu_c": 0.4186842105263158,
|
|
"calib/mu_w": 0.2219298245614035,
|
|
"calib/nonempty_final_conf_rate": 0.51953125,
|
|
"calib/nonempty_reasoning_rate": 0.98046875,
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
|
"calib/pce": 0.02293233082706766,
|
|
"calib/std_conf": 0.28058036893869176,
|
|
"calib/step_conf_rate": 0.9765625,
|
|
"calib/step_q_c": 0.3819101123595505,
|
|
"calib/step_q_c_n": 89.0,
|
|
"calib/step_q_gap": 0.045900722688189066,
|
|
"calib/step_q_w": 0.33600938967136146,
|
|
"calib/step_q_w_n": 213.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.421875,
|
|
"completions/max_length": 2989.0,
|
|
"completions/max_terminated_length": 2989.0,
|
|
"completions/mean_length": 426.30078125,
|
|
"completions/mean_terminated_length": 737.3851318359375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 114.0,
|
|
"epoch": 0.21333333333333335,
|
|
"grad_norm": 0.09993603080511093,
|
|
"learning_rate": 0.0,
|
|
"loss": -1.3081,
|
|
"mask/has_final_conf_rate": 0.51953125,
|
|
"mask/share_final_conf": 0.018429240211844444,
|
|
"mask/share_reasoning": 0.37678319215774536,
|
|
"mask/share_step_conf": 0.18291257321834564,
|
|
"num_tokens": 48442053.0,
|
|
"reward": 0.60547935962677,
|
|
"reward_std": 0.48481401801109314,
|
|
"rewards/accuracy_reward_step": 0.296875,
|
|
"rewards/final_brier_reward_step": 0.3608894348144531,
|
|
"rewards/format_reward_step": 0.5078125,
|
|
"rewards/step_l2_reward": 0.4594211280345917,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.21333333333333335,
|
|
"step": 200,
|
|
"total_flos": 0.0,
|
|
"train_loss": -0.2843201345717534,
|
|
"train_runtime": 30116.5704,
|
|
"train_samples_per_second": 1.7,
|
|
"train_steps_per_second": 0.007
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 200,
|
|
"num_input_tokens_seen": 48442053,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 25,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|