12243 lines
504 KiB
JSON
12243 lines
504 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.21333333333333335,
|
|
"eval_steps": 500,
|
|
"global_step": 200,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"adv/mean_abs_final_conf": 0.773959219455719,
|
|
"adv/mean_abs_reasoning": 0.47714588046073914,
|
|
"adv/mean_abs_step_conf": 0.7493494749069214,
|
|
"adv/ratio_final_to_reasoning": 1.622059942565935,
|
|
"adv/ratio_step_to_reasoning": 1.5704829604383013,
|
|
"adv/std_final_conf": 0.9294352531433105,
|
|
"adv/std_reasoning": 0.7393431663513184,
|
|
"adv/std_step_conf": 0.9337335228919983,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.38076182006817844,
|
|
"calib/avg_num_step_conf": 5.23046875,
|
|
"calib/ece": 0.2003187250996017,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.2948207171314741,
|
|
"calib/gap": -0.026059730250481805,
|
|
"calib/mean_conf": 0.8737051792828686,
|
|
"calib/mu_c": 0.865606936416185,
|
|
"calib/mu_w": 0.8916666666666668,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.19239043824701207,
|
|
"calib/std_conf": 0.09027744273295583,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.7959393232205367,
|
|
"calib/step_q_c_n": 857.0,
|
|
"calib/step_q_gap": -0.006446568895645877,
|
|
"calib/step_q_w": 0.8023858921161826,
|
|
"calib/step_q_w_n": 482.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2492.0,
|
|
"completions/max_terminated_length": 2492.0,
|
|
"completions/mean_length": 474.94921875,
|
|
"completions/mean_terminated_length": 478.68896484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 138.0,
|
|
"epoch": 0.0010666666666666667,
|
|
"grad_norm": 0.042860016226768494,
|
|
"kl": 0.000291675329208374,
|
|
"learning_rate": 2.5000000000000004e-07,
|
|
"loss": -0.011,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03466901555657387,
|
|
"mask/share_reasoning": 0.8340686559677124,
|
|
"mask/share_step_conf": 0.12344987690448761,
|
|
"num_tokens": 229171.0,
|
|
"reward": 0.8971271514892578,
|
|
"reward_std": 0.1976315677165985,
|
|
"rewards/accuracy_reward_step": 0.67578125,
|
|
"rewards/asymmetric_l2_reward": 0.749505341053009,
|
|
"rewards/final_brier_reward_step": 0.7142800688743591,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 1
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7672724723815918,
|
|
"adv/mean_abs_reasoning": 0.5104547739028931,
|
|
"adv/mean_abs_step_conf": 0.773115873336792,
|
|
"adv/ratio_final_to_reasoning": 1.503115479781084,
|
|
"adv/ratio_step_to_reasoning": 1.5145629208746838,
|
|
"adv/std_final_conf": 0.9330522418022156,
|
|
"adv/std_reasoning": 0.7575037479400635,
|
|
"adv/std_step_conf": 0.9337809085845947,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.44343065693430656,
|
|
"calib/avg_num_step_conf": 5.05859375,
|
|
"calib/ece": 0.3349411764705883,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.2823529411764706,
|
|
"calib/gap": 0.002352468143016151,
|
|
"calib/mean_conf": 0.8721960784313726,
|
|
"calib/mu_c": 0.8732846715328467,
|
|
"calib/mu_w": 0.8709322033898306,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3349411764705883,
|
|
"calib/std_conf": 0.07627016470309335,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.7954391371340525,
|
|
"calib/step_q_c_n": 649.0,
|
|
"calib/step_q_gap": 0.011011892552009073,
|
|
"calib/step_q_w": 0.7844272445820434,
|
|
"calib/step_q_w_n": 646.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1966.0,
|
|
"completions/max_terminated_length": 1966.0,
|
|
"completions/mean_length": 492.9765625,
|
|
"completions/mean_terminated_length": 494.9098205566406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 161.0,
|
|
"epoch": 0.0021333333333333334,
|
|
"grad_norm": 0.04081178456544876,
|
|
"kl": 0.00037539005279541016,
|
|
"learning_rate": 5.000000000000001e-07,
|
|
"loss": -0.0106,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.03364308178424835,
|
|
"mask/share_reasoning": 0.8523939251899719,
|
|
"mask/share_step_conf": 0.11005672812461853,
|
|
"num_tokens": 458661.0,
|
|
"reward": 0.8363707661628723,
|
|
"reward_std": 0.19354595243930817,
|
|
"rewards/accuracy_reward_step": 0.53515625,
|
|
"rewards/asymmetric_l2_reward": 0.7344152927398682,
|
|
"rewards/final_brier_reward_step": 0.6320762038230896,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 2
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.753760814666748,
|
|
"adv/mean_abs_reasoning": 0.43374836444854736,
|
|
"adv/mean_abs_step_conf": 0.751661479473114,
|
|
"adv/ratio_final_to_reasoning": 1.737783647034735,
|
|
"adv/ratio_step_to_reasoning": 1.7329436629202057,
|
|
"adv/std_final_conf": 0.929868757724762,
|
|
"adv/std_reasoning": 0.7013001441955566,
|
|
"adv/std_step_conf": 0.9305136799812317,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5068113362541073,
|
|
"calib/avg_num_step_conf": 5.0078125,
|
|
"calib/ece": 0.22877952755905512,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.3425196850393701,
|
|
"calib/gap": 0.004660460021905566,
|
|
"calib/mean_conf": 0.8794094488188977,
|
|
"calib/mu_c": 0.881024096385542,
|
|
"calib/mu_w": 0.8763636363636365,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.22732283464566927,
|
|
"calib/std_conf": 0.05409278327150863,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.7904369538077403,
|
|
"calib/step_q_c_n": 801.0,
|
|
"calib/step_q_gap": 0.023389136759923157,
|
|
"calib/step_q_w": 0.7670478170478171,
|
|
"calib/step_q_w_n": 481.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2469.0,
|
|
"completions/max_terminated_length": 2469.0,
|
|
"completions/mean_length": 508.640625,
|
|
"completions/mean_terminated_length": 510.63531494140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 183.0,
|
|
"epoch": 0.0032,
|
|
"grad_norm": 0.04966992139816284,
|
|
"kl": 0.0011971145868301392,
|
|
"learning_rate": 7.5e-07,
|
|
"loss": 0.0326,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.032452650368213654,
|
|
"mask/share_reasoning": 0.8557740449905396,
|
|
"mask/share_step_conf": 0.10786702483892441,
|
|
"num_tokens": 694129.0,
|
|
"reward": 0.9003467559814453,
|
|
"reward_std": 0.16390666365623474,
|
|
"rewards/accuracy_reward_step": 0.6484375,
|
|
"rewards/asymmetric_l2_reward": 0.7596950531005859,
|
|
"rewards/final_brier_reward_step": 0.7144359350204468,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 3
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7722760438919067,
|
|
"adv/mean_abs_reasoning": 0.4103483259677887,
|
|
"adv/mean_abs_step_conf": 0.7630362510681152,
|
|
"adv/ratio_final_to_reasoning": 1.8820012048800911,
|
|
"adv/ratio_step_to_reasoning": 1.859484254671997,
|
|
"adv/std_final_conf": 0.9281506538391113,
|
|
"adv/std_reasoning": 0.6815478205680847,
|
|
"adv/std_step_conf": 0.9337376952171326,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.4910992283741709,
|
|
"calib/avg_num_step_conf": 5.30859375,
|
|
"calib/ece": 0.22568627450980389,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.22745098039215686,
|
|
"calib/gap": 0.0012657371057265276,
|
|
"calib/mean_conf": 0.8766666666666667,
|
|
"calib/mu_c": 0.8771084337349399,
|
|
"calib/mu_w": 0.8758426966292133,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.22568627450980389,
|
|
"calib/std_conf": 0.04235548285764873,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.798277108433735,
|
|
"calib/step_q_c_n": 830.0,
|
|
"calib/step_q_gap": 0.006386749265493097,
|
|
"calib/step_q_w": 0.7918903591682419,
|
|
"calib/step_q_w_n": 529.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2305.0,
|
|
"completions/max_terminated_length": 2305.0,
|
|
"completions/mean_length": 510.30078125,
|
|
"completions/mean_terminated_length": 510.30078125,
|
|
"completions/min_length": 143.0,
|
|
"completions/min_terminated_length": 143.0,
|
|
"epoch": 0.004266666666666667,
|
|
"grad_norm": 0.0446762815117836,
|
|
"kl": 0.0002792179584503174,
|
|
"learning_rate": 1.0000000000000002e-06,
|
|
"loss": 0.046,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.03316652029752731,
|
|
"mask/share_reasoning": 0.8474521040916443,
|
|
"mask/share_step_conf": 0.1193813905119896,
|
|
"num_tokens": 930934.0,
|
|
"reward": 0.8870111703872681,
|
|
"reward_std": 0.16794714331626892,
|
|
"rewards/accuracy_reward_step": 0.6484375,
|
|
"rewards/asymmetric_l2_reward": 0.736449658870697,
|
|
"rewards/final_brier_reward_step": 0.7102289199829102,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 4
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7767199873924255,
|
|
"adv/mean_abs_reasoning": 0.39009517431259155,
|
|
"adv/mean_abs_step_conf": 0.7709915637969971,
|
|
"adv/ratio_final_to_reasoning": 1.9911038088618427,
|
|
"adv/ratio_step_to_reasoning": 1.9764191268338662,
|
|
"adv/std_final_conf": 0.9301847219467163,
|
|
"adv/std_reasoning": 0.6612535119056702,
|
|
"adv/std_step_conf": 0.9337782263755798,
|
|
"calib/answer_extract_rate": 0.96484375,
|
|
"calib/auroc": 0.4286752080306432,
|
|
"calib/avg_num_step_conf": 4.9609375,
|
|
"calib/ece": 0.33842105263157896,
|
|
"calib/final_conf_rate": 0.96484375,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 0.2874493927125506,
|
|
"calib/gap": -0.011496499801875615,
|
|
"calib/mean_conf": 0.880931174089069,
|
|
"calib/mu_c": 0.8756716417910448,
|
|
"calib/mu_w": 0.8871681415929205,
|
|
"calib/nonempty_final_conf_rate": 0.96484375,
|
|
"calib/nonempty_reasoning_rate": 0.97265625,
|
|
"calib/nonempty_step_conf_rate": 0.96875,
|
|
"calib/pce": 0.33842105263157896,
|
|
"calib/std_conf": 0.04485179508228513,
|
|
"calib/step_conf_rate": 0.96875,
|
|
"calib/step_q_c": 0.8001156069364163,
|
|
"calib/step_q_c_n": 692.0,
|
|
"calib/step_q_gap": 0.007139828389703395,
|
|
"calib/step_q_w": 0.7929757785467129,
|
|
"calib/step_q_w_n": 578.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2878.0,
|
|
"completions/max_terminated_length": 2878.0,
|
|
"completions/mean_length": 524.61328125,
|
|
"completions/mean_terminated_length": 526.6705932617188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 164.0,
|
|
"epoch": 0.005333333333333333,
|
|
"grad_norm": 0.053633056581020355,
|
|
"kl": 0.000286102294921875,
|
|
"learning_rate": 1.25e-06,
|
|
"loss": -0.0005,
|
|
"mask/has_final_conf_rate": 0.96484375,
|
|
"mask/share_final_conf": 0.03404708951711655,
|
|
"mask/share_reasoning": 0.850751519203186,
|
|
"mask/share_step_conf": 0.111295185983181,
|
|
"num_tokens": 1171923.0,
|
|
"reward": 0.7870633602142334,
|
|
"reward_std": 0.16195642948150635,
|
|
"rewards/accuracy_reward_step": 0.5234375,
|
|
"rewards/asymmetric_l2_reward": 0.6709086894989014,
|
|
"rewards/final_brier_reward_step": 0.6063430309295654,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"step": 5
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7977774739265442,
|
|
"adv/mean_abs_reasoning": 0.42455434799194336,
|
|
"adv/mean_abs_step_conf": 0.7404891848564148,
|
|
"adv/ratio_final_to_reasoning": 1.879093872668767,
|
|
"adv/ratio_step_to_reasoning": 1.744156403906307,
|
|
"adv/std_final_conf": 0.9312154054641724,
|
|
"adv/std_reasoning": 0.6816370487213135,
|
|
"adv/std_step_conf": 0.9339516758918762,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.433967112024666,
|
|
"calib/avg_num_step_conf": 5.140625,
|
|
"calib/ece": 0.3313545816733068,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.33067729083665337,
|
|
"calib/gap": -0.009260662898253003,
|
|
"calib/mean_conf": 0.8851394422310758,
|
|
"calib/mu_c": 0.8810071942446042,
|
|
"calib/mu_w": 0.8902678571428572,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3313545816733068,
|
|
"calib/std_conf": 0.04471557388534855,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.7983999999999999,
|
|
"calib/step_q_c_n": 675.0,
|
|
"calib/step_q_gap": -0.011537597503900376,
|
|
"calib/step_q_w": 0.8099375975039003,
|
|
"calib/step_q_w_n": 641.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2532.0,
|
|
"completions/max_terminated_length": 2532.0,
|
|
"completions/mean_length": 464.72265625,
|
|
"completions/mean_terminated_length": 464.72265625,
|
|
"completions/min_length": 184.0,
|
|
"completions/min_terminated_length": 184.0,
|
|
"epoch": 0.0064,
|
|
"grad_norm": 0.051193155348300934,
|
|
"kl": 0.00040727853775024414,
|
|
"learning_rate": 1.5e-06,
|
|
"loss": 0.0986,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.0360930897295475,
|
|
"mask/share_reasoning": 0.8376978635787964,
|
|
"mask/share_step_conf": 0.12620902061462402,
|
|
"num_tokens": 1396844.0,
|
|
"reward": 0.8131352663040161,
|
|
"reward_std": 0.18004879355430603,
|
|
"rewards/accuracy_reward_step": 0.546875,
|
|
"rewards/asymmetric_l2_reward": 0.6967132091522217,
|
|
"rewards/final_brier_reward_step": 0.6240886449813843,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 6
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7584425210952759,
|
|
"adv/mean_abs_reasoning": 0.47301214933395386,
|
|
"adv/mean_abs_step_conf": 0.7564212083816528,
|
|
"adv/ratio_final_to_reasoning": 1.60343137520513,
|
|
"adv/ratio_step_to_reasoning": 1.5991580965663692,
|
|
"adv/std_final_conf": 0.9305387735366821,
|
|
"adv/std_reasoning": 0.7393888831138611,
|
|
"adv/std_step_conf": 0.9338541030883789,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.47629160284083,
|
|
"calib/avg_num_step_conf": 5.03125,
|
|
"calib/ece": 0.22399209486166,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.32806324110671936,
|
|
"calib/gap": -0.002414705472775225,
|
|
"calib/mean_conf": 0.8837549407114624,
|
|
"calib/mu_c": 0.8829341317365269,
|
|
"calib/mu_w": 0.8853488372093021,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.22383399209486157,
|
|
"calib/std_conf": 0.04383461580512651,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.7935455635491607,
|
|
"calib/step_q_c_n": 834.0,
|
|
"calib/step_q_gap": 0.0046248587033457245,
|
|
"calib/step_q_w": 0.788920704845815,
|
|
"calib/step_q_w_n": 454.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2655.0,
|
|
"completions/max_terminated_length": 2655.0,
|
|
"completions/mean_length": 542.8203125,
|
|
"completions/mean_terminated_length": 544.9490356445312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 178.0,
|
|
"epoch": 0.007466666666666667,
|
|
"grad_norm": 0.07613871246576309,
|
|
"kl": 0.00029200315475463867,
|
|
"learning_rate": 1.75e-06,
|
|
"loss": 0.1122,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.030497439205646515,
|
|
"mask/share_reasoning": 0.8622984290122986,
|
|
"mask/share_step_conf": 0.1032978817820549,
|
|
"num_tokens": 1643230.0,
|
|
"reward": 0.8966531753540039,
|
|
"reward_std": 0.20346349477767944,
|
|
"rewards/accuracy_reward_step": 0.65234375,
|
|
"rewards/asymmetric_l2_reward": 0.757659375667572,
|
|
"rewards/final_brier_reward_step": 0.7090843915939331,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 7
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7677263617515564,
|
|
"adv/mean_abs_reasoning": 0.44562456011772156,
|
|
"adv/mean_abs_step_conf": 0.7679376602172852,
|
|
"adv/ratio_final_to_reasoning": 1.722809805520469,
|
|
"adv/ratio_step_to_reasoning": 1.7232839680434522,
|
|
"adv/std_final_conf": 0.929553747177124,
|
|
"adv/std_reasoning": 0.7014294862747192,
|
|
"adv/std_step_conf": 0.9342193603515625,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.5305101373446698,
|
|
"calib/avg_num_step_conf": 4.59375,
|
|
"calib/ece": 0.32694779116465855,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.3092369477911647,
|
|
"calib/gap": 0.004426422498364779,
|
|
"calib/mean_conf": 0.8822891566265061,
|
|
"calib/mu_c": 0.8842446043165467,
|
|
"calib/mu_w": 0.8798181818181819,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
|
"calib/pce": 0.3255020080321285,
|
|
"calib/std_conf": 0.04544272094007264,
|
|
"calib/step_conf_rate": 0.98046875,
|
|
"calib/step_q_c": 0.7964705882352942,
|
|
"calib/step_q_c_n": 629.0,
|
|
"calib/step_q_gap": 0.03597698677277128,
|
|
"calib/step_q_w": 0.7604936014625229,
|
|
"calib/step_q_w_n": 547.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2316.0,
|
|
"completions/max_terminated_length": 2316.0,
|
|
"completions/mean_length": 533.7578125,
|
|
"completions/mean_terminated_length": 535.8510131835938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 158.0,
|
|
"epoch": 0.008533333333333334,
|
|
"grad_norm": 0.04478863254189491,
|
|
"kl": 0.0004049241542816162,
|
|
"learning_rate": 2.0000000000000003e-06,
|
|
"loss": -0.0452,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.032160256057977676,
|
|
"mask/share_reasoning": 0.8618191480636597,
|
|
"mask/share_step_conf": 0.10211435705423355,
|
|
"num_tokens": 1886384.0,
|
|
"reward": 0.8338550925254822,
|
|
"reward_std": 0.18050694465637207,
|
|
"rewards/accuracy_reward_step": 0.54296875,
|
|
"rewards/asymmetric_l2_reward": 0.7384202480316162,
|
|
"rewards/final_brier_reward_step": 0.6269460916519165,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"step": 8
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7801663875579834,
|
|
"adv/mean_abs_reasoning": 0.43747538328170776,
|
|
"adv/mean_abs_step_conf": 0.7640990614891052,
|
|
"adv/ratio_final_to_reasoning": 1.7833377999593711,
|
|
"adv/ratio_step_to_reasoning": 1.7466104166987415,
|
|
"adv/std_final_conf": 0.9302932620048523,
|
|
"adv/std_reasoning": 0.7014261484146118,
|
|
"adv/std_step_conf": 0.9335131645202637,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.42748740982714034,
|
|
"calib/avg_num_step_conf": 4.64453125,
|
|
"calib/ece": 0.2562151394422311,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.2908366533864542,
|
|
"calib/gap": -0.010103443582414662,
|
|
"calib/mean_conf": 0.880199203187251,
|
|
"calib/mu_c": 0.8764556962025316,
|
|
"calib/mu_w": 0.8865591397849463,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
|
"calib/pce": 0.2534661354581674,
|
|
"calib/std_conf": 0.044394556566673175,
|
|
"calib/step_conf_rate": 0.9765625,
|
|
"calib/step_q_c": 0.7760684931506849,
|
|
"calib/step_q_c_n": 730.0,
|
|
"calib/step_q_gap": 0.014957382039573863,
|
|
"calib/step_q_w": 0.7611111111111111,
|
|
"calib/step_q_w_n": 459.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2822.0,
|
|
"completions/max_terminated_length": 2822.0,
|
|
"completions/mean_length": 511.79296875,
|
|
"completions/mean_terminated_length": 513.800048828125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 175.0,
|
|
"epoch": 0.0096,
|
|
"grad_norm": 0.042500849813222885,
|
|
"kl": 0.000448763370513916,
|
|
"learning_rate": 2.25e-06,
|
|
"loss": 0.0164,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03407716751098633,
|
|
"mask/share_reasoning": 0.8594886660575867,
|
|
"mask/share_step_conf": 0.10252793878316879,
|
|
"num_tokens": 2124939.0,
|
|
"reward": 0.8477847576141357,
|
|
"reward_std": 0.20172211527824402,
|
|
"rewards/accuracy_reward_step": 0.6171875,
|
|
"rewards/asymmetric_l2_reward": 0.7034733295440674,
|
|
"rewards/final_brier_reward_step": 0.6749086380004883,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"step": 9
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7686961889266968,
|
|
"adv/mean_abs_reasoning": 0.46870675683021545,
|
|
"adv/mean_abs_step_conf": 0.7512601017951965,
|
|
"adv/ratio_final_to_reasoning": 1.6400364998474934,
|
|
"adv/ratio_step_to_reasoning": 1.6028360821504721,
|
|
"adv/std_final_conf": 0.9307101368904114,
|
|
"adv/std_reasoning": 0.72056645154953,
|
|
"adv/std_step_conf": 0.9345166087150574,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.4519811320754717,
|
|
"calib/avg_num_step_conf": 5.2890625,
|
|
"calib/ece": 0.30464843750000004,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.375,
|
|
"calib/gap": -0.0036691823899371867,
|
|
"calib/mean_conf": 0.8905859375,
|
|
"calib/mu_c": 0.8890666666666666,
|
|
"calib/mu_w": 0.8927358490566037,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.30464843750000004,
|
|
"calib/std_conf": 0.04735224178691115,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.7855445544554456,
|
|
"calib/step_q_c_n": 707.0,
|
|
"calib/step_q_gap": -0.0018897577547554167,
|
|
"calib/step_q_w": 0.787434312210201,
|
|
"calib/step_q_w_n": 647.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1378.0,
|
|
"completions/max_terminated_length": 1378.0,
|
|
"completions/mean_length": 516.8359375,
|
|
"completions/mean_terminated_length": 518.86279296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 187.0,
|
|
"epoch": 0.010666666666666666,
|
|
"grad_norm": 0.03771434351801872,
|
|
"kl": 0.0006309151649475098,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": 0.075,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.03189965710043907,
|
|
"mask/share_reasoning": 0.8523790836334229,
|
|
"mask/share_step_conf": 0.11181497573852539,
|
|
"num_tokens": 2364049.0,
|
|
"reward": 0.8517932891845703,
|
|
"reward_std": 0.19309264421463013,
|
|
"rewards/accuracy_reward_step": 0.5859375,
|
|
"rewards/asymmetric_l2_reward": 0.7258471250534058,
|
|
"rewards/final_brier_reward_step": 0.660551905632019,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 10
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7453495264053345,
|
|
"adv/mean_abs_reasoning": 0.3924379348754883,
|
|
"adv/mean_abs_step_conf": 0.7527601718902588,
|
|
"adv/ratio_final_to_reasoning": 1.8992800139003307,
|
|
"adv/ratio_step_to_reasoning": 1.9181636253617853,
|
|
"adv/std_final_conf": 0.9260469079017639,
|
|
"adv/std_reasoning": 0.681506335735321,
|
|
"adv/std_step_conf": 0.9340210556983948,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.4787878787878788,
|
|
"calib/avg_num_step_conf": 5.40625,
|
|
"calib/ece": 0.30251968503937005,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.4566929133858268,
|
|
"calib/gap": -0.019564679048550082,
|
|
"calib/mean_conf": 0.8861417322834646,
|
|
"calib/mu_c": 0.878516129032258,
|
|
"calib/mu_w": 0.898080808080808,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.2892125984251968,
|
|
"calib/std_conf": 0.10039329936618835,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.7779198966408268,
|
|
"calib/step_q_c_n": 774.0,
|
|
"calib/step_q_gap": 0.008329732706400583,
|
|
"calib/step_q_w": 0.7695901639344263,
|
|
"calib/step_q_w_n": 610.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2714.0,
|
|
"completions/max_terminated_length": 2714.0,
|
|
"completions/mean_length": 531.82421875,
|
|
"completions/mean_terminated_length": 531.82421875,
|
|
"completions/min_length": 172.0,
|
|
"completions/min_terminated_length": 172.0,
|
|
"epoch": 0.011733333333333333,
|
|
"grad_norm": 0.038181111216545105,
|
|
"kl": 0.001034379005432129,
|
|
"learning_rate": 2.7500000000000004e-06,
|
|
"loss": 0.103,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.031716808676719666,
|
|
"mask/share_reasoning": 0.8542863130569458,
|
|
"mask/share_step_conf": 0.11399686336517334,
|
|
"num_tokens": 2604676.0,
|
|
"reward": 0.8596004247665405,
|
|
"reward_std": 0.16910496354103088,
|
|
"rewards/accuracy_reward_step": 0.60546875,
|
|
"rewards/asymmetric_l2_reward": 0.7382351160049438,
|
|
"rewards/final_brier_reward_step": 0.6614344120025635,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 11
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7681692242622375,
|
|
"adv/mean_abs_reasoning": 0.4303590655326843,
|
|
"adv/mean_abs_step_conf": 0.7345424294471741,
|
|
"adv/ratio_final_to_reasoning": 1.7849495590651097,
|
|
"adv/ratio_step_to_reasoning": 1.7068129575427475,
|
|
"adv/std_final_conf": 0.9277141094207764,
|
|
"adv/std_reasoning": 0.7013878226280212,
|
|
"adv/std_step_conf": 0.9326591491699219,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.4637932279257816,
|
|
"calib/avg_num_step_conf": 5.38671875,
|
|
"calib/ece": 0.2204761904761905,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.5277777777777778,
|
|
"calib/gap": -0.002456140350877156,
|
|
"calib/mean_conf": 0.8983333333333333,
|
|
"calib/mu_c": 0.8975438596491229,
|
|
"calib/mu_w": 0.9,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.2201190476190476,
|
|
"calib/std_conf": 0.05905673892705567,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.7772737819025521,
|
|
"calib/step_q_c_n": 862.0,
|
|
"calib/step_q_gap": 0.025649023682049243,
|
|
"calib/step_q_w": 0.7516247582205029,
|
|
"calib/step_q_w_n": 517.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2306.0,
|
|
"completions/max_terminated_length": 2306.0,
|
|
"completions/mean_length": 467.9765625,
|
|
"completions/mean_terminated_length": 471.6614074707031,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 140.0,
|
|
"epoch": 0.0128,
|
|
"grad_norm": 0.06263236701488495,
|
|
"kl": 0.003935456275939941,
|
|
"learning_rate": 3e-06,
|
|
"loss": 0.0278,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.03686348348855972,
|
|
"mask/share_reasoning": 0.8269122838973999,
|
|
"mask/share_step_conf": 0.12841171026229858,
|
|
"num_tokens": 2828654.0,
|
|
"reward": 0.9123976230621338,
|
|
"reward_std": 0.17215202748775482,
|
|
"rewards/accuracy_reward_step": 0.66796875,
|
|
"rewards/asymmetric_l2_reward": 0.7845866680145264,
|
|
"rewards/final_brier_reward_step": 0.7113023400306702,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 12
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7388582229614258,
|
|
"adv/mean_abs_reasoning": 0.41692185401916504,
|
|
"adv/mean_abs_step_conf": 0.7494971752166748,
|
|
"adv/ratio_final_to_reasoning": 1.7721743675433765,
|
|
"adv/ratio_step_to_reasoning": 1.7976922245534819,
|
|
"adv/std_final_conf": 0.9258735775947571,
|
|
"adv/std_reasoning": 0.7204419374465942,
|
|
"adv/std_step_conf": 0.9338845610618591,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.510838445807771,
|
|
"calib/avg_num_step_conf": 4.87109375,
|
|
"calib/ece": 0.26177865612648216,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.5296442687747036,
|
|
"calib/gap": 0.004040218132242646,
|
|
"calib/mean_conf": 0.9060474308300396,
|
|
"calib/mu_c": 0.9074846625766873,
|
|
"calib/mu_w": 0.9034444444444446,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.26177865612648216,
|
|
"calib/std_conf": 0.04516318373782039,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.766610824742268,
|
|
"calib/step_q_c_n": 776.0,
|
|
"calib/step_q_gap": 0.026143733447151263,
|
|
"calib/step_q_w": 0.7404670912951168,
|
|
"calib/step_q_w_n": 471.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2335.0,
|
|
"completions/max_terminated_length": 2335.0,
|
|
"completions/mean_length": 490.625,
|
|
"completions/mean_terminated_length": 490.625,
|
|
"completions/min_length": 118.0,
|
|
"completions/min_terminated_length": 118.0,
|
|
"epoch": 0.013866666666666666,
|
|
"grad_norm": 0.04346369951963425,
|
|
"kl": 0.002653837203979492,
|
|
"learning_rate": 3.2500000000000002e-06,
|
|
"loss": 0.0747,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.0344499908387661,
|
|
"mask/share_reasoning": 0.8513206243515015,
|
|
"mask/share_step_conf": 0.11422930657863617,
|
|
"num_tokens": 3058846.0,
|
|
"reward": 0.9044943451881409,
|
|
"reward_std": 0.173051655292511,
|
|
"rewards/accuracy_reward_step": 0.63671875,
|
|
"rewards/asymmetric_l2_reward": 0.7901186943054199,
|
|
"rewards/final_brier_reward_step": 0.693869948387146,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 13
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7576812505722046,
|
|
"adv/mean_abs_reasoning": 0.5075523853302002,
|
|
"adv/mean_abs_step_conf": 0.7558131217956543,
|
|
"adv/ratio_final_to_reasoning": 1.4928138897018033,
|
|
"adv/ratio_step_to_reasoning": 1.489133227704057,
|
|
"adv/std_final_conf": 0.9261738061904907,
|
|
"adv/std_reasoning": 0.7575864791870117,
|
|
"adv/std_step_conf": 0.9350097179412842,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.5146290491118077,
|
|
"calib/avg_num_step_conf": 5.40234375,
|
|
"calib/ece": 0.39185483870967736,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.7419354838709677,
|
|
"calib/gap": 0.0006008359456635137,
|
|
"calib/mean_conf": 0.9241129032258065,
|
|
"calib/mu_c": 0.9243939393939393,
|
|
"calib/mu_w": 0.9237931034482758,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.39185483870967736,
|
|
"calib/std_conf": 0.03558298108905692,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.7218681318681318,
|
|
"calib/step_q_c_n": 728.0,
|
|
"calib/step_q_gap": 0.01843301736431502,
|
|
"calib/step_q_w": 0.7034351145038168,
|
|
"calib/step_q_w_n": 655.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2932.0,
|
|
"completions/max_terminated_length": 2932.0,
|
|
"completions/mean_length": 563.546875,
|
|
"completions/mean_terminated_length": 565.7568969726562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 134.0,
|
|
"epoch": 0.014933333333333333,
|
|
"grad_norm": 0.04784300550818443,
|
|
"kl": 0.007000446319580078,
|
|
"learning_rate": 3.5e-06,
|
|
"loss": 0.0317,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.03211609274148941,
|
|
"mask/share_reasoning": 0.8483536243438721,
|
|
"mask/share_step_conf": 0.11562406271696091,
|
|
"num_tokens": 3308514.0,
|
|
"reward": 0.8109242916107178,
|
|
"reward_std": 0.2076244205236435,
|
|
"rewards/accuracy_reward_step": 0.515625,
|
|
"rewards/asymmetric_l2_reward": 0.7517649531364441,
|
|
"rewards/final_brier_reward_step": 0.5739898681640625,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"step": 14
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.735247015953064,
|
|
"adv/mean_abs_reasoning": 0.41596412658691406,
|
|
"adv/mean_abs_step_conf": 0.7716025710105896,
|
|
"adv/ratio_final_to_reasoning": 1.7675731366210423,
|
|
"adv/ratio_step_to_reasoning": 1.854973834743334,
|
|
"adv/std_final_conf": 0.9187384843826294,
|
|
"adv/std_reasoning": 0.7012813687324524,
|
|
"adv/std_step_conf": 0.9337195158004761,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.49031918842014094,
|
|
"calib/avg_num_step_conf": 5.23046875,
|
|
"calib/ece": 0.3992156862745099,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.8901960784313725,
|
|
"calib/gap": -0.0005752814549054852,
|
|
"calib/mean_conf": 0.9364705882352942,
|
|
"calib/mu_c": 0.9362043795620437,
|
|
"calib/mu_w": 0.9367796610169492,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3992156862745099,
|
|
"calib/std_conf": 0.030718425805495036,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.6808519553072626,
|
|
"calib/step_q_c_n": 716.0,
|
|
"calib/step_q_gap": 0.008267685644341216,
|
|
"calib/step_q_w": 0.6725842696629214,
|
|
"calib/step_q_w_n": 623.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1352.0,
|
|
"completions/max_terminated_length": 1352.0,
|
|
"completions/mean_length": 473.62890625,
|
|
"completions/mean_terminated_length": 475.4862976074219,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 145.0,
|
|
"epoch": 0.016,
|
|
"grad_norm": 0.03617151826620102,
|
|
"kl": 0.010650634765625,
|
|
"learning_rate": 3.7500000000000005e-06,
|
|
"loss": -0.0205,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.034178197383880615,
|
|
"mask/share_reasoning": 0.8413941264152527,
|
|
"mask/share_step_conf": 0.1205214262008667,
|
|
"num_tokens": 3537643.0,
|
|
"reward": 0.840351939201355,
|
|
"reward_std": 0.1773625612258911,
|
|
"rewards/accuracy_reward_step": 0.53515625,
|
|
"rewards/asymmetric_l2_reward": 0.7859764099121094,
|
|
"rewards/final_brier_reward_step": 0.588477373123169,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 15
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7502319812774658,
|
|
"adv/mean_abs_reasoning": 0.40567100048065186,
|
|
"adv/mean_abs_step_conf": 0.7679750919342041,
|
|
"adv/ratio_final_to_reasoning": 1.849360640490859,
|
|
"adv/ratio_step_to_reasoning": 1.8930983260432293,
|
|
"adv/std_final_conf": 0.9114437103271484,
|
|
"adv/std_reasoning": 0.6816147565841675,
|
|
"adv/std_step_conf": 0.9336596727371216,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.5097402597402597,
|
|
"calib/avg_num_step_conf": 6.546875,
|
|
"calib/ece": 0.3347200000000001,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.932,
|
|
"calib/gap": 0.0009997294372294796,
|
|
"calib/mean_conf": 0.95072,
|
|
"calib/mu_c": 0.9511038961038961,
|
|
"calib/mu_w": 0.9501041666666666,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3347200000000001,
|
|
"calib/std_conf": 0.027761152713819345,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.6192934249263984,
|
|
"calib/step_q_c_n": 1019.0,
|
|
"calib/step_q_gap": 0.029019452323658768,
|
|
"calib/step_q_w": 0.5902739726027396,
|
|
"calib/step_q_w_n": 657.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2684.0,
|
|
"completions/max_terminated_length": 2684.0,
|
|
"completions/mean_length": 663.50390625,
|
|
"completions/mean_terminated_length": 668.7283325195312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 183.0,
|
|
"epoch": 0.017066666666666667,
|
|
"grad_norm": 0.03823031485080719,
|
|
"kl": 0.010951042175292969,
|
|
"learning_rate": 4.000000000000001e-06,
|
|
"loss": 0.0136,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.02500973641872406,
|
|
"mask/share_reasoning": 0.857978880405426,
|
|
"mask/share_step_conf": 0.10919886827468872,
|
|
"num_tokens": 3816348.0,
|
|
"reward": 0.879867672920227,
|
|
"reward_std": 0.17731714248657227,
|
|
"rewards/accuracy_reward_step": 0.6015625,
|
|
"rewards/asymmetric_l2_reward": 0.8082501888275146,
|
|
"rewards/final_brier_reward_step": 0.6358601450920105,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 16
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.749430775642395,
|
|
"adv/mean_abs_reasoning": 0.4871532618999481,
|
|
"adv/mean_abs_step_conf": 0.7657231688499451,
|
|
"adv/ratio_final_to_reasoning": 1.538388089036985,
|
|
"adv/ratio_step_to_reasoning": 1.5718321701543072,
|
|
"adv/std_final_conf": 0.9169648289680481,
|
|
"adv/std_reasoning": 0.7576295733451843,
|
|
"adv/std_step_conf": 0.9339720606803894,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.5530997098793708,
|
|
"calib/avg_num_step_conf": 5.8125,
|
|
"calib/ece": 0.2446215139442231,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.9243027888446215,
|
|
"calib/gap": 0.0062330126736906966,
|
|
"calib/mean_conf": 0.9498007968127489,
|
|
"calib/mu_c": 0.951638418079096,
|
|
"calib/mu_w": 0.9454054054054053,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
|
"calib/pce": 0.2446215139442231,
|
|
"calib/std_conf": 0.025459133712774463,
|
|
"calib/step_conf_rate": 0.9765625,
|
|
"calib/step_q_c": 0.6141885325558795,
|
|
"calib/step_q_c_n": 1029.0,
|
|
"calib/step_q_gap": 0.04153058048616265,
|
|
"calib/step_q_w": 0.5726579520697168,
|
|
"calib/step_q_w_n": 459.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2605.0,
|
|
"completions/max_terminated_length": 2605.0,
|
|
"completions/mean_length": 507.30078125,
|
|
"completions/mean_terminated_length": 513.3162231445312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 107.0,
|
|
"epoch": 0.018133333333333335,
|
|
"grad_norm": 0.03812658414244652,
|
|
"kl": 0.017798423767089844,
|
|
"learning_rate": 4.25e-06,
|
|
"loss": -0.0293,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.032309580594301224,
|
|
"mask/share_reasoning": 0.8335152864456177,
|
|
"mask/share_step_conf": 0.12245635688304901,
|
|
"num_tokens": 4049745.0,
|
|
"reward": 0.9368460774421692,
|
|
"reward_std": 0.2139730602502823,
|
|
"rewards/accuracy_reward_step": 0.69140625,
|
|
"rewards/asymmetric_l2_reward": 0.8321975469589233,
|
|
"rewards/final_brier_reward_step": 0.7102445363998413,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"step": 17
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7235511541366577,
|
|
"adv/mean_abs_reasoning": 0.4305734634399414,
|
|
"adv/mean_abs_step_conf": 0.7692475318908691,
|
|
"adv/ratio_final_to_reasoning": 1.680436012837522,
|
|
"adv/ratio_step_to_reasoning": 1.7865651211878917,
|
|
"adv/std_final_conf": 0.9193499088287354,
|
|
"adv/std_reasoning": 0.720542848110199,
|
|
"adv/std_step_conf": 0.9344537258148193,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.46025487350955774,
|
|
"calib/avg_num_step_conf": 5.11328125,
|
|
"calib/ece": 0.4365476190476191,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.9563492063492064,
|
|
"calib/gap": -0.00011040312914001316,
|
|
"calib/mean_conf": 0.9563888888888888,
|
|
"calib/mu_c": 0.9563358778625954,
|
|
"calib/mu_w": 0.9564462809917355,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.4365476190476191,
|
|
"calib/std_conf": 0.029413168474644355,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.5846951219512195,
|
|
"calib/step_q_c_n": 656.0,
|
|
"calib/step_q_gap": 0.005368935121204288,
|
|
"calib/step_q_w": 0.5793261868300152,
|
|
"calib/step_q_w_n": 653.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2904.0,
|
|
"completions/max_terminated_length": 2904.0,
|
|
"completions/mean_length": 508.76953125,
|
|
"completions/mean_terminated_length": 512.7755737304688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 199.0,
|
|
"epoch": 0.0192,
|
|
"grad_norm": 0.04069630801677704,
|
|
"kl": 0.019733428955078125,
|
|
"learning_rate": 4.5e-06,
|
|
"loss": -0.0352,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.03226814791560173,
|
|
"mask/share_reasoning": 0.8506003618240356,
|
|
"mask/share_step_conf": 0.10931900143623352,
|
|
"num_tokens": 4290710.0,
|
|
"reward": 0.8182658553123474,
|
|
"reward_std": 0.1954856812953949,
|
|
"rewards/accuracy_reward_step": 0.51171875,
|
|
"rewards/asymmetric_l2_reward": 0.7965109348297119,
|
|
"rewards/final_brier_reward_step": 0.5423644781112671,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 18
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7590962648391724,
|
|
"adv/mean_abs_reasoning": 0.3977474570274353,
|
|
"adv/mean_abs_step_conf": 0.7660222053527832,
|
|
"adv/ratio_final_to_reasoning": 1.9084880404070375,
|
|
"adv/ratio_step_to_reasoning": 1.925900950008954,
|
|
"adv/std_final_conf": 0.9002686142921448,
|
|
"adv/std_reasoning": 0.66129070520401,
|
|
"adv/std_step_conf": 0.9337433576583862,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5557777777777777,
|
|
"calib/avg_num_step_conf": 4.80078125,
|
|
"calib/ece": 0.3707843137254901,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.9725490196078431,
|
|
"calib/gap": 0.007238095238095044,
|
|
"calib/mean_conf": 0.9590196078431373,
|
|
"calib/mu_c": 0.9619999999999999,
|
|
"calib/mu_w": 0.9547619047619048,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.3707843137254901,
|
|
"calib/std_conf": 0.027911267101634094,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.5466435506241332,
|
|
"calib/step_q_c_n": 721.0,
|
|
"calib/step_q_gap": 0.025147487632007248,
|
|
"calib/step_q_w": 0.5214960629921259,
|
|
"calib/step_q_w_n": 508.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1758.0,
|
|
"completions/max_terminated_length": 1758.0,
|
|
"completions/mean_length": 484.01953125,
|
|
"completions/mean_terminated_length": 485.91766357421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 198.0,
|
|
"epoch": 0.020266666666666665,
|
|
"grad_norm": 0.024780066683888435,
|
|
"kl": 0.02925872802734375,
|
|
"learning_rate": 4.75e-06,
|
|
"loss": -0.0072,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03218923509120941,
|
|
"mask/share_reasoning": 0.8538081645965576,
|
|
"mask/share_step_conf": 0.11009633541107178,
|
|
"num_tokens": 4519379.0,
|
|
"reward": 0.8946607112884521,
|
|
"reward_std": 0.16607698798179626,
|
|
"rewards/accuracy_reward_step": 0.5859375,
|
|
"rewards/asymmetric_l2_reward": 0.8523170948028564,
|
|
"rewards/final_brier_reward_step": 0.6205980777740479,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 19
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7314550876617432,
|
|
"adv/mean_abs_reasoning": 0.5331639051437378,
|
|
"adv/mean_abs_step_conf": 0.7298775911331177,
|
|
"adv/ratio_final_to_reasoning": 1.3719141161000898,
|
|
"adv/ratio_step_to_reasoning": 1.3689553701808586,
|
|
"adv/std_final_conf": 0.9152284860610962,
|
|
"adv/std_reasoning": 0.7927942276000977,
|
|
"adv/std_step_conf": 0.9345114827156067,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.4764349489795918,
|
|
"calib/avg_num_step_conf": 5.6015625,
|
|
"calib/ece": 0.41123015873015883,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.9761904761904762,
|
|
"calib/gap": -0.0030535714285715443,
|
|
"calib/mean_conf": 0.964642857142857,
|
|
"calib/mu_c": 0.9632857142857142,
|
|
"calib/mu_w": 0.9663392857142857,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.4101587301587303,
|
|
"calib/std_conf": 0.02658229637795846,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.5052864583333333,
|
|
"calib/step_q_c_n": 768.0,
|
|
"calib/step_q_gap": 0.01378495683183173,
|
|
"calib/step_q_w": 0.49150150150150157,
|
|
"calib/step_q_w_n": 666.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2446.0,
|
|
"completions/max_terminated_length": 2446.0,
|
|
"completions/mean_length": 500.95703125,
|
|
"completions/mean_terminated_length": 504.9015808105469,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 183.0,
|
|
"epoch": 0.021333333333333333,
|
|
"grad_norm": 0.025546662509441376,
|
|
"kl": 0.041919708251953125,
|
|
"learning_rate": 5e-06,
|
|
"loss": -0.0376,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.0341811366379261,
|
|
"mask/share_reasoning": 0.8281325697898865,
|
|
"mask/share_step_conf": 0.12987381219863892,
|
|
"num_tokens": 4752496.0,
|
|
"reward": 0.8547559976577759,
|
|
"reward_std": 0.21240723133087158,
|
|
"rewards/accuracy_reward_step": 0.546875,
|
|
"rewards/asymmetric_l2_reward": 0.8308833837509155,
|
|
"rewards/final_brier_reward_step": 0.5739409923553467,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 20
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7029703855514526,
|
|
"adv/mean_abs_reasoning": 0.4354853928089142,
|
|
"adv/mean_abs_step_conf": 0.7637710571289062,
|
|
"adv/ratio_final_to_reasoning": 1.6142226516881306,
|
|
"adv/ratio_step_to_reasoning": 1.7538385207423937,
|
|
"adv/std_final_conf": 0.8815454840660095,
|
|
"adv/std_reasoning": 0.7205679416656494,
|
|
"adv/std_step_conf": 0.9338224530220032,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.47911710282844305,
|
|
"calib/avg_num_step_conf": 6.03125,
|
|
"calib/ece": 0.3549011857707511,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.9802371541501976,
|
|
"calib/gap": -0.002429949775310858,
|
|
"calib/mean_conf": 0.9698418972332016,
|
|
"calib/mu_c": 0.9689102564102563,
|
|
"calib/mu_w": 0.9713402061855672,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.35407114624505936,
|
|
"calib/std_conf": 0.022649911297309762,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4717988826815642,
|
|
"calib/step_q_c_n": 895.0,
|
|
"calib/step_q_gap": 0.04224572397586318,
|
|
"calib/step_q_w": 0.42955315870570104,
|
|
"calib/step_q_w_n": 649.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2070.0,
|
|
"completions/max_terminated_length": 2070.0,
|
|
"completions/mean_length": 512.80078125,
|
|
"completions/mean_terminated_length": 514.811767578125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 164.0,
|
|
"epoch": 0.0224,
|
|
"grad_norm": 0.022558465600013733,
|
|
"kl": 0.045940399169921875,
|
|
"learning_rate": 4.9722222222222224e-06,
|
|
"loss": 0.0118,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.0322754830121994,
|
|
"mask/share_reasoning": 0.8363606333732605,
|
|
"mask/share_step_conf": 0.1274576187133789,
|
|
"num_tokens": 4986733.0,
|
|
"reward": 0.9055733680725098,
|
|
"reward_std": 0.18177592754364014,
|
|
"rewards/accuracy_reward_step": 0.609375,
|
|
"rewards/asymmetric_l2_reward": 0.8619275093078613,
|
|
"rewards/final_brier_reward_step": 0.6296879053115845,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 21
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.687179446220398,
|
|
"adv/mean_abs_reasoning": 0.3393262028694153,
|
|
"adv/mean_abs_step_conf": 0.7603123188018799,
|
|
"adv/ratio_final_to_reasoning": 2.0251293310373937,
|
|
"adv/ratio_step_to_reasoning": 2.2406531307412028,
|
|
"adv/std_final_conf": 0.8886227607727051,
|
|
"adv/std_reasoning": 0.6402140855789185,
|
|
"adv/std_step_conf": 0.934212863445282,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.497718760640109,
|
|
"calib/avg_num_step_conf": 6.17578125,
|
|
"calib/ece": 0.3266929133858268,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.984251968503937,
|
|
"calib/gap": -0.0016397684712292637,
|
|
"calib/mean_conf": 0.9719685039370078,
|
|
"calib/mu_c": 0.9713939393939393,
|
|
"calib/mu_w": 0.9730337078651685,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.32452755905511815,
|
|
"calib/std_conf": 0.024172587131458173,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4599161616161616,
|
|
"calib/step_q_c_n": 990.0,
|
|
"calib/step_q_gap": -0.014872332461672633,
|
|
"calib/step_q_w": 0.47478849407783424,
|
|
"calib/step_q_w_n": 591.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2402.0,
|
|
"completions/max_terminated_length": 2402.0,
|
|
"completions/mean_length": 505.25,
|
|
"completions/mean_terminated_length": 507.2314147949219,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 195.0,
|
|
"epoch": 0.023466666666666667,
|
|
"grad_norm": 0.024227218702435493,
|
|
"kl": 0.056705474853515625,
|
|
"learning_rate": 4.944444444444445e-06,
|
|
"loss": 0.0027,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.031584907323122025,
|
|
"mask/share_reasoning": 0.8346095681190491,
|
|
"mask/share_step_conf": 0.1298992782831192,
|
|
"num_tokens": 5217893.0,
|
|
"reward": 0.9237887859344482,
|
|
"reward_std": 0.15195703506469727,
|
|
"rewards/accuracy_reward_step": 0.64453125,
|
|
"rewards/asymmetric_l2_reward": 0.8583118915557861,
|
|
"rewards/final_brier_reward_step": 0.6619218587875366,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 22
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7530167102813721,
|
|
"adv/mean_abs_reasoning": 0.4509432315826416,
|
|
"adv/mean_abs_step_conf": 0.74357008934021,
|
|
"adv/ratio_final_to_reasoning": 1.6698703019414791,
|
|
"adv/ratio_step_to_reasoning": 1.6489217206577373,
|
|
"adv/std_final_conf": 0.897098183631897,
|
|
"adv/std_reasoning": 0.7014564871788025,
|
|
"adv/std_step_conf": 0.9347206950187683,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.4533584431889517,
|
|
"calib/avg_num_step_conf": 5.5859375,
|
|
"calib/ece": 0.4400395256916997,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.9920948616600791,
|
|
"calib/gap": -0.0006911487758944901,
|
|
"calib/mean_conf": 0.9727667984189724,
|
|
"calib/mu_c": 0.9724444444444442,
|
|
"calib/mu_w": 0.9731355932203387,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.439604743083004,
|
|
"calib/std_conf": 0.02336031913958452,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.47111850865512644,
|
|
"calib/step_q_c_n": 751.0,
|
|
"calib/step_q_gap": 0.015625136048351806,
|
|
"calib/step_q_w": 0.45549337260677464,
|
|
"calib/step_q_w_n": 679.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2113.0,
|
|
"completions/max_terminated_length": 2113.0,
|
|
"completions/mean_length": 516.28125,
|
|
"completions/mean_terminated_length": 520.346435546875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 164.0,
|
|
"epoch": 0.024533333333333334,
|
|
"grad_norm": 0.030233683064579964,
|
|
"kl": 0.048114776611328125,
|
|
"learning_rate": 4.9166666666666665e-06,
|
|
"loss": -0.0494,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03315791115164757,
|
|
"mask/share_reasoning": 0.8343464136123657,
|
|
"mask/share_step_conf": 0.1246831864118576,
|
|
"num_tokens": 5453997.0,
|
|
"reward": 0.8372361660003662,
|
|
"reward_std": 0.20087505877017975,
|
|
"rewards/accuracy_reward_step": 0.52734375,
|
|
"rewards/asymmetric_l2_reward": 0.8298656940460205,
|
|
"rewards/final_brier_reward_step": 0.5430440902709961,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 23
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7376492023468018,
|
|
"adv/mean_abs_reasoning": 0.5325069427490234,
|
|
"adv/mean_abs_step_conf": 0.7742867469787598,
|
|
"adv/ratio_final_to_reasoning": 1.3852386572440694,
|
|
"adv/ratio_step_to_reasoning": 1.4540406609190255,
|
|
"adv/std_final_conf": 0.9101276993751526,
|
|
"adv/std_reasoning": 0.7754067778587341,
|
|
"adv/std_step_conf": 0.9344092011451721,
|
|
"calib/answer_extract_rate": 0.96484375,
|
|
"calib/auroc": 0.6002886002886003,
|
|
"calib/avg_num_step_conf": 6.46484375,
|
|
"calib/ece": 0.46437246963562745,
|
|
"calib/final_conf_rate": 0.96484375,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.9919028340080972,
|
|
"calib/gap": 0.006217368490095798,
|
|
"calib/mean_conf": 0.974493927125506,
|
|
"calib/mu_c": 0.9775396825396826,
|
|
"calib/mu_w": 0.9713223140495868,
|
|
"calib/nonempty_final_conf_rate": 0.96484375,
|
|
"calib/nonempty_reasoning_rate": 0.98046875,
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
|
"calib/pce": 0.46437246963562745,
|
|
"calib/std_conf": 0.018606487581600245,
|
|
"calib/step_conf_rate": 0.98046875,
|
|
"calib/step_q_c": 0.43772254335260113,
|
|
"calib/step_q_c_n": 865.0,
|
|
"calib/step_q_gap": -0.00044201360942414114,
|
|
"calib/step_q_w": 0.4381645569620253,
|
|
"calib/step_q_w_n": 790.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 3053.0,
|
|
"completions/max_terminated_length": 3053.0,
|
|
"completions/mean_length": 605.53125,
|
|
"completions/mean_terminated_length": 605.53125,
|
|
"completions/min_length": 158.0,
|
|
"completions/min_terminated_length": 158.0,
|
|
"epoch": 0.0256,
|
|
"grad_norm": 0.7543313503265381,
|
|
"kl": 1.2979049682617188,
|
|
"learning_rate": 4.888888888888889e-06,
|
|
"loss": 0.1109,
|
|
"mask/has_final_conf_rate": 0.96484375,
|
|
"mask/share_final_conf": 0.0299096517264843,
|
|
"mask/share_reasoning": 0.8437927961349487,
|
|
"mask/share_step_conf": 0.12629754841327667,
|
|
"num_tokens": 5713525.0,
|
|
"reward": 0.8208262324333191,
|
|
"reward_std": 0.2088262289762497,
|
|
"rewards/accuracy_reward_step": 0.4921875,
|
|
"rewards/asymmetric_l2_reward": 0.831911027431488,
|
|
"rewards/final_brier_reward_step": 0.5183351635932922,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"step": 24
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7134343385696411,
|
|
"adv/mean_abs_reasoning": 0.4166892170906067,
|
|
"adv/mean_abs_step_conf": 0.7519113421440125,
|
|
"adv/ratio_final_to_reasoning": 1.7121497492806705,
|
|
"adv/ratio_step_to_reasoning": 1.8044895603346358,
|
|
"adv/std_final_conf": 0.8936386108398438,
|
|
"adv/std_reasoning": 0.7013903260231018,
|
|
"adv/std_step_conf": 0.934689462184906,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.5044484580908423,
|
|
"calib/avg_num_step_conf": 6.16796875,
|
|
"calib/ece": 0.36948000000000003,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.988,
|
|
"calib/gap": 0.0007445314067830999,
|
|
"calib/mean_conf": 0.97348,
|
|
"calib/mu_c": 0.973774834437086,
|
|
"calib/mu_w": 0.9730303030303029,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.36948000000000003,
|
|
"calib/std_conf": 0.022169564722835676,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4669714285714286,
|
|
"calib/step_q_c_n": 875.0,
|
|
"calib/step_q_gap": 0.04030949675324674,
|
|
"calib/step_q_w": 0.42666193181818185,
|
|
"calib/step_q_w_n": 704.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2316.0,
|
|
"completions/max_terminated_length": 2316.0,
|
|
"completions/mean_length": 517.859375,
|
|
"completions/mean_terminated_length": 517.859375,
|
|
"completions/min_length": 189.0,
|
|
"completions/min_terminated_length": 189.0,
|
|
"epoch": 0.02666666666666667,
|
|
"grad_norm": 0.02401627041399479,
|
|
"kl": 0.04943084716796875,
|
|
"learning_rate": 4.861111111111111e-06,
|
|
"loss": 0.0918,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.03197113424539566,
|
|
"mask/share_reasoning": 0.8365023136138916,
|
|
"mask/share_step_conf": 0.13152649998664856,
|
|
"num_tokens": 5949321.0,
|
|
"reward": 0.8754016160964966,
|
|
"reward_std": 0.18805429339408875,
|
|
"rewards/accuracy_reward_step": 0.58984375,
|
|
"rewards/asymmetric_l2_reward": 0.8299168944358826,
|
|
"rewards/final_brier_reward_step": 0.6083863377571106,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 25
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7160738706588745,
|
|
"adv/mean_abs_reasoning": 0.406027227640152,
|
|
"adv/mean_abs_step_conf": 0.7333802580833435,
|
|
"adv/ratio_final_to_reasoning": 1.7636104721861319,
|
|
"adv/ratio_step_to_reasoning": 1.806234183716648,
|
|
"adv/std_final_conf": 0.8886821269989014,
|
|
"adv/std_reasoning": 0.7012869715690613,
|
|
"adv/std_step_conf": 0.9339894652366638,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.487475257661593,
|
|
"calib/avg_num_step_conf": 5.6484375,
|
|
"calib/ece": 0.33404761904761915,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/gap": -0.0007357859531772482,
|
|
"calib/mean_conf": 0.972936507936508,
|
|
"calib/mu_c": 0.9726708074534163,
|
|
"calib/mu_w": 0.9734065934065935,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.33404761904761915,
|
|
"calib/std_conf": 0.01628454620025839,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.47772833723653396,
|
|
"calib/step_q_c_n": 854.0,
|
|
"calib/step_q_gap": 0.0363432021013988,
|
|
"calib/step_q_w": 0.44138513513513516,
|
|
"calib/step_q_w_n": 592.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2908.0,
|
|
"completions/max_terminated_length": 2908.0,
|
|
"completions/mean_length": 528.06640625,
|
|
"completions/mean_terminated_length": 528.06640625,
|
|
"completions/min_length": 202.0,
|
|
"completions/min_terminated_length": 202.0,
|
|
"epoch": 0.027733333333333332,
|
|
"grad_norm": 0.02128530666232109,
|
|
"kl": 0.05477142333984375,
|
|
"learning_rate": 4.833333333333333e-06,
|
|
"loss": 0.0559,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.029835352674126625,
|
|
"mask/share_reasoning": 0.8532722592353821,
|
|
"mask/share_step_conf": 0.11689238250255585,
|
|
"num_tokens": 6189746.0,
|
|
"reward": 0.9097167253494263,
|
|
"reward_std": 0.16467860341072083,
|
|
"rewards/accuracy_reward_step": 0.62890625,
|
|
"rewards/asymmetric_l2_reward": 0.8499466180801392,
|
|
"rewards/final_brier_reward_step": 0.6468304395675659,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 26
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7680479288101196,
|
|
"adv/mean_abs_reasoning": 0.5606542825698853,
|
|
"adv/mean_abs_step_conf": 0.7543854713439941,
|
|
"adv/ratio_final_to_reasoning": 1.3699136039585729,
|
|
"adv/ratio_step_to_reasoning": 1.3455448300262656,
|
|
"adv/std_final_conf": 0.9101540446281433,
|
|
"adv/std_reasoning": 0.7928864359855652,
|
|
"adv/std_step_conf": 0.9347414374351501,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.4851177112066201,
|
|
"calib/avg_num_step_conf": 6.8125,
|
|
"calib/ece": 0.44495999999999986,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.976,
|
|
"calib/gap": 0.003723138110205859,
|
|
"calib/mean_conf": 0.96464,
|
|
"calib/mu_c": 0.9664122137404579,
|
|
"calib/mu_w": 0.9626890756302521,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.44279999999999986,
|
|
"calib/std_conf": 0.06608532666182411,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.49636254501800725,
|
|
"calib/step_q_c_n": 833.0,
|
|
"calib/step_q_gap": 0.0435634231738799,
|
|
"calib/step_q_w": 0.45279912184412735,
|
|
"calib/step_q_w_n": 911.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 3047.0,
|
|
"completions/max_terminated_length": 3047.0,
|
|
"completions/mean_length": 556.5,
|
|
"completions/mean_terminated_length": 558.682373046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 193.0,
|
|
"epoch": 0.0288,
|
|
"grad_norm": 0.023293569684028625,
|
|
"kl": 0.046802520751953125,
|
|
"learning_rate": 4.805555555555556e-06,
|
|
"loss": 0.0577,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.030663229525089264,
|
|
"mask/share_reasoning": 0.8329758644104004,
|
|
"mask/share_step_conf": 0.13245464861392975,
|
|
"num_tokens": 6437426.0,
|
|
"reward": 0.8341401219367981,
|
|
"reward_std": 0.22353267669677734,
|
|
"rewards/accuracy_reward_step": 0.51171875,
|
|
"rewards/asymmetric_l2_reward": 0.829703688621521,
|
|
"rewards/final_brier_reward_step": 0.5409202575683594,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 27
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7091407775878906,
|
|
"adv/mean_abs_reasoning": 0.362392783164978,
|
|
"adv/mean_abs_step_conf": 0.7690234184265137,
|
|
"adv/ratio_final_to_reasoning": 1.9568291934363848,
|
|
"adv/ratio_step_to_reasoning": 2.122071559235269,
|
|
"adv/std_final_conf": 0.8629666566848755,
|
|
"adv/std_reasoning": 0.6403860449790955,
|
|
"adv/std_step_conf": 0.9344640970230103,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.5548206937095825,
|
|
"calib/avg_num_step_conf": 5.734375,
|
|
"calib/ece": 0.29453815261044186,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.9718875502008032,
|
|
"calib/gap": 0.010019841269841345,
|
|
"calib/mean_conf": 0.9612048192771084,
|
|
"calib/mu_c": 0.9644642857142858,
|
|
"calib/mu_w": 0.9544444444444444,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.98046875,
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
|
"calib/pce": 0.29052208835341375,
|
|
"calib/std_conf": 0.08898893379640163,
|
|
"calib/step_conf_rate": 0.98046875,
|
|
"calib/step_q_c": 0.4770860215053763,
|
|
"calib/step_q_c_n": 930.0,
|
|
"calib/step_q_gap": 0.05139828916336886,
|
|
"calib/step_q_w": 0.42568773234200746,
|
|
"calib/step_q_w_n": 538.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2501.0,
|
|
"completions/max_terminated_length": 2501.0,
|
|
"completions/mean_length": 558.4375,
|
|
"completions/mean_terminated_length": 558.4375,
|
|
"completions/min_length": 186.0,
|
|
"completions/min_terminated_length": 186.0,
|
|
"epoch": 0.029866666666666666,
|
|
"grad_norm": 0.02454567328095436,
|
|
"kl": 0.04253387451171875,
|
|
"learning_rate": 4.777777777777778e-06,
|
|
"loss": 0.0252,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.029856320470571518,
|
|
"mask/share_reasoning": 0.8533777594566345,
|
|
"mask/share_step_conf": 0.11676593124866486,
|
|
"num_tokens": 6687330.0,
|
|
"reward": 0.9220882058143616,
|
|
"reward_std": 0.1710459291934967,
|
|
"rewards/accuracy_reward_step": 0.65625,
|
|
"rewards/asymmetric_l2_reward": 0.842483401298523,
|
|
"rewards/final_brier_reward_step": 0.6759117245674133,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 28
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.751983642578125,
|
|
"adv/mean_abs_reasoning": 0.4891800880432129,
|
|
"adv/mean_abs_step_conf": 0.755209743976593,
|
|
"adv/ratio_final_to_reasoning": 1.5372327307641696,
|
|
"adv/ratio_step_to_reasoning": 1.543827646373619,
|
|
"adv/std_final_conf": 0.9133455753326416,
|
|
"adv/std_reasoning": 0.7393599152565002,
|
|
"adv/std_step_conf": 0.9341092705726624,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.4830664633371458,
|
|
"calib/avg_num_step_conf": 6.45703125,
|
|
"calib/ece": 0.48569721115537856,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.9920318725099602,
|
|
"calib/gap": -0.007523827678230899,
|
|
"calib/mean_conf": 0.9637848605577689,
|
|
"calib/mu_c": 0.9599180327868853,
|
|
"calib/mu_w": 0.9674418604651162,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.48171314741035864,
|
|
"calib/std_conf": 0.06420535972185211,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.45259562841530054,
|
|
"calib/step_q_c_n": 732.0,
|
|
"calib/step_q_gap": 0.031010394973389588,
|
|
"calib/step_q_w": 0.42158523344191096,
|
|
"calib/step_q_w_n": 921.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2514.0,
|
|
"completions/max_terminated_length": 2514.0,
|
|
"completions/mean_length": 585.25,
|
|
"completions/mean_terminated_length": 589.8582763671875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 198.0,
|
|
"epoch": 0.030933333333333334,
|
|
"grad_norm": 0.029458940029144287,
|
|
"kl": 0.04529571533203125,
|
|
"learning_rate": 4.75e-06,
|
|
"loss": -0.0735,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.027916226536035538,
|
|
"mask/share_reasoning": 0.8425346612930298,
|
|
"mask/share_step_conf": 0.12173663079738617,
|
|
"num_tokens": 6944282.0,
|
|
"reward": 0.8238710165023804,
|
|
"reward_std": 0.20323438942432404,
|
|
"rewards/accuracy_reward_step": 0.4765625,
|
|
"rewards/asymmetric_l2_reward": 0.8522884845733643,
|
|
"rewards/final_brier_reward_step": 0.5040472745895386,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 29
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7376535534858704,
|
|
"adv/mean_abs_reasoning": 0.4982551336288452,
|
|
"adv/mean_abs_step_conf": 0.7502148747444153,
|
|
"adv/ratio_final_to_reasoning": 1.4804735640423028,
|
|
"adv/ratio_step_to_reasoning": 1.5056841848884133,
|
|
"adv/std_final_conf": 0.9088844656944275,
|
|
"adv/std_reasoning": 0.7576410174369812,
|
|
"adv/std_step_conf": 0.9344900250434875,
|
|
"calib/answer_extract_rate": 0.9609375,
|
|
"calib/auroc": 0.47781114447781114,
|
|
"calib/avg_num_step_conf": 6.62890625,
|
|
"calib/ece": 0.40760162601626015,
|
|
"calib/final_conf_rate": 0.9609375,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 0.9715447154471545,
|
|
"calib/gap": 0.012942942942942848,
|
|
"calib/mean_conf": 0.9563821138211382,
|
|
"calib/mu_c": 0.9622222222222221,
|
|
"calib/mu_w": 0.9492792792792792,
|
|
"calib/nonempty_final_conf_rate": 0.9609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.40760162601626015,
|
|
"calib/std_conf": 0.0807153001610265,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.42158899188876015,
|
|
"calib/step_q_c_n": 863.0,
|
|
"calib/step_q_gap": 0.014454699322812903,
|
|
"calib/step_q_w": 0.40713429256594724,
|
|
"calib/step_q_w_n": 834.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2535.0,
|
|
"completions/max_terminated_length": 2535.0,
|
|
"completions/mean_length": 626.84765625,
|
|
"completions/mean_terminated_length": 631.783447265625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 200.0,
|
|
"epoch": 0.032,
|
|
"grad_norm": 0.02535308338701725,
|
|
"kl": 0.0458221435546875,
|
|
"learning_rate": 4.722222222222222e-06,
|
|
"loss": 0.0568,
|
|
"mask/has_final_conf_rate": 0.9609375,
|
|
"mask/share_final_conf": 0.026801906526088715,
|
|
"mask/share_reasoning": 0.8470184803009033,
|
|
"mask/share_step_conf": 0.11836712062358856,
|
|
"num_tokens": 7211739.0,
|
|
"reward": 0.844096839427948,
|
|
"reward_std": 0.2074047029018402,
|
|
"rewards/accuracy_reward_step": 0.52734375,
|
|
"rewards/asymmetric_l2_reward": 0.8272979259490967,
|
|
"rewards/final_brier_reward_step": 0.5632394552230835,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"step": 30
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7183291912078857,
|
|
"adv/mean_abs_reasoning": 0.42496633529663086,
|
|
"adv/mean_abs_step_conf": 0.7648087739944458,
|
|
"adv/ratio_final_to_reasoning": 1.690320224321968,
|
|
"adv/ratio_step_to_reasoning": 1.799692612029142,
|
|
"adv/std_final_conf": 0.9244080185890198,
|
|
"adv/std_reasoning": 0.7204815149307251,
|
|
"adv/std_step_conf": 0.9345629215240479,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.4812748493010132,
|
|
"calib/avg_num_step_conf": 6.91015625,
|
|
"calib/ece": 0.5126693227091633,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.952191235059761,
|
|
"calib/gap": -0.007899833269205736,
|
|
"calib/mean_conf": 0.9559362549800797,
|
|
"calib/mu_c": 0.9515929203539825,
|
|
"calib/mu_w": 0.9594927536231882,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.509203187250996,
|
|
"calib/std_conf": 0.06133491690864156,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4562691131498471,
|
|
"calib/step_q_c_n": 654.0,
|
|
"calib/step_q_gap": 0.06567853019020581,
|
|
"calib/step_q_w": 0.3905905829596413,
|
|
"calib/step_q_w_n": 1115.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2377.0,
|
|
"completions/max_terminated_length": 2377.0,
|
|
"completions/mean_length": 613.2578125,
|
|
"completions/mean_terminated_length": 618.0866088867188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 189.0,
|
|
"epoch": 0.03306666666666667,
|
|
"grad_norm": 0.030099138617515564,
|
|
"kl": 0.04169464111328125,
|
|
"learning_rate": 4.694444444444445e-06,
|
|
"loss": 0.0082,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.028120990842580795,
|
|
"mask/share_reasoning": 0.8433920741081238,
|
|
"mask/share_step_conf": 0.12067442387342453,
|
|
"num_tokens": 7474645.0,
|
|
"reward": 0.8082037568092346,
|
|
"reward_std": 0.17442850768566132,
|
|
"rewards/accuracy_reward_step": 0.44140625,
|
|
"rewards/asymmetric_l2_reward": 0.8525465726852417,
|
|
"rewards/final_brier_reward_step": 0.4794859290122986,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 31
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.72445148229599,
|
|
"adv/mean_abs_reasoning": 0.49374479055404663,
|
|
"adv/mean_abs_step_conf": 0.7507187128067017,
|
|
"adv/ratio_final_to_reasoning": 1.4672589891694048,
|
|
"adv/ratio_step_to_reasoning": 1.5204590046697941,
|
|
"adv/std_final_conf": 0.9220708012580872,
|
|
"adv/std_reasoning": 0.7753057479858398,
|
|
"adv/std_step_conf": 0.9348052144050598,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.6214615735016595,
|
|
"calib/avg_num_step_conf": 6.03125,
|
|
"calib/ece": 0.43745967741935476,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.9032258064516129,
|
|
"calib/gap": 0.02743476280340973,
|
|
"calib/mean_conf": 0.9383467741935484,
|
|
"calib/mu_c": 0.9517322834645667,
|
|
"calib/mu_w": 0.924297520661157,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.4318548387096774,
|
|
"calib/std_conf": 0.11818899192469161,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4153371592539455,
|
|
"calib/step_q_c_n": 697.0,
|
|
"calib/step_q_gap": 0.04276336940742842,
|
|
"calib/step_q_w": 0.37257378984651707,
|
|
"calib/step_q_w_n": 847.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2412.0,
|
|
"completions/max_terminated_length": 2412.0,
|
|
"completions/mean_length": 592.4609375,
|
|
"completions/mean_terminated_length": 594.7843627929688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 194.0,
|
|
"epoch": 0.034133333333333335,
|
|
"grad_norm": 0.023633981123566628,
|
|
"kl": 0.0494842529296875,
|
|
"learning_rate": 4.666666666666667e-06,
|
|
"loss": 0.0108,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.029128411784768105,
|
|
"mask/share_reasoning": 0.8510886430740356,
|
|
"mask/share_step_conf": 0.1158766970038414,
|
|
"num_tokens": 7733019.0,
|
|
"reward": 0.8346266150474548,
|
|
"reward_std": 0.2019689679145813,
|
|
"rewards/accuracy_reward_step": 0.49609375,
|
|
"rewards/asymmetric_l2_reward": 0.8270046710968018,
|
|
"rewards/final_brier_reward_step": 0.5500609874725342,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"step": 32
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7848911285400391,
|
|
"adv/mean_abs_reasoning": 0.4544700086116791,
|
|
"adv/mean_abs_step_conf": 0.7658688426017761,
|
|
"adv/ratio_final_to_reasoning": 1.7270471398932017,
|
|
"adv/ratio_step_to_reasoning": 1.6851911635299375,
|
|
"adv/std_final_conf": 0.9196609258651733,
|
|
"adv/std_reasoning": 0.7013769149780273,
|
|
"adv/std_step_conf": 0.9336271286010742,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5360434596838186,
|
|
"calib/avg_num_step_conf": 6.3125,
|
|
"calib/ece": 0.4496875000000001,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.91015625,
|
|
"calib/gap": 0.008684612097906275,
|
|
"calib/mean_conf": 0.94578125,
|
|
"calib/mu_c": 0.9501574803149605,
|
|
"calib/mu_w": 0.9414728682170542,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4496875000000001,
|
|
"calib/std_conf": 0.061193971504041954,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3878806333739343,
|
|
"calib/step_q_c_n": 821.0,
|
|
"calib/step_q_gap": 0.009132834631795983,
|
|
"calib/step_q_w": 0.3787477987421383,
|
|
"calib/step_q_w_n": 795.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1359.0,
|
|
"completions/max_terminated_length": 1359.0,
|
|
"completions/mean_length": 535.2109375,
|
|
"completions/mean_terminated_length": 537.309814453125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 151.0,
|
|
"epoch": 0.0352,
|
|
"grad_norm": 0.021006744354963303,
|
|
"kl": 0.052581787109375,
|
|
"learning_rate": 4.638888888888889e-06,
|
|
"loss": 0.0037,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.03007342480123043,
|
|
"mask/share_reasoning": 0.8435391187667847,
|
|
"mask/share_step_conf": 0.12248119711875916,
|
|
"num_tokens": 7976905.0,
|
|
"reward": 0.8592232465744019,
|
|
"reward_std": 0.16713036596775055,
|
|
"rewards/accuracy_reward_step": 0.49609375,
|
|
"rewards/asymmetric_l2_reward": 0.8708338141441345,
|
|
"rewards/final_brier_reward_step": 0.548393726348877,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 33
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7719642519950867,
|
|
"adv/mean_abs_reasoning": 0.6268492937088013,
|
|
"adv/mean_abs_step_conf": 0.7533676624298096,
|
|
"adv/ratio_final_to_reasoning": 1.2314989579516022,
|
|
"adv/ratio_step_to_reasoning": 1.2018321947408648,
|
|
"adv/std_final_conf": 0.9304158091545105,
|
|
"adv/std_reasoning": 0.8266779780387878,
|
|
"adv/std_step_conf": 0.934239387512207,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5234611666129553,
|
|
"calib/avg_num_step_conf": 6.45703125,
|
|
"calib/ece": 0.3810714285714285,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.9087301587301587,
|
|
"calib/gap": -0.0008849500483399941,
|
|
"calib/mean_conf": 0.9373412698412698,
|
|
"calib/mu_c": 0.9369655172413794,
|
|
"calib/mu_w": 0.9378504672897194,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.37150793650793645,
|
|
"calib/std_conf": 0.08487429198324628,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.34746463547334067,
|
|
"calib/step_q_c_n": 919.0,
|
|
"calib/step_q_gap": 0.025979621849362422,
|
|
"calib/step_q_w": 0.32148501362397824,
|
|
"calib/step_q_w_n": 734.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2888.0,
|
|
"completions/max_terminated_length": 2888.0,
|
|
"completions/mean_length": 521.78515625,
|
|
"completions/mean_terminated_length": 521.78515625,
|
|
"completions/min_length": 206.0,
|
|
"completions/min_terminated_length": 206.0,
|
|
"epoch": 0.03626666666666667,
|
|
"grad_norm": 0.021358368918299675,
|
|
"kl": 0.061359405517578125,
|
|
"learning_rate": 4.611111111111112e-06,
|
|
"loss": 0.0417,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.03217000514268875,
|
|
"mask/share_reasoning": 0.8295138478279114,
|
|
"mask/share_step_conf": 0.13831612467765808,
|
|
"num_tokens": 8215594.0,
|
|
"reward": 0.8955637216567993,
|
|
"reward_std": 0.2316317856311798,
|
|
"rewards/accuracy_reward_step": 0.56640625,
|
|
"rewards/asymmetric_l2_reward": 0.8735677003860474,
|
|
"rewards/final_brier_reward_step": 0.6074035167694092,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 34
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7504846453666687,
|
|
"adv/mean_abs_reasoning": 0.4689059257507324,
|
|
"adv/mean_abs_step_conf": 0.7678923606872559,
|
|
"adv/ratio_final_to_reasoning": 1.6005015167277323,
|
|
"adv/ratio_step_to_reasoning": 1.6376256270548026,
|
|
"adv/std_final_conf": 0.9261394143104553,
|
|
"adv/std_reasoning": 0.7205601334571838,
|
|
"adv/std_step_conf": 0.9343000054359436,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.593344965104686,
|
|
"calib/avg_num_step_conf": 5.63671875,
|
|
"calib/ece": 0.3975590551181102,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.8188976377952756,
|
|
"calib/gap": 0.032642073778663905,
|
|
"calib/mean_conf": 0.9130708661417323,
|
|
"calib/mu_c": 0.9282352941176469,
|
|
"calib/mu_w": 0.895593220338983,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.38759842519685034,
|
|
"calib/std_conf": 0.1352309882195658,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.3523180592991914,
|
|
"calib/step_q_c_n": 742.0,
|
|
"calib/step_q_gap": 0.02979309496252952,
|
|
"calib/step_q_w": 0.3225249643366619,
|
|
"calib/step_q_w_n": 701.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2469.0,
|
|
"completions/max_terminated_length": 2469.0,
|
|
"completions/mean_length": 582.58203125,
|
|
"completions/mean_terminated_length": 582.58203125,
|
|
"completions/min_length": 193.0,
|
|
"completions/min_terminated_length": 193.0,
|
|
"epoch": 0.037333333333333336,
|
|
"grad_norm": 0.022607261314988136,
|
|
"kl": 0.05496978759765625,
|
|
"learning_rate": 4.583333333333333e-06,
|
|
"loss": -0.0247,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.02826719731092453,
|
|
"mask/share_reasoning": 0.8655970096588135,
|
|
"mask/share_step_conf": 0.106135793030262,
|
|
"num_tokens": 8473991.0,
|
|
"reward": 0.8837124705314636,
|
|
"reward_std": 0.18118566274642944,
|
|
"rewards/accuracy_reward_step": 0.53125,
|
|
"rewards/asymmetric_l2_reward": 0.8655635714530945,
|
|
"rewards/final_brier_reward_step": 0.5979551076889038,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 35
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7104417085647583,
|
|
"adv/mean_abs_reasoning": 0.39619794487953186,
|
|
"adv/mean_abs_step_conf": 0.7653812170028687,
|
|
"adv/ratio_final_to_reasoning": 1.7931483939947632,
|
|
"adv/ratio_step_to_reasoning": 1.9318152123065424,
|
|
"adv/std_final_conf": 0.9201058745384216,
|
|
"adv/std_reasoning": 0.7012619376182556,
|
|
"adv/std_step_conf": 0.9333252906799316,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5465106897942719,
|
|
"calib/avg_num_step_conf": 6.12890625,
|
|
"calib/ece": 0.21273809523809523,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.8134920634920635,
|
|
"calib/gap": 0.009519160951996675,
|
|
"calib/mean_conf": 0.9189285714285714,
|
|
"calib/mu_c": 0.9214594594594593,
|
|
"calib/mu_w": 0.9119402985074626,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.19876984126984126,
|
|
"calib/std_conf": 0.1101119688844495,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.3337616179001721,
|
|
"calib/step_q_c_n": 1162.0,
|
|
"calib/step_q_gap": 0.025334099472653693,
|
|
"calib/step_q_w": 0.3084275184275184,
|
|
"calib/step_q_w_n": 407.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2003.0,
|
|
"completions/max_terminated_length": 2003.0,
|
|
"completions/mean_length": 528.51171875,
|
|
"completions/mean_terminated_length": 528.51171875,
|
|
"completions/min_length": 193.0,
|
|
"completions/min_terminated_length": 193.0,
|
|
"epoch": 0.0384,
|
|
"grad_norm": 0.046310946345329285,
|
|
"kl": 0.07181549072265625,
|
|
"learning_rate": 4.555555555555556e-06,
|
|
"loss": -0.0601,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.03279054909944534,
|
|
"mask/share_reasoning": 0.8338136672973633,
|
|
"mask/share_step_conf": 0.13339582085609436,
|
|
"num_tokens": 8712002.0,
|
|
"reward": 0.9817196726799011,
|
|
"reward_std": 0.14960414171218872,
|
|
"rewards/accuracy_reward_step": 0.72265625,
|
|
"rewards/asymmetric_l2_reward": 0.8716880679130554,
|
|
"rewards/final_brier_reward_step": 0.7503449320793152,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 36
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7335127592086792,
|
|
"adv/mean_abs_reasoning": 0.3861789107322693,
|
|
"adv/mean_abs_step_conf": 0.7399336099624634,
|
|
"adv/ratio_final_to_reasoning": 1.8994117462753166,
|
|
"adv/ratio_step_to_reasoning": 1.9160383682252542,
|
|
"adv/std_final_conf": 0.9107916951179504,
|
|
"adv/std_reasoning": 0.6816076636314392,
|
|
"adv/std_step_conf": 0.9339763522148132,
|
|
"calib/answer_extract_rate": 0.95703125,
|
|
"calib/auroc": 0.5764084980502892,
|
|
"calib/avg_num_step_conf": 6.046875,
|
|
"calib/ece": 0.47885714285714287,
|
|
"calib/final_conf_rate": 0.95703125,
|
|
"calib/format_rate": 0.95703125,
|
|
"calib/frac_conf_gt_0.9": 0.8857142857142857,
|
|
"calib/gap": 0.03537851284119908,
|
|
"calib/mean_conf": 0.9215510204081632,
|
|
"calib/mu_c": 0.9409009009009006,
|
|
"calib/mu_w": 0.9055223880597015,
|
|
"calib/nonempty_final_conf_rate": 0.95703125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.4736734693877551,
|
|
"calib/std_conf": 0.13022759687852142,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.3618576388888889,
|
|
"calib/step_q_c_n": 576.0,
|
|
"calib/step_q_gap": 0.07817451131687242,
|
|
"calib/step_q_w": 0.28368312757201647,
|
|
"calib/step_q_w_n": 972.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2500.0,
|
|
"completions/max_terminated_length": 2500.0,
|
|
"completions/mean_length": 562.7578125,
|
|
"completions/mean_terminated_length": 569.4308471679688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 171.0,
|
|
"epoch": 0.039466666666666664,
|
|
"grad_norm": 0.04367915168404579,
|
|
"kl": 0.06029510498046875,
|
|
"learning_rate": 4.527777777777778e-06,
|
|
"loss": -0.0503,
|
|
"mask/has_final_conf_rate": 0.95703125,
|
|
"mask/share_final_conf": 0.029208239167928696,
|
|
"mask/share_reasoning": 0.842954158782959,
|
|
"mask/share_step_conf": 0.11611886322498322,
|
|
"num_tokens": 8963164.0,
|
|
"reward": 0.8178337812423706,
|
|
"reward_std": 0.1575174331665039,
|
|
"rewards/accuracy_reward_step": 0.43359375,
|
|
"rewards/asymmetric_l2_reward": 0.8471627235412598,
|
|
"rewards/final_brier_reward_step": 0.5103796720504761,
|
|
"rewards/format_reward_step": 0.95703125,
|
|
"step": 37
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7438644766807556,
|
|
"adv/mean_abs_reasoning": 0.3514899015426636,
|
|
"adv/mean_abs_step_conf": 0.7278769612312317,
|
|
"adv/ratio_final_to_reasoning": 2.1163182026453353,
|
|
"adv/ratio_step_to_reasoning": 2.0708332103899223,
|
|
"adv/std_final_conf": 0.9110886454582214,
|
|
"adv/std_reasoning": 0.6403597593307495,
|
|
"adv/std_step_conf": 0.9339098930358887,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.6162587412587412,
|
|
"calib/avg_num_step_conf": 6.0,
|
|
"calib/ece": 0.4017670682730922,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.8192771084337349,
|
|
"calib/gap": 0.022062937062936938,
|
|
"calib/mean_conf": 0.9255421686746987,
|
|
"calib/mu_c": 0.9359090909090908,
|
|
"calib/mu_w": 0.9138461538461539,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.39859437751004,
|
|
"calib/std_conf": 0.07661387585031114,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.33544607190412784,
|
|
"calib/step_q_c_n": 751.0,
|
|
"calib/step_q_gap": 0.037586199292662825,
|
|
"calib/step_q_w": 0.297859872611465,
|
|
"calib/step_q_w_n": 785.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2778.0,
|
|
"completions/max_terminated_length": 2778.0,
|
|
"completions/mean_length": 579.6796875,
|
|
"completions/mean_terminated_length": 581.9530029296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 140.0,
|
|
"epoch": 0.04053333333333333,
|
|
"grad_norm": 0.021768247708678246,
|
|
"kl": 0.060333251953125,
|
|
"learning_rate": 4.5e-06,
|
|
"loss": 0.0435,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03031359612941742,
|
|
"mask/share_reasoning": 0.8476117849349976,
|
|
"mask/share_step_conf": 0.11816837638616562,
|
|
"num_tokens": 9218450.0,
|
|
"reward": 0.8697977066040039,
|
|
"reward_std": 0.14932399988174438,
|
|
"rewards/accuracy_reward_step": 0.515625,
|
|
"rewards/asymmetric_l2_reward": 0.8586658239364624,
|
|
"rewards/final_brier_reward_step": 0.5832734107971191,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 38
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7321931719779968,
|
|
"adv/mean_abs_reasoning": 0.4579048156738281,
|
|
"adv/mean_abs_step_conf": 0.7451863884925842,
|
|
"adv/ratio_final_to_reasoning": 1.5990073633546322,
|
|
"adv/ratio_step_to_reasoning": 1.6273827288669327,
|
|
"adv/std_final_conf": 0.9224193692207336,
|
|
"adv/std_reasoning": 0.7205734252929688,
|
|
"adv/std_step_conf": 0.9335898756980896,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6688839615668883,
|
|
"calib/avg_num_step_conf": 6.234375,
|
|
"calib/ece": 0.38768627450980386,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.7725490196078432,
|
|
"calib/gap": 0.05266075388026592,
|
|
"calib/mean_conf": 0.8949019607843137,
|
|
"calib/mu_c": 0.9203030303030303,
|
|
"calib/mu_w": 0.8676422764227644,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.3824705882352941,
|
|
"calib/std_conf": 0.16491676509051476,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.3508021390374331,
|
|
"calib/step_q_c_n": 748.0,
|
|
"calib/step_q_gap": 0.03972902582988591,
|
|
"calib/step_q_w": 0.3110731132075472,
|
|
"calib/step_q_w_n": 848.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2893.0,
|
|
"completions/max_terminated_length": 2893.0,
|
|
"completions/mean_length": 546.03125,
|
|
"completions/mean_terminated_length": 546.03125,
|
|
"completions/min_length": 206.0,
|
|
"completions/min_terminated_length": 206.0,
|
|
"epoch": 0.0416,
|
|
"grad_norm": 0.028995206579566002,
|
|
"kl": 0.0600128173828125,
|
|
"learning_rate": 4.472222222222223e-06,
|
|
"loss": -0.0347,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.030841421335935593,
|
|
"mask/share_reasoning": 0.8439503908157349,
|
|
"mask/share_step_conf": 0.12520815432071686,
|
|
"num_tokens": 9464322.0,
|
|
"reward": 0.8865332007408142,
|
|
"reward_std": 0.1797136813402176,
|
|
"rewards/accuracy_reward_step": 0.515625,
|
|
"rewards/asymmetric_l2_reward": 0.8740624785423279,
|
|
"rewards/final_brier_reward_step": 0.5990039110183716,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 39
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7255151867866516,
|
|
"adv/mean_abs_reasoning": 0.5067353844642639,
|
|
"adv/mean_abs_step_conf": 0.7520684003829956,
|
|
"adv/ratio_final_to_reasoning": 1.4317436852247616,
|
|
"adv/ratio_step_to_reasoning": 1.4841442366968418,
|
|
"adv/std_final_conf": 0.9328517913818359,
|
|
"adv/std_reasoning": 0.7926381826400757,
|
|
"adv/std_step_conf": 0.9337299466133118,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.4924899446958271,
|
|
"calib/avg_num_step_conf": 5.52734375,
|
|
"calib/ece": 0.45719367588932813,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.7075098814229249,
|
|
"calib/gap": -0.004004524886877858,
|
|
"calib/mean_conf": 0.9013833992094862,
|
|
"calib/mu_c": 0.8992307692307693,
|
|
"calib/mu_w": 0.9032352941176471,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4480632411067194,
|
|
"calib/std_conf": 0.12519101311172237,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.35236206896551725,
|
|
"calib/step_q_c_n": 580.0,
|
|
"calib/step_q_gap": 0.020086619863720845,
|
|
"calib/step_q_w": 0.3322754491017964,
|
|
"calib/step_q_w_n": 835.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2236.0,
|
|
"completions/max_terminated_length": 2236.0,
|
|
"completions/mean_length": 570.9453125,
|
|
"completions/mean_terminated_length": 573.184326171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 197.0,
|
|
"epoch": 0.042666666666666665,
|
|
"grad_norm": 0.029202446341514587,
|
|
"kl": 0.06134796142578125,
|
|
"learning_rate": 4.444444444444444e-06,
|
|
"loss": -0.0549,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.030164752155542374,
|
|
"mask/share_reasoning": 0.8536300659179688,
|
|
"mask/share_step_conf": 0.11229896545410156,
|
|
"num_tokens": 9717244.0,
|
|
"reward": 0.8443441390991211,
|
|
"reward_std": 0.1785648614168167,
|
|
"rewards/accuracy_reward_step": 0.45703125,
|
|
"rewards/asymmetric_l2_reward": 0.8648823499679565,
|
|
"rewards/final_brier_reward_step": 0.5347433686256409,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 40
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7701693773269653,
|
|
"adv/mean_abs_reasoning": 0.47483962774276733,
|
|
"adv/mean_abs_step_conf": 0.7666299343109131,
|
|
"adv/ratio_final_to_reasoning": 1.6219568299050762,
|
|
"adv/ratio_step_to_reasoning": 1.6145028542693913,
|
|
"adv/std_final_conf": 0.9186797142028809,
|
|
"adv/std_reasoning": 0.7206448316574097,
|
|
"adv/std_step_conf": 0.9336923360824585,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.6092863894139887,
|
|
"calib/avg_num_step_conf": 5.66015625,
|
|
"calib/ece": 0.19758893280632403,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.6719367588932806,
|
|
"calib/gap": 0.041159420289854975,
|
|
"calib/mean_conf": 0.8674703557312253,
|
|
"calib/mu_c": 0.8786956521739129,
|
|
"calib/mu_w": 0.8375362318840579,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.168893280632411,
|
|
"calib/std_conf": 0.1793265952873243,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.3456231599607458,
|
|
"calib/step_q_c_n": 1019.0,
|
|
"calib/step_q_gap": 0.016971764611908524,
|
|
"calib/step_q_w": 0.32865139534883725,
|
|
"calib/step_q_w_n": 430.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2388.0,
|
|
"completions/max_terminated_length": 2388.0,
|
|
"completions/mean_length": 526.03515625,
|
|
"completions/mean_terminated_length": 526.03515625,
|
|
"completions/min_length": 192.0,
|
|
"completions/min_terminated_length": 192.0,
|
|
"epoch": 0.04373333333333333,
|
|
"grad_norm": 0.03552259877324104,
|
|
"kl": 0.05785369873046875,
|
|
"learning_rate": 4.416666666666667e-06,
|
|
"loss": 0.0089,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03249321132898331,
|
|
"mask/share_reasoning": 0.8456205129623413,
|
|
"mask/share_step_conf": 0.121886245906353,
|
|
"num_tokens": 9959157.0,
|
|
"reward": 0.9845508337020874,
|
|
"reward_std": 0.17637822031974792,
|
|
"rewards/accuracy_reward_step": 0.71875,
|
|
"rewards/asymmetric_l2_reward": 0.8705066442489624,
|
|
"rewards/final_brier_reward_step": 0.7571886777877808,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 41
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7168235778808594,
|
|
"adv/mean_abs_reasoning": 0.3787510395050049,
|
|
"adv/mean_abs_step_conf": 0.7346171736717224,
|
|
"adv/ratio_final_to_reasoning": 1.8925983115919267,
|
|
"adv/ratio_step_to_reasoning": 1.9395779735200305,
|
|
"adv/std_final_conf": 0.9166406393051147,
|
|
"adv/std_reasoning": 0.6814785599708557,
|
|
"adv/std_step_conf": 0.9328687787055969,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.630614352090862,
|
|
"calib/avg_num_step_conf": 6.328125,
|
|
"calib/ece": 0.3178656126482214,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.6758893280632411,
|
|
"calib/gap": 0.05375709860609179,
|
|
"calib/mean_conf": 0.8887747035573121,
|
|
"calib/mu_c": 0.9108724832214763,
|
|
"calib/mu_w": 0.8571153846153845,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3088537549407115,
|
|
"calib/std_conf": 0.1539560041628695,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3585111111111111,
|
|
"calib/step_q_c_n": 900.0,
|
|
"calib/step_q_gap": 0.04060833333333336,
|
|
"calib/step_q_w": 0.31790277777777776,
|
|
"calib/step_q_w_n": 720.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2391.0,
|
|
"completions/max_terminated_length": 2391.0,
|
|
"completions/mean_length": 480.5859375,
|
|
"completions/mean_terminated_length": 482.4706115722656,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 220.0,
|
|
"epoch": 0.0448,
|
|
"grad_norm": 0.02795771323144436,
|
|
"kl": 0.0621185302734375,
|
|
"learning_rate": 4.388888888888889e-06,
|
|
"loss": -0.0047,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.032921478152275085,
|
|
"mask/share_reasoning": 0.8269742727279663,
|
|
"mask/share_step_conf": 0.1361980140209198,
|
|
"num_tokens": 10186555.0,
|
|
"reward": 0.9259449243545532,
|
|
"reward_std": 0.14228272438049316,
|
|
"rewards/accuracy_reward_step": 0.58203125,
|
|
"rewards/asymmetric_l2_reward": 0.8753531575202942,
|
|
"rewards/final_brier_reward_step": 0.6624742150306702,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 42
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7828581929206848,
|
|
"adv/mean_abs_reasoning": 0.5349443554878235,
|
|
"adv/mean_abs_step_conf": 0.7610698938369751,
|
|
"adv/ratio_final_to_reasoning": 1.4634385518598194,
|
|
"adv/ratio_step_to_reasoning": 1.4227085229134617,
|
|
"adv/std_final_conf": 0.930379331111908,
|
|
"adv/std_reasoning": 0.7753865122795105,
|
|
"adv/std_step_conf": 0.9323244690895081,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6944133383103148,
|
|
"calib/avg_num_step_conf": 5.51953125,
|
|
"calib/ece": 0.25788235294117645,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.4980392156862745,
|
|
"calib/gap": 0.1379805897723031,
|
|
"calib/mean_conf": 0.8031372549019609,
|
|
"calib/mu_c": 0.8648226950354609,
|
|
"calib/mu_w": 0.7268421052631578,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.2540392156862744,
|
|
"calib/std_conf": 0.22322669560713412,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3525706940874036,
|
|
"calib/step_q_c_n": 778.0,
|
|
"calib/step_q_gap": 0.020271481488978393,
|
|
"calib/step_q_w": 0.3322992125984252,
|
|
"calib/step_q_w_n": 635.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 3071.0,
|
|
"completions/max_terminated_length": 3071.0,
|
|
"completions/mean_length": 518.61328125,
|
|
"completions/mean_terminated_length": 518.61328125,
|
|
"completions/min_length": 143.0,
|
|
"completions/min_terminated_length": 143.0,
|
|
"epoch": 0.04586666666666667,
|
|
"grad_norm": 0.030341370031237602,
|
|
"kl": 0.0573577880859375,
|
|
"learning_rate": 4.361111111111112e-06,
|
|
"loss": 0.0182,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.03317509591579437,
|
|
"mask/share_reasoning": 0.8494628667831421,
|
|
"mask/share_step_conf": 0.11736202985048294,
|
|
"num_tokens": 10424544.0,
|
|
"reward": 0.9504603743553162,
|
|
"reward_std": 0.17397907376289368,
|
|
"rewards/accuracy_reward_step": 0.55078125,
|
|
"rewards/asymmetric_l2_reward": 0.885722279548645,
|
|
"rewards/final_brier_reward_step": 0.7058234810829163,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 43
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.77141273021698,
|
|
"adv/mean_abs_reasoning": 0.5115665197372437,
|
|
"adv/mean_abs_step_conf": 0.7307957410812378,
|
|
"adv/ratio_final_to_reasoning": 1.5079421745840627,
|
|
"adv/ratio_step_to_reasoning": 1.4285448966765788,
|
|
"adv/std_final_conf": 0.9244205355644226,
|
|
"adv/std_reasoning": 0.7575879096984863,
|
|
"adv/std_step_conf": 0.9330092072486877,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.6712398373983739,
|
|
"calib/avg_num_step_conf": 6.0703125,
|
|
"calib/ece": 0.33601593625498005,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.5179282868525896,
|
|
"calib/gap": 0.1214399136178862,
|
|
"calib/mean_conf": 0.8069322709163347,
|
|
"calib/mu_c": 0.8688617886178862,
|
|
"calib/mu_w": 0.747421875,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.32645418326693226,
|
|
"calib/std_conf": 0.2188446468993844,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.37137755102040815,
|
|
"calib/step_q_c_n": 784.0,
|
|
"calib/step_q_gap": 0.04472820037105746,
|
|
"calib/step_q_w": 0.3266493506493507,
|
|
"calib/step_q_w_n": 770.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2883.0,
|
|
"completions/max_terminated_length": 2883.0,
|
|
"completions/mean_length": 587.4921875,
|
|
"completions/mean_terminated_length": 589.796142578125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 250.0,
|
|
"epoch": 0.046933333333333334,
|
|
"grad_norm": 0.03289037570357323,
|
|
"kl": 0.049488067626953125,
|
|
"learning_rate": 4.333333333333334e-06,
|
|
"loss": -0.061,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.027674272656440735,
|
|
"mask/share_reasoning": 0.8532435894012451,
|
|
"mask/share_step_conf": 0.11517593264579773,
|
|
"num_tokens": 10681262.0,
|
|
"reward": 0.9121721982955933,
|
|
"reward_std": 0.17224377393722534,
|
|
"rewards/accuracy_reward_step": 0.48046875,
|
|
"rewards/asymmetric_l2_reward": 0.8826147317886353,
|
|
"rewards/final_brier_reward_step": 0.6495422124862671,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 44
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.726306140422821,
|
|
"adv/mean_abs_reasoning": 0.43901243805885315,
|
|
"adv/mean_abs_step_conf": 0.7687171101570129,
|
|
"adv/ratio_final_to_reasoning": 1.6544090268473302,
|
|
"adv/ratio_step_to_reasoning": 1.7510144212678553,
|
|
"adv/std_final_conf": 0.9306395053863525,
|
|
"adv/std_reasoning": 0.7204497456550598,
|
|
"adv/std_step_conf": 0.9319562315940857,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.6645899554990464,
|
|
"calib/avg_num_step_conf": 6.30078125,
|
|
"calib/ece": 0.2612252964426878,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.5375494071146245,
|
|
"calib/gap": 0.12187412587412572,
|
|
"calib/mean_conf": 0.7992490118577076,
|
|
"calib/mu_c": 0.8522377622377623,
|
|
"calib/mu_w": 0.7303636363636365,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.24762845849802378,
|
|
"calib/std_conf": 0.23942253743503325,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.3627835051546392,
|
|
"calib/step_q_c_n": 873.0,
|
|
"calib/step_q_gap": 0.02529701866815276,
|
|
"calib/step_q_w": 0.33748648648648644,
|
|
"calib/step_q_w_n": 740.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2101.0,
|
|
"completions/max_terminated_length": 2101.0,
|
|
"completions/mean_length": 542.84765625,
|
|
"completions/mean_terminated_length": 544.9765014648438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 162.0,
|
|
"epoch": 0.048,
|
|
"grad_norm": 0.031237227842211723,
|
|
"kl": 0.054935455322265625,
|
|
"learning_rate": 4.305555555555556e-06,
|
|
"loss": -0.0223,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.031349748373031616,
|
|
"mask/share_reasoning": 0.8371763229370117,
|
|
"mask/share_step_conf": 0.12756764888763428,
|
|
"num_tokens": 10925279.0,
|
|
"reward": 0.9448119401931763,
|
|
"reward_std": 0.15327224135398865,
|
|
"rewards/accuracy_reward_step": 0.55859375,
|
|
"rewards/asymmetric_l2_reward": 0.8864164352416992,
|
|
"rewards/final_brier_reward_step": 0.6938323974609375,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 45
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7354565858840942,
|
|
"adv/mean_abs_reasoning": 0.40667724609375,
|
|
"adv/mean_abs_step_conf": 0.7561466693878174,
|
|
"adv/ratio_final_to_reasoning": 1.8084527544837161,
|
|
"adv/ratio_step_to_reasoning": 1.8593286854645055,
|
|
"adv/std_final_conf": 0.919183075428009,
|
|
"adv/std_reasoning": 0.6816875338554382,
|
|
"adv/std_step_conf": 0.9330542087554932,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.6204044117647058,
|
|
"calib/avg_num_step_conf": 6.12890625,
|
|
"calib/ece": 0.283508064516129,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.5362903225806451,
|
|
"calib/gap": 0.09378676470588243,
|
|
"calib/mean_conf": 0.7820564516129033,
|
|
"calib/mu_c": 0.8244117647058824,
|
|
"calib/mu_w": 0.730625,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.2585887096774193,
|
|
"calib/std_conf": 0.2628921894329086,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.35542750929368033,
|
|
"calib/step_q_c_n": 807.0,
|
|
"calib/step_q_gap": 0.02335401848003199,
|
|
"calib/step_q_w": 0.33207349081364834,
|
|
"calib/step_q_w_n": 762.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2446.0,
|
|
"completions/max_terminated_length": 2446.0,
|
|
"completions/mean_length": 589.328125,
|
|
"completions/mean_terminated_length": 589.328125,
|
|
"completions/min_length": 150.0,
|
|
"completions/min_terminated_length": 150.0,
|
|
"epoch": 0.04906666666666667,
|
|
"grad_norm": 0.0332394540309906,
|
|
"kl": 0.04784393310546875,
|
|
"learning_rate": 4.277777777777778e-06,
|
|
"loss": -0.0168,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.032339829951524734,
|
|
"mask/share_reasoning": 0.8446968197822571,
|
|
"mask/share_step_conf": 0.12296333909034729,
|
|
"num_tokens": 11180915.0,
|
|
"reward": 0.9091041088104248,
|
|
"reward_std": 0.16267403960227966,
|
|
"rewards/accuracy_reward_step": 0.53125,
|
|
"rewards/asymmetric_l2_reward": 0.8642227053642273,
|
|
"rewards/final_brier_reward_step": 0.6539855599403381,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"step": 46
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7560012340545654,
|
|
"adv/mean_abs_reasoning": 0.48656049370765686,
|
|
"adv/mean_abs_step_conf": 0.7640652656555176,
|
|
"adv/ratio_final_to_reasoning": 1.553766168506065,
|
|
"adv/ratio_step_to_reasoning": 1.5703397121974223,
|
|
"adv/std_final_conf": 0.9190072417259216,
|
|
"adv/std_reasoning": 0.7393056750297546,
|
|
"adv/std_step_conf": 0.932059109210968,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.7265749601275917,
|
|
"calib/avg_num_step_conf": 6.23828125,
|
|
"calib/ece": 0.1629083665338646,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.47808764940239046,
|
|
"calib/gap": 0.20046318447634248,
|
|
"calib/mean_conf": 0.7527091633466135,
|
|
"calib/mu_c": 0.8317763157894738,
|
|
"calib/mu_w": 0.6313131313131313,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.15501992031872514,
|
|
"calib/std_conf": 0.25110399877916256,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.35576086956521746,
|
|
"calib/step_q_c_n": 920.0,
|
|
"calib/step_q_gap": 0.020487605163444944,
|
|
"calib/step_q_w": 0.3352732644017725,
|
|
"calib/step_q_w_n": 677.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2268.0,
|
|
"completions/max_terminated_length": 2268.0,
|
|
"completions/mean_length": 584.73828125,
|
|
"completions/mean_terminated_length": 589.342529296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 174.0,
|
|
"epoch": 0.050133333333333335,
|
|
"grad_norm": 0.05675409361720085,
|
|
"kl": 0.051654815673828125,
|
|
"learning_rate": 4.25e-06,
|
|
"loss": -0.0528,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.028933309018611908,
|
|
"mask/share_reasoning": 0.8470029830932617,
|
|
"mask/share_step_conf": 0.11625122278928757,
|
|
"num_tokens": 11436584.0,
|
|
"reward": 0.9713031053543091,
|
|
"reward_std": 0.14311102032661438,
|
|
"rewards/accuracy_reward_step": 0.59375,
|
|
"rewards/asymmetric_l2_reward": 0.87469881772995,
|
|
"rewards/final_brier_reward_step": 0.7538449168205261,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 47
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7458685040473938,
|
|
"adv/mean_abs_reasoning": 0.45874661207199097,
|
|
"adv/mean_abs_step_conf": 0.7546839118003845,
|
|
"adv/ratio_final_to_reasoning": 1.625883405827409,
|
|
"adv/ratio_step_to_reasoning": 1.645099695432633,
|
|
"adv/std_final_conf": 0.9048066139221191,
|
|
"adv/std_reasoning": 0.7014490365982056,
|
|
"adv/std_step_conf": 0.9324750304222107,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.6762935450819672,
|
|
"calib/avg_num_step_conf": 5.29296875,
|
|
"calib/ece": 0.2634400000000001,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.52,
|
|
"calib/gap": 0.15932248975409835,
|
|
"calib/mean_conf": 0.74936,
|
|
"calib/mu_c": 0.827109375,
|
|
"calib/mu_w": 0.6677868852459017,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.25040000000000007,
|
|
"calib/std_conf": 0.2668115259879153,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.38141732283464563,
|
|
"calib/step_q_c_n": 635.0,
|
|
"calib/step_q_gap": 0.009236767279090063,
|
|
"calib/step_q_w": 0.37218055555555557,
|
|
"calib/step_q_w_n": 720.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2772.0,
|
|
"completions/max_terminated_length": 2772.0,
|
|
"completions/mean_length": 515.4296875,
|
|
"completions/mean_terminated_length": 517.4509887695312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 168.0,
|
|
"epoch": 0.0512,
|
|
"grad_norm": 0.041724901646375656,
|
|
"kl": 0.057010650634765625,
|
|
"learning_rate": 4.222222222222223e-06,
|
|
"loss": -0.0685,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.033603981137275696,
|
|
"mask/share_reasoning": 0.8387103080749512,
|
|
"mask/share_step_conf": 0.12377943843603134,
|
|
"num_tokens": 11672222.0,
|
|
"reward": 0.919036865234375,
|
|
"reward_std": 0.13520202040672302,
|
|
"rewards/accuracy_reward_step": 0.5,
|
|
"rewards/asymmetric_l2_reward": 0.8569885492324829,
|
|
"rewards/final_brier_reward_step": 0.6857726573944092,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 48
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7114934325218201,
|
|
"adv/mean_abs_reasoning": 0.46754950284957886,
|
|
"adv/mean_abs_step_conf": 0.7762830257415771,
|
|
"adv/ratio_final_to_reasoning": 1.5217499498672837,
|
|
"adv/ratio_step_to_reasoning": 1.66032264179591,
|
|
"adv/std_final_conf": 0.868726372718811,
|
|
"adv/std_reasoning": 0.7015178799629211,
|
|
"adv/std_step_conf": 0.9322298765182495,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.682031040941932,
|
|
"calib/avg_num_step_conf": 5.69140625,
|
|
"calib/ece": 0.28253012048192777,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.714859437751004,
|
|
"calib/gap": 0.136437650521809,
|
|
"calib/mean_conf": 0.8418875502008032,
|
|
"calib/mu_c": 0.8972297297297297,
|
|
"calib/mu_w": 0.7607920792079207,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.2650200803212852,
|
|
"calib/std_conf": 0.23948583521095618,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.4151843043995244,
|
|
"calib/step_q_c_n": 841.0,
|
|
"calib/step_q_gap": 0.04015183686705692,
|
|
"calib/step_q_w": 0.3750324675324675,
|
|
"calib/step_q_w_n": 616.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2635.0,
|
|
"completions/max_terminated_length": 2635.0,
|
|
"completions/mean_length": 529.74609375,
|
|
"completions/mean_terminated_length": 531.8235473632812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 208.0,
|
|
"epoch": 0.05226666666666667,
|
|
"grad_norm": 0.04617791995406151,
|
|
"kl": 0.058994293212890625,
|
|
"learning_rate": 4.194444444444445e-06,
|
|
"loss": -0.0631,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03090585768222809,
|
|
"mask/share_reasoning": 0.8450629711151123,
|
|
"mask/share_step_conf": 0.12012490630149841,
|
|
"num_tokens": 11912373.0,
|
|
"reward": 0.9283610582351685,
|
|
"reward_std": 0.18739992380142212,
|
|
"rewards/accuracy_reward_step": 0.578125,
|
|
"rewards/asymmetric_l2_reward": 0.8635416626930237,
|
|
"rewards/final_brier_reward_step": 0.6838054656982422,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"step": 49
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6450693607330322,
|
|
"adv/mean_abs_reasoning": 0.384204626083374,
|
|
"adv/mean_abs_step_conf": 0.7576757073402405,
|
|
"adv/ratio_final_to_reasoning": 1.6789734348306609,
|
|
"adv/ratio_step_to_reasoning": 1.972062947456082,
|
|
"adv/std_final_conf": 0.846005380153656,
|
|
"adv/std_reasoning": 0.6612586975097656,
|
|
"adv/std_step_conf": 0.9314751625061035,
|
|
"calib/answer_extract_rate": 0.96484375,
|
|
"calib/auroc": 0.7366114230927269,
|
|
"calib/avg_num_step_conf": 5.6953125,
|
|
"calib/ece": 0.24157894736842106,
|
|
"calib/final_conf_rate": 0.96484375,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 0.680161943319838,
|
|
"calib/gap": 0.14676003287220918,
|
|
"calib/mean_conf": 0.8274089068825912,
|
|
"calib/mu_c": 0.8856375838926173,
|
|
"calib/mu_w": 0.7388775510204081,
|
|
"calib/nonempty_final_conf_rate": 0.96484375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.2328744939271255,
|
|
"calib/std_conf": 0.24239478038170492,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.43133171912832935,
|
|
"calib/step_q_c_n": 826.0,
|
|
"calib/step_q_gap": 0.046189314065038234,
|
|
"calib/step_q_w": 0.3851424050632911,
|
|
"calib/step_q_w_n": 632.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2149.0,
|
|
"completions/max_terminated_length": 2149.0,
|
|
"completions/mean_length": 552.40234375,
|
|
"completions/mean_terminated_length": 558.9525756835938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 200.0,
|
|
"epoch": 0.05333333333333334,
|
|
"grad_norm": 0.07475942373275757,
|
|
"kl": 0.048542022705078125,
|
|
"learning_rate": 4.166666666666667e-06,
|
|
"loss": -0.0752,
|
|
"mask/has_final_conf_rate": 0.96484375,
|
|
"mask/share_final_conf": 0.030771994963288307,
|
|
"mask/share_reasoning": 0.8386844396591187,
|
|
"mask/share_step_conf": 0.118824802339077,
|
|
"num_tokens": 12159148.0,
|
|
"reward": 0.9325626492500305,
|
|
"reward_std": 0.1499963104724884,
|
|
"rewards/accuracy_reward_step": 0.58203125,
|
|
"rewards/asymmetric_l2_reward": 0.863436222076416,
|
|
"rewards/final_brier_reward_step": 0.693095326423645,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"step": 50
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6528294086456299,
|
|
"adv/mean_abs_reasoning": 0.4469456076622009,
|
|
"adv/mean_abs_step_conf": 0.7476691007614136,
|
|
"adv/ratio_final_to_reasoning": 1.4606462116505121,
|
|
"adv/ratio_step_to_reasoning": 1.672841365803281,
|
|
"adv/std_final_conf": 0.8555540442466736,
|
|
"adv/std_reasoning": 0.7392084002494812,
|
|
"adv/std_step_conf": 0.9323121309280396,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.7527228581338884,
|
|
"calib/avg_num_step_conf": 5.23828125,
|
|
"calib/ece": 0.18209677419354817,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.6330645161290323,
|
|
"calib/gap": 0.23286021505376342,
|
|
"calib/mean_conf": 0.7982258064516129,
|
|
"calib/mu_c": 0.8855483870967741,
|
|
"calib/mu_w": 0.6526881720430107,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.1776612903225804,
|
|
"calib/std_conf": 0.2629716910653483,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.430625,
|
|
"calib/step_q_c_n": 816.0,
|
|
"calib/step_q_gap": 0.032586904761904734,
|
|
"calib/step_q_w": 0.39803809523809525,
|
|
"calib/step_q_w_n": 525.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2269.0,
|
|
"completions/max_terminated_length": 2269.0,
|
|
"completions/mean_length": 546.97265625,
|
|
"completions/mean_terminated_length": 546.97265625,
|
|
"completions/min_length": 152.0,
|
|
"completions/min_terminated_length": 152.0,
|
|
"epoch": 0.0544,
|
|
"grad_norm": 0.04614703357219696,
|
|
"kl": 0.046630859375,
|
|
"learning_rate": 4.138888888888889e-06,
|
|
"loss": -0.0507,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.03051183931529522,
|
|
"mask/share_reasoning": 0.8613395690917969,
|
|
"mask/share_step_conf": 0.10814858973026276,
|
|
"num_tokens": 12408469.0,
|
|
"reward": 0.9670203924179077,
|
|
"reward_std": 0.17126522958278656,
|
|
"rewards/accuracy_reward_step": 0.609375,
|
|
"rewards/asymmetric_l2_reward": 0.8670368194580078,
|
|
"rewards/final_brier_reward_step": 0.7513788938522339,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"step": 51
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5877702832221985,
|
|
"adv/mean_abs_reasoning": 0.37581461668014526,
|
|
"adv/mean_abs_step_conf": 0.7422032356262207,
|
|
"adv/ratio_final_to_reasoning": 1.563989949125497,
|
|
"adv/ratio_step_to_reasoning": 1.9749184908843174,
|
|
"adv/std_final_conf": 0.8321949243545532,
|
|
"adv/std_reasoning": 0.6612822413444519,
|
|
"adv/std_step_conf": 0.9306304454803467,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.7521001344086022,
|
|
"calib/avg_num_step_conf": 5.41796875,
|
|
"calib/ece": 0.13200000000000003,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.548,
|
|
"calib/gap": 0.2936441532258066,
|
|
"calib/mean_conf": 0.7214400000000001,
|
|
"calib/mu_c": 0.7966129032258066,
|
|
"calib/mu_w": 0.50296875,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
|
"calib/pce": 0.05472000000000001,
|
|
"calib/std_conf": 0.3111467923665613,
|
|
"calib/step_conf_rate": 0.98046875,
|
|
"calib/step_q_c": 0.43921052631578944,
|
|
"calib/step_q_c_n": 988.0,
|
|
"calib/step_q_gap": 0.0395864661654135,
|
|
"calib/step_q_w": 0.39962406015037594,
|
|
"calib/step_q_w_n": 399.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 1785.0,
|
|
"completions/max_terminated_length": 1785.0,
|
|
"completions/mean_length": 542.125,
|
|
"completions/mean_terminated_length": 546.3936767578125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 179.0,
|
|
"epoch": 0.055466666666666664,
|
|
"grad_norm": 9.542619705200195,
|
|
"kl": 10.608467102050781,
|
|
"learning_rate": 4.111111111111111e-06,
|
|
"loss": 0.0357,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.030428439378738403,
|
|
"mask/share_reasoning": 0.8515126705169678,
|
|
"mask/share_step_conf": 0.11024642735719681,
|
|
"num_tokens": 12655205.0,
|
|
"reward": 1.0111010074615479,
|
|
"reward_std": 0.15742525458335876,
|
|
"rewards/accuracy_reward_step": 0.73046875,
|
|
"rewards/asymmetric_l2_reward": 0.8825316429138184,
|
|
"rewards/final_brier_reward_step": 0.7998265624046326,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"step": 52
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6738765239715576,
|
|
"adv/mean_abs_reasoning": 0.5075792074203491,
|
|
"adv/mean_abs_step_conf": 0.7184747457504272,
|
|
"adv/ratio_final_to_reasoning": 1.3276283073067061,
|
|
"adv/ratio_step_to_reasoning": 1.4154928634722936,
|
|
"adv/std_final_conf": 0.8424535989761353,
|
|
"adv/std_reasoning": 0.7575823068618774,
|
|
"adv/std_step_conf": 0.9306389093399048,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6674550299800133,
|
|
"calib/avg_num_step_conf": 5.80859375,
|
|
"calib/ece": 0.20988142292490114,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.6284584980237155,
|
|
"calib/gap": 0.16155629580279796,
|
|
"calib/mean_conf": 0.7785770750988144,
|
|
"calib/mu_c": 0.8392405063291137,
|
|
"calib/mu_w": 0.6776842105263158,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.18197628458498022,
|
|
"calib/std_conf": 0.29109910124762695,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4375678496868476,
|
|
"calib/step_q_c_n": 958.0,
|
|
"calib/step_q_gap": 0.016811706019550843,
|
|
"calib/step_q_w": 0.42075614366729674,
|
|
"calib/step_q_w_n": 529.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2188.0,
|
|
"completions/max_terminated_length": 2188.0,
|
|
"completions/mean_length": 545.59375,
|
|
"completions/mean_terminated_length": 545.59375,
|
|
"completions/min_length": 154.0,
|
|
"completions/min_terminated_length": 154.0,
|
|
"epoch": 0.05653333333333333,
|
|
"grad_norm": 0.06023424491286278,
|
|
"kl": 0.09381866455078125,
|
|
"learning_rate": 4.083333333333334e-06,
|
|
"loss": -0.0644,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03022611513733864,
|
|
"mask/share_reasoning": 0.853451132774353,
|
|
"mask/share_step_conf": 0.11632277071475983,
|
|
"num_tokens": 12900701.0,
|
|
"reward": 0.963081955909729,
|
|
"reward_std": 0.16977277398109436,
|
|
"rewards/accuracy_reward_step": 0.6171875,
|
|
"rewards/asymmetric_l2_reward": 0.8808630704879761,
|
|
"rewards/final_brier_reward_step": 0.7242070436477661,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 53
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5761524438858032,
|
|
"adv/mean_abs_reasoning": 0.3302342891693115,
|
|
"adv/mean_abs_step_conf": 0.7532912492752075,
|
|
"adv/ratio_final_to_reasoning": 1.744677832623278,
|
|
"adv/ratio_step_to_reasoning": 2.281081262548706,
|
|
"adv/std_final_conf": 0.7929055690765381,
|
|
"adv/std_reasoning": 0.596068799495697,
|
|
"adv/std_step_conf": 0.9317674040794373,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.8392947834288617,
|
|
"calib/avg_num_step_conf": 5.21875,
|
|
"calib/ece": 0.11992187500000008,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.64453125,
|
|
"calib/gap": 0.34614089820793725,
|
|
"calib/mean_conf": 0.8008593750000002,
|
|
"calib/mu_c": 0.9049720670391062,
|
|
"calib/mu_w": 0.5588311688311689,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.11078125000000008,
|
|
"calib/std_conf": 0.27467460107299574,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.46659142212189614,
|
|
"calib/step_q_c_n": 886.0,
|
|
"calib/step_q_gap": 0.04299142212189622,
|
|
"calib/step_q_w": 0.4235999999999999,
|
|
"calib/step_q_w_n": 450.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1258.0,
|
|
"completions/max_terminated_length": 1258.0,
|
|
"completions/mean_length": 469.8203125,
|
|
"completions/mean_terminated_length": 471.66278076171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 164.0,
|
|
"epoch": 0.0576,
|
|
"grad_norm": 0.14687201380729675,
|
|
"kl": 0.05890655517578125,
|
|
"learning_rate": 4.055555555555556e-06,
|
|
"loss": -0.0315,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.034429773688316345,
|
|
"mask/share_reasoning": 0.8422298431396484,
|
|
"mask/share_step_conf": 0.11943414062261581,
|
|
"num_tokens": 13127207.0,
|
|
"reward": 1.0360541343688965,
|
|
"reward_std": 0.09538309276103973,
|
|
"rewards/accuracy_reward_step": 0.69921875,
|
|
"rewards/asymmetric_l2_reward": 0.8827582597732544,
|
|
"rewards/final_brier_reward_step": 0.8495062589645386,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 54
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.61080002784729,
|
|
"adv/mean_abs_reasoning": 0.3844855725765228,
|
|
"adv/mean_abs_step_conf": 0.7339929342269897,
|
|
"adv/ratio_final_to_reasoning": 1.5886162483397857,
|
|
"adv/ratio_step_to_reasoning": 1.9090259468212054,
|
|
"adv/std_final_conf": 0.8445244431495667,
|
|
"adv/std_reasoning": 0.6815156936645508,
|
|
"adv/std_step_conf": 0.9316856861114502,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.8308004052684904,
|
|
"calib/avg_num_step_conf": 4.9296875,
|
|
"calib/ece": 0.25086956521739145,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.6482213438735178,
|
|
"calib/gap": 0.2831933890577505,
|
|
"calib/mean_conf": 0.7950592885375494,
|
|
"calib/mu_c": 0.9204255319148934,
|
|
"calib/mu_w": 0.6372321428571429,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.24430830039525706,
|
|
"calib/std_conf": 0.2872626988401877,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.47453996983408747,
|
|
"calib/step_q_c_n": 663.0,
|
|
"calib/step_q_gap": 0.07060007000103241,
|
|
"calib/step_q_w": 0.40393989983305506,
|
|
"calib/step_q_w_n": 599.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2188.0,
|
|
"completions/max_terminated_length": 2188.0,
|
|
"completions/mean_length": 499.4375,
|
|
"completions/mean_terminated_length": 501.3961181640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 130.0,
|
|
"epoch": 0.058666666666666666,
|
|
"grad_norm": 0.06633848696947098,
|
|
"kl": 0.06307220458984375,
|
|
"learning_rate": 4.027777777777779e-06,
|
|
"loss": -0.0112,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03279150649905205,
|
|
"mask/share_reasoning": 0.8528703451156616,
|
|
"mask/share_step_conf": 0.11043195426464081,
|
|
"num_tokens": 13362887.0,
|
|
"reward": 0.9695348739624023,
|
|
"reward_std": 0.16215607523918152,
|
|
"rewards/accuracy_reward_step": 0.55078125,
|
|
"rewards/asymmetric_l2_reward": 0.8861154317855835,
|
|
"rewards/final_brier_reward_step": 0.745141863822937,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 55
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6004599332809448,
|
|
"adv/mean_abs_reasoning": 0.48417240381240845,
|
|
"adv/mean_abs_step_conf": 0.7438913583755493,
|
|
"adv/ratio_final_to_reasoning": 1.2401779377611777,
|
|
"adv/ratio_step_to_reasoning": 1.536418334704942,
|
|
"adv/std_final_conf": 0.8093182444572449,
|
|
"adv/std_reasoning": 0.7393047213554382,
|
|
"adv/std_step_conf": 0.9322188496589661,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.6902107758344544,
|
|
"calib/avg_num_step_conf": 5.4765625,
|
|
"calib/ece": 0.40175999999999995,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.844,
|
|
"calib/gap": 0.11437247741687495,
|
|
"calib/mean_conf": 0.9097600000000001,
|
|
"calib/mu_c": 0.9651162790697675,
|
|
"calib/mu_w": 0.8507438016528925,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
|
"calib/pce": 0.39775999999999995,
|
|
"calib/std_conf": 0.21169020383569948,
|
|
"calib/step_conf_rate": 0.98046875,
|
|
"calib/step_q_c": 0.4875795297372061,
|
|
"calib/step_q_c_n": 723.0,
|
|
"calib/step_q_gap": 0.03625405109213975,
|
|
"calib/step_q_w": 0.45132547864506634,
|
|
"calib/step_q_w_n": 679.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2483.0,
|
|
"completions/max_terminated_length": 2483.0,
|
|
"completions/mean_length": 526.91796875,
|
|
"completions/mean_terminated_length": 528.984375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 194.0,
|
|
"epoch": 0.05973333333333333,
|
|
"grad_norm": 0.1122933179140091,
|
|
"kl": 0.05873870849609375,
|
|
"learning_rate": 4.000000000000001e-06,
|
|
"loss": -0.0373,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.032018400728702545,
|
|
"mask/share_reasoning": 0.8491096496582031,
|
|
"mask/share_step_conf": 0.11496569961309433,
|
|
"num_tokens": 13604618.0,
|
|
"reward": 0.8735764026641846,
|
|
"reward_std": 0.1838463842868805,
|
|
"rewards/accuracy_reward_step": 0.50390625,
|
|
"rewards/asymmetric_l2_reward": 0.8586329221725464,
|
|
"rewards/final_brier_reward_step": 0.5932074189186096,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 56
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5015829205513,
|
|
"adv/mean_abs_reasoning": 0.3861631751060486,
|
|
"adv/mean_abs_step_conf": 0.7528830766677856,
|
|
"adv/ratio_final_to_reasoning": 1.2988885343962555,
|
|
"adv/ratio_step_to_reasoning": 1.949650109596877,
|
|
"adv/std_final_conf": 0.742138147354126,
|
|
"adv/std_reasoning": 0.6614132523536682,
|
|
"adv/std_step_conf": 0.9319602251052856,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.7295296167247386,
|
|
"calib/avg_num_step_conf": 5.07421875,
|
|
"calib/ece": 0.27711999999999987,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.88,
|
|
"calib/gap": 0.10993176538908245,
|
|
"calib/mean_conf": 0.93424,
|
|
"calib/mu_c": 0.970297619047619,
|
|
"calib/mu_w": 0.8603658536585366,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.26967999999999986,
|
|
"calib/std_conf": 0.17799444485713592,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.5003476245654692,
|
|
"calib/step_q_c_n": 863.0,
|
|
"calib/step_q_gap": 0.05931551447372613,
|
|
"calib/step_q_w": 0.4410321100917431,
|
|
"calib/step_q_w_n": 436.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2508.0,
|
|
"completions/max_terminated_length": 2508.0,
|
|
"completions/mean_length": 518.72265625,
|
|
"completions/mean_terminated_length": 524.87353515625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 190.0,
|
|
"epoch": 0.0608,
|
|
"grad_norm": 0.05452043563127518,
|
|
"kl": 0.05272674560546875,
|
|
"learning_rate": 3.972222222222223e-06,
|
|
"loss": -0.0571,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.03176348656415939,
|
|
"mask/share_reasoning": 0.8486981987953186,
|
|
"mask/share_step_conf": 0.10781954228878021,
|
|
"num_tokens": 13844203.0,
|
|
"reward": 0.9544321894645691,
|
|
"reward_std": 0.1735970377922058,
|
|
"rewards/accuracy_reward_step": 0.65625,
|
|
"rewards/asymmetric_l2_reward": 0.8717612028121948,
|
|
"rewards/final_brier_reward_step": 0.7105406522750854,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 57
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.653085470199585,
|
|
"adv/mean_abs_reasoning": 0.5664516687393188,
|
|
"adv/mean_abs_step_conf": 0.7543550133705139,
|
|
"adv/ratio_final_to_reasoning": 1.1529412061810609,
|
|
"adv/ratio_step_to_reasoning": 1.331719994839786,
|
|
"adv/std_final_conf": 0.8459213376045227,
|
|
"adv/std_reasoning": 0.7929334044456482,
|
|
"adv/std_step_conf": 0.9330338835716248,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.5168343526007759,
|
|
"calib/avg_num_step_conf": 6.296875,
|
|
"calib/ece": 0.37165322580645166,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.95703125,
|
|
"calib/frac_conf_gt_0.9": 0.7903225806451613,
|
|
"calib/gap": 0.026798842638258824,
|
|
"calib/mean_conf": 0.8870564516129033,
|
|
"calib/mu_c": 0.899051094890511,
|
|
"calib/mu_w": 0.8722522522522522,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.97265625,
|
|
"calib/pce": 0.35314516129032264,
|
|
"calib/std_conf": 0.22490635335836415,
|
|
"calib/step_conf_rate": 0.97265625,
|
|
"calib/step_q_c": 0.4674119076549211,
|
|
"calib/step_q_c_n": 823.0,
|
|
"calib/step_q_gap": 0.012785798656188507,
|
|
"calib/step_q_w": 0.45462610899873257,
|
|
"calib/step_q_w_n": 789.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2451.0,
|
|
"completions/max_terminated_length": 2451.0,
|
|
"completions/mean_length": 631.66796875,
|
|
"completions/mean_terminated_length": 634.1451416015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 162.0,
|
|
"epoch": 0.06186666666666667,
|
|
"grad_norm": 0.0428379587829113,
|
|
"kl": 0.039905548095703125,
|
|
"learning_rate": 3.944444444444445e-06,
|
|
"loss": -0.0225,
|
|
"mask/has_final_conf_rate": 0.96484375,
|
|
"mask/share_final_conf": 0.02726609632372856,
|
|
"mask/share_reasoning": 0.8649106621742249,
|
|
"mask/share_step_conf": 0.10391701012849808,
|
|
"num_tokens": 14112230.0,
|
|
"reward": 0.8503095507621765,
|
|
"reward_std": 0.23419946432113647,
|
|
"rewards/accuracy_reward_step": 0.53515625,
|
|
"rewards/asymmetric_l2_reward": 0.8246171474456787,
|
|
"rewards/final_brier_reward_step": 0.5775644779205322,
|
|
"rewards/format_reward_step": 0.95703125,
|
|
"step": 58
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6193721890449524,
|
|
"adv/mean_abs_reasoning": 0.4775833189487457,
|
|
"adv/mean_abs_step_conf": 0.7455660104751587,
|
|
"adv/ratio_final_to_reasoning": 1.2968882380739588,
|
|
"adv/ratio_step_to_reasoning": 1.561122386176082,
|
|
"adv/std_final_conf": 0.8122193217277527,
|
|
"adv/std_reasoning": 0.7393720746040344,
|
|
"adv/std_step_conf": 0.933074951171875,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.6466128807900959,
|
|
"calib/avg_num_step_conf": 5.2734375,
|
|
"calib/ece": 0.3228112449799195,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.8273092369477911,
|
|
"calib/gap": 0.058149951314508286,
|
|
"calib/mean_conf": 0.9030522088353414,
|
|
"calib/mu_c": 0.9243037974683543,
|
|
"calib/mu_w": 0.866153846153846,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.2956626506024094,
|
|
"calib/std_conf": 0.2181105456543611,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.5174651162790698,
|
|
"calib/step_q_c_n": 860.0,
|
|
"calib/step_q_gap": 0.03366919791172285,
|
|
"calib/step_q_w": 0.4837959183673469,
|
|
"calib/step_q_w_n": 490.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2220.0,
|
|
"completions/max_terminated_length": 2220.0,
|
|
"completions/mean_length": 548.95703125,
|
|
"completions/mean_terminated_length": 553.279541015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 152.0,
|
|
"epoch": 0.06293333333333333,
|
|
"grad_norm": 0.05327894538640976,
|
|
"kl": 0.057308197021484375,
|
|
"learning_rate": 3.916666666666667e-06,
|
|
"loss": -0.0296,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03187629580497742,
|
|
"mask/share_reasoning": 0.85688316822052,
|
|
"mask/share_step_conf": 0.10342804342508316,
|
|
"num_tokens": 14359011.0,
|
|
"reward": 0.9090771079063416,
|
|
"reward_std": 0.21725571155548096,
|
|
"rewards/accuracy_reward_step": 0.6171875,
|
|
"rewards/asymmetric_l2_reward": 0.8475908041000366,
|
|
"rewards/final_brier_reward_step": 0.6533757448196411,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"step": 59
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6204248070716858,
|
|
"adv/mean_abs_reasoning": 0.4606407880783081,
|
|
"adv/mean_abs_step_conf": 0.7363015413284302,
|
|
"adv/ratio_final_to_reasoning": 1.346873362343707,
|
|
"adv/ratio_step_to_reasoning": 1.5984288851191792,
|
|
"adv/std_final_conf": 0.8465948104858398,
|
|
"adv/std_reasoning": 0.7391853928565979,
|
|
"adv/std_step_conf": 0.9324144124984741,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6778369905956112,
|
|
"calib/avg_num_step_conf": 4.95703125,
|
|
"calib/ece": 0.3437254901960784,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.796078431372549,
|
|
"calib/gap": 0.1016175548589342,
|
|
"calib/mean_conf": 0.8845098039215686,
|
|
"calib/mu_c": 0.928344827586207,
|
|
"calib/mu_w": 0.8267272727272728,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.3298039215686274,
|
|
"calib/std_conf": 0.23504954609522094,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.5443636363636363,
|
|
"calib/step_q_c_n": 660.0,
|
|
"calib/step_q_gap": 0.06410091058366907,
|
|
"calib/step_q_w": 0.4802627257799672,
|
|
"calib/step_q_w_n": 609.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2740.0,
|
|
"completions/max_terminated_length": 2740.0,
|
|
"completions/mean_length": 518.02734375,
|
|
"completions/mean_terminated_length": 518.02734375,
|
|
"completions/min_length": 232.0,
|
|
"completions/min_terminated_length": 232.0,
|
|
"epoch": 0.064,
|
|
"grad_norm": 0.04044271260499954,
|
|
"kl": 0.048015594482421875,
|
|
"learning_rate": 3.88888888888889e-06,
|
|
"loss": -0.002,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.03257352113723755,
|
|
"mask/share_reasoning": 0.8600834012031555,
|
|
"mask/share_step_conf": 0.10734307020902634,
|
|
"num_tokens": 14600482.0,
|
|
"reward": 0.9168381690979004,
|
|
"reward_std": 0.17467540502548218,
|
|
"rewards/accuracy_reward_step": 0.56640625,
|
|
"rewards/asymmetric_l2_reward": 0.8741821050643921,
|
|
"rewards/final_brier_reward_step": 0.6469941139221191,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 60
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5063987374305725,
|
|
"adv/mean_abs_reasoning": 0.3942033052444458,
|
|
"adv/mean_abs_step_conf": 0.7471913695335388,
|
|
"adv/ratio_final_to_reasoning": 1.284613118899534,
|
|
"adv/ratio_step_to_reasoning": 1.8954467392661887,
|
|
"adv/std_final_conf": 0.7494310140609741,
|
|
"adv/std_reasoning": 0.6815856695175171,
|
|
"adv/std_step_conf": 0.9329997897148132,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.6456996148908857,
|
|
"calib/avg_num_step_conf": 4.96875,
|
|
"calib/ece": 0.26280632411067195,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.8537549407114624,
|
|
"calib/gap": 0.10839751818570809,
|
|
"calib/mean_conf": 0.9224110671936759,
|
|
"calib/mu_c": 0.9575438596491228,
|
|
"calib/mu_w": 0.8491463414634147,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.2546640316205534,
|
|
"calib/std_conf": 0.18691619784239447,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.5392926829268293,
|
|
"calib/step_q_c_n": 820.0,
|
|
"calib/step_q_gap": 0.07070861213036911,
|
|
"calib/step_q_w": 0.4685840707964602,
|
|
"calib/step_q_w_n": 452.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2139.0,
|
|
"completions/max_terminated_length": 2139.0,
|
|
"completions/mean_length": 446.3984375,
|
|
"completions/mean_terminated_length": 446.3984375,
|
|
"completions/min_length": 124.0,
|
|
"completions/min_terminated_length": 124.0,
|
|
"epoch": 0.06506666666666666,
|
|
"grad_norm": 0.039539139717817307,
|
|
"kl": 0.0517120361328125,
|
|
"learning_rate": 3.861111111111112e-06,
|
|
"loss": -0.0068,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.038807108998298645,
|
|
"mask/share_reasoning": 0.8420302867889404,
|
|
"mask/share_step_conf": 0.11916261911392212,
|
|
"num_tokens": 14818824.0,
|
|
"reward": 0.9512588381767273,
|
|
"reward_std": 0.16452768445014954,
|
|
"rewards/accuracy_reward_step": 0.66796875,
|
|
"rewards/asymmetric_l2_reward": 0.8505984544754028,
|
|
"rewards/final_brier_reward_step": 0.7214503288269043,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 61
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7046828269958496,
|
|
"adv/mean_abs_reasoning": 0.6048574447631836,
|
|
"adv/mean_abs_step_conf": 0.782432496547699,
|
|
"adv/ratio_final_to_reasoning": 1.165039519802472,
|
|
"adv/ratio_step_to_reasoning": 1.293581658491515,
|
|
"adv/std_final_conf": 0.8792684078216553,
|
|
"adv/std_reasoning": 0.8099877238273621,
|
|
"adv/std_step_conf": 0.9344117045402527,
|
|
"calib/answer_extract_rate": 0.9609375,
|
|
"calib/auroc": 0.590156823490157,
|
|
"calib/avg_num_step_conf": 5.34375,
|
|
"calib/ece": 0.3766260162601627,
|
|
"calib/final_conf_rate": 0.9609375,
|
|
"calib/format_rate": 0.94921875,
|
|
"calib/frac_conf_gt_0.9": 0.7479674796747967,
|
|
"calib/gap": 0.04934334334334367,
|
|
"calib/mean_conf": 0.8574390243902439,
|
|
"calib/mu_c": 0.8797037037037038,
|
|
"calib/mu_w": 0.8303603603603601,
|
|
"calib/nonempty_final_conf_rate": 0.9609375,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
|
"calib/pce": 0.3426422764227643,
|
|
"calib/std_conf": 0.25203470035773656,
|
|
"calib/step_conf_rate": 0.9765625,
|
|
"calib/step_q_c": 0.5694727592267135,
|
|
"calib/step_q_c_n": 569.0,
|
|
"calib/step_q_gap": 0.0766567392016822,
|
|
"calib/step_q_w": 0.4928160200250313,
|
|
"calib/step_q_w_n": 799.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2655.0,
|
|
"completions/max_terminated_length": 2655.0,
|
|
"completions/mean_length": 535.71875,
|
|
"completions/mean_terminated_length": 539.93701171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 166.0,
|
|
"epoch": 0.06613333333333334,
|
|
"grad_norm": 0.05797210708260536,
|
|
"kl": 0.049541473388671875,
|
|
"learning_rate": 3.833333333333334e-06,
|
|
"loss": -0.0318,
|
|
"mask/has_final_conf_rate": 0.9609375,
|
|
"mask/share_final_conf": 0.03127940744161606,
|
|
"mask/share_reasoning": 0.858956515789032,
|
|
"mask/share_step_conf": 0.10195156186819077,
|
|
"num_tokens": 15063048.0,
|
|
"reward": 0.8401246070861816,
|
|
"reward_std": 0.255887508392334,
|
|
"rewards/accuracy_reward_step": 0.52734375,
|
|
"rewards/asymmetric_l2_reward": 0.7979668378829956,
|
|
"rewards/final_brier_reward_step": 0.5869699120521545,
|
|
"rewards/format_reward_step": 0.94921875,
|
|
"step": 62
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7219770550727844,
|
|
"adv/mean_abs_reasoning": 0.5437690019607544,
|
|
"adv/mean_abs_step_conf": 0.7378803491592407,
|
|
"adv/ratio_final_to_reasoning": 1.3277274954428018,
|
|
"adv/ratio_step_to_reasoning": 1.3569739107940102,
|
|
"adv/std_final_conf": 0.8784008622169495,
|
|
"adv/std_reasoning": 0.7928453683853149,
|
|
"adv/std_step_conf": 0.9337167739868164,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.7164169119614665,
|
|
"calib/avg_num_step_conf": 5.10546875,
|
|
"calib/ece": 0.2565194109772422,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.6706827309236948,
|
|
"calib/gap": 0.1969813129961645,
|
|
"calib/mean_conf": 0.821338688085676,
|
|
"calib/mu_c": 0.9012387387387387,
|
|
"calib/mu_w": 0.7042574257425742,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
|
"calib/pce": 0.2417402945113787,
|
|
"calib/std_conf": 0.25969706026981004,
|
|
"calib/step_conf_rate": 0.98046875,
|
|
"calib/step_q_c": 0.5517697841726619,
|
|
"calib/step_q_c_n": 695.0,
|
|
"calib/step_q_gap": 0.06320769266939391,
|
|
"calib/step_q_w": 0.48856209150326796,
|
|
"calib/step_q_w_n": 612.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2813.0,
|
|
"completions/max_terminated_length": 2813.0,
|
|
"completions/mean_length": 574.24609375,
|
|
"completions/mean_terminated_length": 576.498046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 151.0,
|
|
"epoch": 0.0672,
|
|
"grad_norm": 0.043521128594875336,
|
|
"kl": 0.037807464599609375,
|
|
"learning_rate": 3.8055555555555556e-06,
|
|
"loss": -0.0087,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.031664442270994186,
|
|
"mask/share_reasoning": 0.8629859089851379,
|
|
"mask/share_step_conf": 0.10144336521625519,
|
|
"num_tokens": 15318695.0,
|
|
"reward": 0.932765007019043,
|
|
"reward_std": 0.20682235062122345,
|
|
"rewards/accuracy_reward_step": 0.578125,
|
|
"rewards/asymmetric_l2_reward": 0.8452221155166626,
|
|
"rewards/final_brier_reward_step": 0.7109330296516418,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"step": 63
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6922527551651001,
|
|
"adv/mean_abs_reasoning": 0.5614722967147827,
|
|
"adv/mean_abs_step_conf": 0.7511861324310303,
|
|
"adv/ratio_final_to_reasoning": 1.2329241517622933,
|
|
"adv/ratio_step_to_reasoning": 1.3378863691517422,
|
|
"adv/std_final_conf": 0.8831756711006165,
|
|
"adv/std_reasoning": 0.7928008437156677,
|
|
"adv/std_step_conf": 0.9331434369087219,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.6446036498431708,
|
|
"calib/avg_num_step_conf": 5.12109375,
|
|
"calib/ece": 0.1753386454183265,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.6254980079681275,
|
|
"calib/gap": 0.18141930424864539,
|
|
"calib/mean_conf": 0.7977290836653387,
|
|
"calib/mu_c": 0.858443113772455,
|
|
"calib/mu_w": 0.6770238095238096,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.15386454183266915,
|
|
"calib/std_conf": 0.2702736224399446,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.5586697782963826,
|
|
"calib/step_q_c_n": 857.0,
|
|
"calib/step_q_gap": 0.035542024992417875,
|
|
"calib/step_q_w": 0.5231277533039648,
|
|
"calib/step_q_w_n": 454.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2473.0,
|
|
"completions/max_terminated_length": 2473.0,
|
|
"completions/mean_length": 513.83984375,
|
|
"completions/mean_terminated_length": 515.8549194335938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 207.0,
|
|
"epoch": 0.06826666666666667,
|
|
"grad_norm": 0.15224145352840424,
|
|
"kl": 0.09376144409179688,
|
|
"learning_rate": 3.777777777777778e-06,
|
|
"loss": 0.0487,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.032815635204315186,
|
|
"mask/share_reasoning": 0.8548951148986816,
|
|
"mask/share_step_conf": 0.10838305950164795,
|
|
"num_tokens": 15554014.0,
|
|
"reward": 0.9618469476699829,
|
|
"reward_std": 0.19402143359184265,
|
|
"rewards/accuracy_reward_step": 0.65625,
|
|
"rewards/asymmetric_l2_reward": 0.8472946882247925,
|
|
"rewards/final_brier_reward_step": 0.7498366832733154,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 64
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6335813403129578,
|
|
"adv/mean_abs_reasoning": 0.36810600757598877,
|
|
"adv/mean_abs_step_conf": 0.7440919876098633,
|
|
"adv/ratio_final_to_reasoning": 1.7211926110229714,
|
|
"adv/ratio_step_to_reasoning": 2.021406802105122,
|
|
"adv/std_final_conf": 0.8394150733947754,
|
|
"adv/std_reasoning": 0.6611788272857666,
|
|
"adv/std_step_conf": 0.933049201965332,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.7085637993515164,
|
|
"calib/avg_num_step_conf": 4.49609375,
|
|
"calib/ece": 0.28799212598425206,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.7086614173228346,
|
|
"calib/gap": 0.13941445737173375,
|
|
"calib/mean_conf": 0.8620866141732284,
|
|
"calib/mu_c": 0.9208163265306123,
|
|
"calib/mu_w": 0.7814018691588785,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.28566929133858276,
|
|
"calib/std_conf": 0.2172899845365054,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.6184561891515994,
|
|
"calib/step_q_c_n": 719.0,
|
|
"calib/step_q_gap": 0.007021003966414252,
|
|
"calib/step_q_w": 0.6114351851851851,
|
|
"calib/step_q_w_n": 432.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1244.0,
|
|
"completions/max_terminated_length": 1244.0,
|
|
"completions/mean_length": 425.75,
|
|
"completions/mean_terminated_length": 427.4196472167969,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 160.0,
|
|
"epoch": 0.06933333333333333,
|
|
"grad_norm": 0.05041688680648804,
|
|
"kl": 0.04650115966796875,
|
|
"learning_rate": 3.7500000000000005e-06,
|
|
"loss": -0.044,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.038847897201776505,
|
|
"mask/share_reasoning": 0.8437234163284302,
|
|
"mask/share_step_conf": 0.1135224848985672,
|
|
"num_tokens": 15768030.0,
|
|
"reward": 0.9121578335762024,
|
|
"reward_std": 0.1568649709224701,
|
|
"rewards/accuracy_reward_step": 0.57421875,
|
|
"rewards/asymmetric_l2_reward": 0.8197988271713257,
|
|
"rewards/final_brier_reward_step": 0.6912355422973633,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 65
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.699600338935852,
|
|
"adv/mean_abs_reasoning": 0.516379177570343,
|
|
"adv/mean_abs_step_conf": 0.7368757724761963,
|
|
"adv/ratio_final_to_reasoning": 1.3548190347790505,
|
|
"adv/ratio_step_to_reasoning": 1.4270052017653567,
|
|
"adv/std_final_conf": 0.8724037408828735,
|
|
"adv/std_reasoning": 0.7927989959716797,
|
|
"adv/std_step_conf": 0.9339161515235901,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.6934820904286553,
|
|
"calib/avg_num_step_conf": 5.796875,
|
|
"calib/ece": 0.20552419354838708,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.4475806451612903,
|
|
"calib/gap": 0.17645005545768921,
|
|
"calib/mean_conf": 0.7271370967741936,
|
|
"calib/mu_c": 0.810381679389313,
|
|
"calib/mu_w": 0.6339316239316238,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.20221774193548386,
|
|
"calib/std_conf": 0.26080417924978105,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.6141432791728212,
|
|
"calib/step_q_c_n": 677.0,
|
|
"calib/step_q_gap": 0.06765505116786441,
|
|
"calib/step_q_w": 0.5464882280049568,
|
|
"calib/step_q_w_n": 807.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2696.0,
|
|
"completions/max_terminated_length": 2696.0,
|
|
"completions/mean_length": 548.875,
|
|
"completions/mean_terminated_length": 548.875,
|
|
"completions/min_length": 147.0,
|
|
"completions/min_terminated_length": 147.0,
|
|
"epoch": 0.0704,
|
|
"grad_norm": 0.03213903680443764,
|
|
"kl": 0.04193115234375,
|
|
"learning_rate": 3.7222222222222225e-06,
|
|
"loss": 0.1051,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.03228248655796051,
|
|
"mask/share_reasoning": 0.8526995182037354,
|
|
"mask/share_step_conf": 0.11501805484294891,
|
|
"num_tokens": 16014894.0,
|
|
"reward": 0.9093428254127502,
|
|
"reward_std": 0.187605082988739,
|
|
"rewards/accuracy_reward_step": 0.51171875,
|
|
"rewards/asymmetric_l2_reward": 0.8142845034599304,
|
|
"rewards/final_brier_reward_step": 0.7083073854446411,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"step": 66
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6824854612350464,
|
|
"adv/mean_abs_reasoning": 0.29309481382369995,
|
|
"adv/mean_abs_step_conf": 0.7393745183944702,
|
|
"adv/ratio_final_to_reasoning": 2.328548404973039,
|
|
"adv/ratio_step_to_reasoning": 2.5226462002129213,
|
|
"adv/std_final_conf": 0.870021402835846,
|
|
"adv/std_reasoning": 0.6184077858924866,
|
|
"adv/std_step_conf": 0.9328907132148743,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.8083670715249662,
|
|
"calib/avg_num_step_conf": 4.8515625,
|
|
"calib/ece": 0.13792828685258968,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.47410358565737054,
|
|
"calib/gap": 0.27966531713900145,
|
|
"calib/mean_conf": 0.7428685258964144,
|
|
"calib/mu_c": 0.8487179487179489,
|
|
"calib/mu_w": 0.5690526315789475,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.12964143426294825,
|
|
"calib/std_conf": 0.2607859302946761,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.6446345177664974,
|
|
"calib/step_q_c_n": 788.0,
|
|
"calib/step_q_gap": 0.061043768867818926,
|
|
"calib/step_q_w": 0.5835907488986785,
|
|
"calib/step_q_w_n": 454.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2463.0,
|
|
"completions/max_terminated_length": 2463.0,
|
|
"completions/mean_length": 531.15625,
|
|
"completions/mean_terminated_length": 531.15625,
|
|
"completions/min_length": 162.0,
|
|
"completions/min_terminated_length": 162.0,
|
|
"epoch": 0.07146666666666666,
|
|
"grad_norm": 0.045603763312101364,
|
|
"kl": 0.057559967041015625,
|
|
"learning_rate": 3.694444444444445e-06,
|
|
"loss": 0.0319,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03274885565042496,
|
|
"mask/share_reasoning": 0.8626049160957336,
|
|
"mask/share_step_conf": 0.10464620590209961,
|
|
"num_tokens": 16255878.0,
|
|
"reward": 0.9663029909133911,
|
|
"reward_std": 0.13919922709465027,
|
|
"rewards/accuracy_reward_step": 0.609375,
|
|
"rewards/asymmetric_l2_reward": 0.8169246912002563,
|
|
"rewards/final_brier_reward_step": 0.7977124452590942,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 67
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.729244589805603,
|
|
"adv/mean_abs_reasoning": 0.5466316938400269,
|
|
"adv/mean_abs_step_conf": 0.739362895488739,
|
|
"adv/ratio_final_to_reasoning": 1.3340693524057137,
|
|
"adv/ratio_step_to_reasoning": 1.3525796323568378,
|
|
"adv/std_final_conf": 0.910574197769165,
|
|
"adv/std_reasoning": 0.7928569316864014,
|
|
"adv/std_step_conf": 0.9348320960998535,
|
|
"calib/answer_extract_rate": 0.9609375,
|
|
"calib/auroc": 0.6934589041095891,
|
|
"calib/avg_num_step_conf": 5.1171875,
|
|
"calib/ece": 0.14150406504065038,
|
|
"calib/final_conf_rate": 0.9609375,
|
|
"calib/format_rate": 0.953125,
|
|
"calib/frac_conf_gt_0.9": 0.4024390243902439,
|
|
"calib/gap": 0.18019315068493158,
|
|
"calib/mean_conf": 0.7252439024390244,
|
|
"calib/mu_c": 0.7984931506849315,
|
|
"calib/mu_w": 0.6183,
|
|
"calib/nonempty_final_conf_rate": 0.9609375,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
|
"calib/pce": 0.13662601626016257,
|
|
"calib/std_conf": 0.2594485659967677,
|
|
"calib/step_conf_rate": 0.9765625,
|
|
"calib/step_q_c": 0.6530057803468208,
|
|
"calib/step_q_c_n": 692.0,
|
|
"calib/step_q_gap": 0.07198021400377874,
|
|
"calib/step_q_w": 0.581025566343042,
|
|
"calib/step_q_w_n": 618.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2385.0,
|
|
"completions/max_terminated_length": 2385.0,
|
|
"completions/mean_length": 501.19140625,
|
|
"completions/mean_terminated_length": 503.1568908691406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 127.0,
|
|
"epoch": 0.07253333333333334,
|
|
"grad_norm": 0.07743779569864273,
|
|
"kl": 0.046253204345703125,
|
|
"learning_rate": 3.6666666666666666e-06,
|
|
"loss": 0.0019,
|
|
"mask/has_final_conf_rate": 0.9609375,
|
|
"mask/share_final_conf": 0.03694310039281845,
|
|
"mask/share_reasoning": 0.8424147963523865,
|
|
"mask/share_step_conf": 0.11673584580421448,
|
|
"num_tokens": 16488271.0,
|
|
"reward": 0.9158475399017334,
|
|
"reward_std": 0.21296223998069763,
|
|
"rewards/accuracy_reward_step": 0.5703125,
|
|
"rewards/asymmetric_l2_reward": 0.8014019727706909,
|
|
"rewards/final_brier_reward_step": 0.7256054878234863,
|
|
"rewards/format_reward_step": 0.953125,
|
|
"step": 68
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7655402421951294,
|
|
"adv/mean_abs_reasoning": 0.5517368316650391,
|
|
"adv/mean_abs_step_conf": 0.7545615434646606,
|
|
"adv/ratio_final_to_reasoning": 1.3875097659963562,
|
|
"adv/ratio_step_to_reasoning": 1.3676113323584622,
|
|
"adv/std_final_conf": 0.9263461232185364,
|
|
"adv/std_reasoning": 0.792895495891571,
|
|
"adv/std_step_conf": 0.9345738887786865,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.6088992974238876,
|
|
"calib/avg_num_step_conf": 4.86328125,
|
|
"calib/ece": 0.19758064516129042,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 0.2862903225806452,
|
|
"calib/gap": 0.10129065833983864,
|
|
"calib/mean_conf": 0.6591935483870969,
|
|
"calib/mu_c": 0.7106557377049181,
|
|
"calib/mu_w": 0.6093650793650794,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.18241935483870975,
|
|
"calib/std_conf": 0.24994224098648157,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.6377577933450087,
|
|
"calib/step_q_c_n": 571.0,
|
|
"calib/step_q_gap": 0.03260541945776829,
|
|
"calib/step_q_w": 0.6051523738872404,
|
|
"calib/step_q_w_n": 674.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2589.0,
|
|
"completions/max_terminated_length": 2589.0,
|
|
"completions/mean_length": 565.609375,
|
|
"completions/mean_terminated_length": 567.8275146484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 154.0,
|
|
"epoch": 0.0736,
|
|
"grad_norm": 0.04150233417749405,
|
|
"kl": 0.0474853515625,
|
|
"learning_rate": 3.638888888888889e-06,
|
|
"loss": -0.0577,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.03185002878308296,
|
|
"mask/share_reasoning": 0.8687409162521362,
|
|
"mask/share_step_conf": 0.09550271928310394,
|
|
"num_tokens": 16737563.0,
|
|
"reward": 0.8744354844093323,
|
|
"reward_std": 0.19976764917373657,
|
|
"rewards/accuracy_reward_step": 0.4765625,
|
|
"rewards/asymmetric_l2_reward": 0.7783041596412659,
|
|
"rewards/final_brier_reward_step": 0.6830667853355408,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"step": 69
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7534902691841125,
|
|
"adv/mean_abs_reasoning": 0.4766578674316406,
|
|
"adv/mean_abs_step_conf": 0.7700801491737366,
|
|
"adv/ratio_final_to_reasoning": 1.5807779975270702,
|
|
"adv/ratio_step_to_reasoning": 1.6155825840516034,
|
|
"adv/std_final_conf": 0.9181990027427673,
|
|
"adv/std_reasoning": 0.720661461353302,
|
|
"adv/std_step_conf": 0.9350027441978455,
|
|
"calib/answer_extract_rate": 0.94921875,
|
|
"calib/auroc": 0.8443896507464703,
|
|
"calib/avg_num_step_conf": 5.5859375,
|
|
"calib/ece": 0.20327868852459027,
|
|
"calib/final_conf_rate": 0.953125,
|
|
"calib/format_rate": 0.9375,
|
|
"calib/frac_conf_gt_0.9": 0.3524590163934426,
|
|
"calib/gap": 0.3460663379044788,
|
|
"calib/mean_conf": 0.6605737704918033,
|
|
"calib/mu_c": 0.8463716814159292,
|
|
"calib/mu_w": 0.5003053435114504,
|
|
"calib/nonempty_final_conf_rate": 0.953125,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
|
"calib/pce": 0.2003688524590165,
|
|
"calib/std_conf": 0.28365674008783714,
|
|
"calib/step_conf_rate": 0.98046875,
|
|
"calib/step_q_c": 0.6533443708609271,
|
|
"calib/step_q_c_n": 604.0,
|
|
"calib/step_q_gap": 0.07408964931129025,
|
|
"calib/step_q_w": 0.5792547215496369,
|
|
"calib/step_q_w_n": 826.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2927.0,
|
|
"completions/max_terminated_length": 2927.0,
|
|
"completions/mean_length": 599.86328125,
|
|
"completions/mean_terminated_length": 599.86328125,
|
|
"completions/min_length": 106.0,
|
|
"completions/min_terminated_length": 106.0,
|
|
"epoch": 0.07466666666666667,
|
|
"grad_norm": 0.04824332147836685,
|
|
"kl": 0.04335784912109375,
|
|
"learning_rate": 3.6111111111111115e-06,
|
|
"loss": 0.0352,
|
|
"mask/has_final_conf_rate": 0.953125,
|
|
"mask/share_final_conf": 0.033216990530490875,
|
|
"mask/share_reasoning": 0.8483107686042786,
|
|
"mask/share_step_conf": 0.11847224086523056,
|
|
"num_tokens": 16998120.0,
|
|
"reward": 0.8946191072463989,
|
|
"reward_std": 0.18673905730247498,
|
|
"rewards/accuracy_reward_step": 0.44140625,
|
|
"rewards/asymmetric_l2_reward": 0.7578139305114746,
|
|
"rewards/final_brier_reward_step": 0.7556430101394653,
|
|
"rewards/format_reward_step": 0.9375,
|
|
"step": 70
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7033551931381226,
|
|
"adv/mean_abs_reasoning": 0.5371626615524292,
|
|
"adv/mean_abs_step_conf": 0.746070146560669,
|
|
"adv/ratio_final_to_reasoning": 1.3093895824877104,
|
|
"adv/ratio_step_to_reasoning": 1.3889091702771854,
|
|
"adv/std_final_conf": 0.8913128972053528,
|
|
"adv/std_reasoning": 0.7754039168357849,
|
|
"adv/std_step_conf": 0.9348368048667908,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.6245294234198534,
|
|
"calib/avg_num_step_conf": 5.86328125,
|
|
"calib/ece": 0.22508000000000003,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.472,
|
|
"calib/gap": 0.11424278449243785,
|
|
"calib/mean_conf": 0.723,
|
|
"calib/mu_c": 0.7700680272108843,
|
|
"calib/mu_w": 0.6558252427184464,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.18004000000000003,
|
|
"calib/std_conf": 0.28955034104625055,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.6275355670103093,
|
|
"calib/step_q_c_n": 776.0,
|
|
"calib/step_q_gap": 0.03587267045858522,
|
|
"calib/step_q_w": 0.5916628965517241,
|
|
"calib/step_q_w_n": 725.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2462.0,
|
|
"completions/max_terminated_length": 2462.0,
|
|
"completions/mean_length": 533.4921875,
|
|
"completions/mean_terminated_length": 535.5843505859375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 157.0,
|
|
"epoch": 0.07573333333333333,
|
|
"grad_norm": 0.036347754299640656,
|
|
"kl": 0.049762725830078125,
|
|
"learning_rate": 3.5833333333333335e-06,
|
|
"loss": -0.0425,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.0346219427883625,
|
|
"mask/share_reasoning": 0.8352914452552795,
|
|
"mask/share_step_conf": 0.12618035078048706,
|
|
"num_tokens": 17239102.0,
|
|
"reward": 0.8964895009994507,
|
|
"reward_std": 0.19243742525577545,
|
|
"rewards/accuracy_reward_step": 0.57421875,
|
|
"rewards/asymmetric_l2_reward": 0.7922806143760681,
|
|
"rewards/final_brier_reward_step": 0.691323459148407,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 71
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6750290393829346,
|
|
"adv/mean_abs_reasoning": 0.45832559466362,
|
|
"adv/mean_abs_step_conf": 0.7397451400756836,
|
|
"adv/ratio_final_to_reasoning": 1.4728154989431919,
|
|
"adv/ratio_step_to_reasoning": 1.614016648183496,
|
|
"adv/std_final_conf": 0.8813406229019165,
|
|
"adv/std_reasoning": 0.7574042081832886,
|
|
"adv/std_step_conf": 0.935146689414978,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.7351403061224491,
|
|
"calib/avg_num_step_conf": 4.8671875,
|
|
"calib/ece": 0.21115079365079364,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.47619047619047616,
|
|
"calib/gap": 0.22401785714285705,
|
|
"calib/mean_conf": 0.7532936507936507,
|
|
"calib/mu_c": 0.8528571428571429,
|
|
"calib/mu_w": 0.6288392857142858,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.20444444444444443,
|
|
"calib/std_conf": 0.2702509582515445,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.647175965665236,
|
|
"calib/step_q_c_n": 699.0,
|
|
"calib/step_q_gap": 0.05842441173470592,
|
|
"calib/step_q_w": 0.5887515539305301,
|
|
"calib/step_q_w_n": 547.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2390.0,
|
|
"completions/max_terminated_length": 2390.0,
|
|
"completions/mean_length": 483.91015625,
|
|
"completions/mean_terminated_length": 483.91015625,
|
|
"completions/min_length": 148.0,
|
|
"completions/min_terminated_length": 148.0,
|
|
"epoch": 0.0768,
|
|
"grad_norm": 0.31355804204940796,
|
|
"kl": 0.11542510986328125,
|
|
"learning_rate": 3.555555555555556e-06,
|
|
"loss": 0.0661,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.034122809767723083,
|
|
"mask/share_reasoning": 0.8555728197097778,
|
|
"mask/share_step_conf": 0.11030436307191849,
|
|
"num_tokens": 17467391.0,
|
|
"reward": 0.9290406107902527,
|
|
"reward_std": 0.1833093762397766,
|
|
"rewards/accuracy_reward_step": 0.546875,
|
|
"rewards/asymmetric_l2_reward": 0.8128567934036255,
|
|
"rewards/final_brier_reward_step": 0.7397554516792297,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 72
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6979169249534607,
|
|
"adv/mean_abs_reasoning": 0.611485481262207,
|
|
"adv/mean_abs_step_conf": 0.7284319400787354,
|
|
"adv/ratio_final_to_reasoning": 1.1413466817116327,
|
|
"adv/ratio_step_to_reasoning": 1.191249771908127,
|
|
"adv/std_final_conf": 0.8787330389022827,
|
|
"adv/std_reasoning": 0.8267027735710144,
|
|
"adv/std_step_conf": 0.9347501993179321,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.765242718446602,
|
|
"calib/avg_num_step_conf": 5.03125,
|
|
"calib/ece": 0.2145454545454545,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.5889328063241107,
|
|
"calib/gap": 0.232482200647249,
|
|
"calib/mean_conf": 0.7950197628458499,
|
|
"calib/mu_c": 0.8896666666666666,
|
|
"calib/mu_w": 0.6571844660194176,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.20833992094861656,
|
|
"calib/std_conf": 0.2677967852430365,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.6364171390013496,
|
|
"calib/step_q_c_n": 741.0,
|
|
"calib/step_q_gap": 0.059897394942848625,
|
|
"calib/step_q_w": 0.5765197440585009,
|
|
"calib/step_q_w_n": 547.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2415.0,
|
|
"completions/max_terminated_length": 2415.0,
|
|
"completions/mean_length": 468.234375,
|
|
"completions/mean_terminated_length": 468.234375,
|
|
"completions/min_length": 164.0,
|
|
"completions/min_terminated_length": 164.0,
|
|
"epoch": 0.07786666666666667,
|
|
"grad_norm": 0.027293583378195763,
|
|
"kl": 0.050006866455078125,
|
|
"learning_rate": 3.5277777777777784e-06,
|
|
"loss": -0.0106,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.0346967875957489,
|
|
"mask/share_reasoning": 0.8508319854736328,
|
|
"mask/share_step_conf": 0.11447125673294067,
|
|
"num_tokens": 17694291.0,
|
|
"reward": 0.9352380037307739,
|
|
"reward_std": 0.20372015237808228,
|
|
"rewards/accuracy_reward_step": 0.5859375,
|
|
"rewards/asymmetric_l2_reward": 0.8135044574737549,
|
|
"rewards/final_brier_reward_step": 0.7436902523040771,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 73
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6834238767623901,
|
|
"adv/mean_abs_reasoning": 0.4758460521697998,
|
|
"adv/mean_abs_step_conf": 0.7555922269821167,
|
|
"adv/ratio_final_to_reasoning": 1.4362289518764753,
|
|
"adv/ratio_step_to_reasoning": 1.587892183904245,
|
|
"adv/std_final_conf": 0.883694052696228,
|
|
"adv/std_reasoning": 0.7206491827964783,
|
|
"adv/std_step_conf": 0.92009037733078,
|
|
"calib/answer_extract_rate": 0.94140625,
|
|
"calib/auroc": 0.741551724137931,
|
|
"calib/avg_num_step_conf": 4.6953125,
|
|
"calib/ece": 0.22875518672199174,
|
|
"calib/final_conf_rate": 0.94140625,
|
|
"calib/format_rate": 0.93359375,
|
|
"calib/frac_conf_gt_0.9": 0.4730290456431535,
|
|
"calib/gap": 0.2606606896551724,
|
|
"calib/mean_conf": 0.7315767634854772,
|
|
"calib/mu_c": 0.85704,
|
|
"calib/mu_w": 0.5963793103448276,
|
|
"calib/nonempty_final_conf_rate": 0.94140625,
|
|
"calib/nonempty_reasoning_rate": 0.94921875,
|
|
"calib/nonempty_step_conf_rate": 0.9453125,
|
|
"calib/pce": 0.22082987551867225,
|
|
"calib/std_conf": 0.3010369411849477,
|
|
"calib/step_conf_rate": 0.9453125,
|
|
"calib/step_q_c": 0.6306426644182125,
|
|
"calib/step_q_c_n": 593.0,
|
|
"calib/step_q_gap": 0.06642263157748995,
|
|
"calib/step_q_w": 0.5642200328407225,
|
|
"calib/step_q_w_n": 609.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2564.0,
|
|
"completions/max_terminated_length": 2564.0,
|
|
"completions/mean_length": 521.0859375,
|
|
"completions/mean_terminated_length": 523.1294555664062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 134.0,
|
|
"epoch": 0.07893333333333333,
|
|
"grad_norm": 0.0466296449303627,
|
|
"kl": 0.0598602294921875,
|
|
"learning_rate": 3.5e-06,
|
|
"loss": 0.0043,
|
|
"mask/has_final_conf_rate": 0.94140625,
|
|
"mask/share_final_conf": 0.0345626175403595,
|
|
"mask/share_reasoning": 0.8509548306465149,
|
|
"mask/share_step_conf": 0.1105763390660286,
|
|
"num_tokens": 17931617.0,
|
|
"reward": 0.8789986968040466,
|
|
"reward_std": 0.19282305240631104,
|
|
"rewards/accuracy_reward_step": 0.4921875,
|
|
"rewards/asymmetric_l2_reward": 0.7778134346008301,
|
|
"rewards/final_brier_reward_step": 0.6950277090072632,
|
|
"rewards/format_reward_step": 0.93359375,
|
|
"step": 74
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.628893256187439,
|
|
"adv/mean_abs_reasoning": 0.4681423604488373,
|
|
"adv/mean_abs_step_conf": 0.7405921816825867,
|
|
"adv/ratio_final_to_reasoning": 1.3433803674260107,
|
|
"adv/ratio_step_to_reasoning": 1.581980705554043,
|
|
"adv/std_final_conf": 0.8261920809745789,
|
|
"adv/std_reasoning": 0.7575705051422119,
|
|
"adv/std_step_conf": 0.9339452981948853,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.8038342655284075,
|
|
"calib/avg_num_step_conf": 4.921875,
|
|
"calib/ece": 0.18517928286852595,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.95703125,
|
|
"calib/frac_conf_gt_0.9": 0.7330677290836654,
|
|
"calib/gap": 0.26919782160730044,
|
|
"calib/mean_conf": 0.8500398406374503,
|
|
"calib/mu_c": 0.934767441860465,
|
|
"calib/mu_w": 0.6655696202531646,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.97265625,
|
|
"calib/pce": 0.17498007968127494,
|
|
"calib/std_conf": 0.2661478156350438,
|
|
"calib/step_conf_rate": 0.97265625,
|
|
"calib/step_q_c": 0.6109479191438763,
|
|
"calib/step_q_c_n": 841.0,
|
|
"calib/step_q_gap": 0.051716654227408454,
|
|
"calib/step_q_w": 0.5592312649164678,
|
|
"calib/step_q_w_n": 419.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2218.0,
|
|
"completions/max_terminated_length": 2218.0,
|
|
"completions/mean_length": 469.1328125,
|
|
"completions/mean_terminated_length": 469.1328125,
|
|
"completions/min_length": 165.0,
|
|
"completions/min_terminated_length": 165.0,
|
|
"epoch": 0.08,
|
|
"grad_norm": 0.02828267775475979,
|
|
"kl": 0.0692291259765625,
|
|
"learning_rate": 3.4722222222222224e-06,
|
|
"loss": -0.0302,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.035453006625175476,
|
|
"mask/share_reasoning": 0.8479112386703491,
|
|
"mask/share_step_conf": 0.1166357472538948,
|
|
"num_tokens": 18156467.0,
|
|
"reward": 0.9559888243675232,
|
|
"reward_std": 0.22168438136577606,
|
|
"rewards/accuracy_reward_step": 0.671875,
|
|
"rewards/asymmetric_l2_reward": 0.8216685652732849,
|
|
"rewards/final_brier_reward_step": 0.7645277380943298,
|
|
"rewards/format_reward_step": 0.95703125,
|
|
"step": 75
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.622812032699585,
|
|
"adv/mean_abs_reasoning": 0.5359321236610413,
|
|
"adv/mean_abs_step_conf": 0.7628259062767029,
|
|
"adv/ratio_final_to_reasoning": 1.1621099113168523,
|
|
"adv/ratio_step_to_reasoning": 1.4233629084700363,
|
|
"adv/std_final_conf": 0.8540507555007935,
|
|
"adv/std_reasoning": 0.7928803563117981,
|
|
"adv/std_step_conf": 0.9348656535148621,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.7480906148867315,
|
|
"calib/avg_num_step_conf": 4.6171875,
|
|
"calib/ece": 0.25411067193675896,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.6956521739130435,
|
|
"calib/gap": 0.22156634304207123,
|
|
"calib/mean_conf": 0.8291304347826086,
|
|
"calib/mu_c": 0.9193333333333332,
|
|
"calib/mu_w": 0.697766990291262,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.96484375,
|
|
"calib/pce": 0.24517786561264823,
|
|
"calib/std_conf": 0.2779797863926332,
|
|
"calib/step_conf_rate": 0.96484375,
|
|
"calib/step_q_c": 0.595253709198813,
|
|
"calib/step_q_c_n": 674.0,
|
|
"calib/step_q_gap": 0.01054898478936428,
|
|
"calib/step_q_w": 0.5847047244094488,
|
|
"calib/step_q_w_n": 508.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1479.0,
|
|
"completions/max_terminated_length": 1479.0,
|
|
"completions/mean_length": 486.13671875,
|
|
"completions/mean_terminated_length": 488.04315185546875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 190.0,
|
|
"epoch": 0.08106666666666666,
|
|
"grad_norm": 0.043174050748348236,
|
|
"kl": 0.0677642822265625,
|
|
"learning_rate": 3.444444444444445e-06,
|
|
"loss": -0.0746,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03488195687532425,
|
|
"mask/share_reasoning": 0.8566423058509827,
|
|
"mask/share_step_conf": 0.10456950962543488,
|
|
"num_tokens": 18383974.0,
|
|
"reward": 0.9037140607833862,
|
|
"reward_std": 0.22321432828903198,
|
|
"rewards/accuracy_reward_step": 0.5859375,
|
|
"rewards/asymmetric_l2_reward": 0.7896616458892822,
|
|
"rewards/final_brier_reward_step": 0.7076101303100586,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"step": 76
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6962723731994629,
|
|
"adv/mean_abs_reasoning": 0.5586796998977661,
|
|
"adv/mean_abs_step_conf": 0.7595263123512268,
|
|
"adv/ratio_final_to_reasoning": 1.2462818558234263,
|
|
"adv/ratio_step_to_reasoning": 1.359502255926274,
|
|
"adv/std_final_conf": 0.863667905330658,
|
|
"adv/std_reasoning": 0.7929328680038452,
|
|
"adv/std_step_conf": 0.9350054860115051,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.6630559540889527,
|
|
"calib/avg_num_step_conf": 5.11328125,
|
|
"calib/ece": 0.22903614457831326,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.94140625,
|
|
"calib/frac_conf_gt_0.9": 0.6305220883534136,
|
|
"calib/gap": 0.147654949784792,
|
|
"calib/mean_conf": 0.8014859437751004,
|
|
"calib/mu_c": 0.851890243902439,
|
|
"calib/mu_w": 0.704235294117647,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.95703125,
|
|
"calib/pce": 0.1859437751004016,
|
|
"calib/std_conf": 0.29090324783614796,
|
|
"calib/step_conf_rate": 0.95703125,
|
|
"calib/step_q_c": 0.5586034255599472,
|
|
"calib/step_q_c_n": 759.0,
|
|
"calib/step_q_gap": 0.03651251646903819,
|
|
"calib/step_q_w": 0.522090909090909,
|
|
"calib/step_q_w_n": 550.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 3045.0,
|
|
"completions/max_terminated_length": 3045.0,
|
|
"completions/mean_length": 512.046875,
|
|
"completions/mean_terminated_length": 512.046875,
|
|
"completions/min_length": 107.0,
|
|
"completions/min_terminated_length": 107.0,
|
|
"epoch": 0.08213333333333334,
|
|
"grad_norm": 0.025604577735066414,
|
|
"kl": 0.06995391845703125,
|
|
"learning_rate": 3.416666666666667e-06,
|
|
"loss": -0.0046,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03763299062848091,
|
|
"mask/share_reasoning": 0.8435276746749878,
|
|
"mask/share_step_conf": 0.11883929371833801,
|
|
"num_tokens": 18619722.0,
|
|
"reward": 0.9079852104187012,
|
|
"reward_std": 0.24078664183616638,
|
|
"rewards/accuracy_reward_step": 0.640625,
|
|
"rewards/asymmetric_l2_reward": 0.8027098774909973,
|
|
"rewards/final_brier_reward_step": 0.696854293346405,
|
|
"rewards/format_reward_step": 0.94140625,
|
|
"step": 77
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7457473278045654,
|
|
"adv/mean_abs_reasoning": 0.6220015287399292,
|
|
"adv/mean_abs_step_conf": 0.7352344989776611,
|
|
"adv/ratio_final_to_reasoning": 1.1989477410374287,
|
|
"adv/ratio_step_to_reasoning": 1.1820461285153478,
|
|
"adv/std_final_conf": 0.9125654101371765,
|
|
"adv/std_reasoning": 0.8429985642433167,
|
|
"adv/std_step_conf": 0.9346243143081665,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.6882824726134585,
|
|
"calib/avg_num_step_conf": 5.11328125,
|
|
"calib/ece": 0.2450129333333334,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.94921875,
|
|
"calib/frac_conf_gt_0.9": 0.572,
|
|
"calib/gap": 0.196561737089202,
|
|
"calib/mean_conf": 0.7558137333333333,
|
|
"calib/mu_c": 0.8407284037558685,
|
|
"calib/mu_w": 0.6441666666666666,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.953125,
|
|
"calib/pce": 0.2164133333333334,
|
|
"calib/std_conf": 0.3190380118248817,
|
|
"calib/step_conf_rate": 0.953125,
|
|
"calib/step_q_c": 0.5449234693877552,
|
|
"calib/step_q_c_n": 784.0,
|
|
"calib/step_q_gap": 0.02516442176870748,
|
|
"calib/step_q_w": 0.5197590476190477,
|
|
"calib/step_q_w_n": 525.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1655.0,
|
|
"completions/max_terminated_length": 1655.0,
|
|
"completions/mean_length": 531.875,
|
|
"completions/mean_terminated_length": 533.9608154296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 135.0,
|
|
"epoch": 0.0832,
|
|
"grad_norm": 0.02739427238702774,
|
|
"kl": 0.0735321044921875,
|
|
"learning_rate": 3.3888888888888893e-06,
|
|
"loss": -0.1401,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03082401677966118,
|
|
"mask/share_reasoning": 0.8617717027664185,
|
|
"mask/share_step_conf": 0.10349804162979126,
|
|
"num_tokens": 18863906.0,
|
|
"reward": 0.8943131566047668,
|
|
"reward_std": 0.24572047591209412,
|
|
"rewards/accuracy_reward_step": 0.5546875,
|
|
"rewards/asymmetric_l2_reward": 0.8101315498352051,
|
|
"rewards/final_brier_reward_step": 0.6777135133743286,
|
|
"rewards/format_reward_step": 0.94921875,
|
|
"step": 78
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6829380989074707,
|
|
"adv/mean_abs_reasoning": 0.46346795558929443,
|
|
"adv/mean_abs_step_conf": 0.7522290945053101,
|
|
"adv/ratio_final_to_reasoning": 1.47353898078913,
|
|
"adv/ratio_step_to_reasoning": 1.6230444530924668,
|
|
"adv/std_final_conf": 0.8693846464157104,
|
|
"adv/std_reasoning": 0.7205665707588196,
|
|
"adv/std_step_conf": 0.9347177147865295,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6895833333333334,
|
|
"calib/avg_num_step_conf": 5.640625,
|
|
"calib/ece": 0.24385826771653552,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.6417322834645669,
|
|
"calib/gap": 0.20205384615384603,
|
|
"calib/mean_conf": 0.7956692913385828,
|
|
"calib/mu_c": 0.8784,
|
|
"calib/mu_w": 0.6763461538461539,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.22448818897637804,
|
|
"calib/std_conf": 0.3087352675593296,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.5525216316440049,
|
|
"calib/step_q_c_n": 809.0,
|
|
"calib/step_q_gap": 0.06615942691959542,
|
|
"calib/step_q_w": 0.4863622047244095,
|
|
"calib/step_q_w_n": 635.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 1882.0,
|
|
"completions/max_terminated_length": 1882.0,
|
|
"completions/mean_length": 510.8671875,
|
|
"completions/mean_terminated_length": 514.8897705078125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 103.0,
|
|
"epoch": 0.08426666666666667,
|
|
"grad_norm": 0.031064137816429138,
|
|
"kl": 0.07048797607421875,
|
|
"learning_rate": 3.3611111111111117e-06,
|
|
"loss": -0.0686,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.033156618475914,
|
|
"mask/share_reasoning": 0.8510840535163879,
|
|
"mask/share_step_conf": 0.10794685781002045,
|
|
"num_tokens": 19101064.0,
|
|
"reward": 0.9335497617721558,
|
|
"reward_std": 0.19944220781326294,
|
|
"rewards/accuracy_reward_step": 0.5859375,
|
|
"rewards/asymmetric_l2_reward": 0.847852885723114,
|
|
"rewards/final_brier_reward_step": 0.7051839828491211,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 79
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6879172325134277,
|
|
"adv/mean_abs_reasoning": 0.5587252378463745,
|
|
"adv/mean_abs_step_conf": 0.7600916028022766,
|
|
"adv/ratio_final_to_reasoning": 1.231226345108426,
|
|
"adv/ratio_step_to_reasoning": 1.3604032023541224,
|
|
"adv/std_final_conf": 0.8734791278839111,
|
|
"adv/std_reasoning": 0.7755234241485596,
|
|
"adv/std_step_conf": 0.9350786805152893,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.7002287581699347,
|
|
"calib/avg_num_step_conf": 5.828125,
|
|
"calib/ece": 0.2808827404479579,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.782608695652174,
|
|
"calib/gap": 0.18351067538126353,
|
|
"calib/mean_conf": 0.876376811594203,
|
|
"calib/mu_c": 0.9489106753812636,
|
|
"calib/mu_w": 0.7654000000000001,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
|
"calib/pce": 0.27625823451910414,
|
|
"calib/std_conf": 0.2520777155119635,
|
|
"calib/step_conf_rate": 0.9765625,
|
|
"calib/step_q_c": 0.5498933962264151,
|
|
"calib/step_q_c_n": 848.0,
|
|
"calib/step_q_gap": 0.07029712293448959,
|
|
"calib/step_q_w": 0.4795962732919255,
|
|
"calib/step_q_w_n": 644.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1319.0,
|
|
"completions/max_terminated_length": 1319.0,
|
|
"completions/mean_length": 464.578125,
|
|
"completions/mean_terminated_length": 466.4000244140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 171.0,
|
|
"epoch": 0.08533333333333333,
|
|
"grad_norm": 0.025256939232349396,
|
|
"kl": 0.0876312255859375,
|
|
"learning_rate": 3.3333333333333333e-06,
|
|
"loss": -0.0673,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.03492242097854614,
|
|
"mask/share_reasoning": 0.8304387331008911,
|
|
"mask/share_step_conf": 0.13073261082172394,
|
|
"num_tokens": 19322156.0,
|
|
"reward": 0.9114360809326172,
|
|
"reward_std": 0.23360256850719452,
|
|
"rewards/accuracy_reward_step": 0.59765625,
|
|
"rewards/asymmetric_l2_reward": 0.8124538660049438,
|
|
"rewards/final_brier_reward_step": 0.6955744028091431,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 80
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7039898633956909,
|
|
"adv/mean_abs_reasoning": 0.5247969627380371,
|
|
"adv/mean_abs_step_conf": 0.7595534324645996,
|
|
"adv/ratio_final_to_reasoning": 1.341451863064805,
|
|
"adv/ratio_step_to_reasoning": 1.4473281790766497,
|
|
"adv/std_final_conf": 0.8915676474571228,
|
|
"adv/std_reasoning": 0.7928330898284912,
|
|
"adv/std_step_conf": 0.9346292018890381,
|
|
"calib/answer_extract_rate": 0.9609375,
|
|
"calib/auroc": 0.6952564809707666,
|
|
"calib/avg_num_step_conf": 5.0703125,
|
|
"calib/ece": 0.2584027100271004,
|
|
"calib/final_conf_rate": 0.9609375,
|
|
"calib/format_rate": 0.93359375,
|
|
"calib/frac_conf_gt_0.9": 0.6422764227642277,
|
|
"calib/gap": 0.22656881779738924,
|
|
"calib/mean_conf": 0.7620579945799457,
|
|
"calib/mu_c": 0.8523171171171171,
|
|
"calib/mu_w": 0.6257482993197279,
|
|
"calib/nonempty_final_conf_rate": 0.9609375,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.97265625,
|
|
"calib/pce": 0.20941734417344188,
|
|
"calib/std_conf": 0.3406476325655169,
|
|
"calib/step_conf_rate": 0.97265625,
|
|
"calib/step_q_c": 0.5421875,
|
|
"calib/step_q_c_n": 608.0,
|
|
"calib/step_q_gap": 0.10437039855072472,
|
|
"calib/step_q_w": 0.4378171014492753,
|
|
"calib/step_q_w_n": 690.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2644.0,
|
|
"completions/max_terminated_length": 2644.0,
|
|
"completions/mean_length": 529.484375,
|
|
"completions/mean_terminated_length": 529.484375,
|
|
"completions/min_length": 141.0,
|
|
"completions/min_terminated_length": 141.0,
|
|
"epoch": 0.0864,
|
|
"grad_norm": 0.03870345279574394,
|
|
"kl": 0.07418060302734375,
|
|
"learning_rate": 3.3055555555555558e-06,
|
|
"loss": 0.0557,
|
|
"mask/has_final_conf_rate": 0.9609375,
|
|
"mask/share_final_conf": 0.035466842353343964,
|
|
"mask/share_reasoning": 0.8551498055458069,
|
|
"mask/share_step_conf": 0.10938338935375214,
|
|
"num_tokens": 19563952.0,
|
|
"reward": 0.8947978019714355,
|
|
"reward_std": 0.23964188992977142,
|
|
"rewards/accuracy_reward_step": 0.578125,
|
|
"rewards/asymmetric_l2_reward": 0.8084914684295654,
|
|
"rewards/final_brier_reward_step": 0.6787604093551636,
|
|
"rewards/format_reward_step": 0.93359375,
|
|
"step": 81
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6892867088317871,
|
|
"adv/mean_abs_reasoning": 0.49717363715171814,
|
|
"adv/mean_abs_step_conf": 0.7668944597244263,
|
|
"adv/ratio_final_to_reasoning": 1.3864104154449435,
|
|
"adv/ratio_step_to_reasoning": 1.5425082957292842,
|
|
"adv/std_final_conf": 0.8581146001815796,
|
|
"adv/std_reasoning": 0.7393258810043335,
|
|
"adv/std_step_conf": 0.9347033500671387,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.6737952575216726,
|
|
"calib/avg_num_step_conf": 5.015625,
|
|
"calib/ece": 0.2896194225721785,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.7401574803149606,
|
|
"calib/gap": 0.1977885432602413,
|
|
"calib/mean_conf": 0.8453412073490814,
|
|
"calib/mu_c": 0.9278828828828828,
|
|
"calib/mu_w": 0.7300943396226415,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.2761417322834646,
|
|
"calib/std_conf": 0.2911954452589784,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.5398409090909091,
|
|
"calib/step_q_c_n": 704.0,
|
|
"calib/step_q_gap": 0.06357194357366774,
|
|
"calib/step_q_w": 0.47626896551724135,
|
|
"calib/step_q_w_n": 580.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2208.0,
|
|
"completions/max_terminated_length": 2208.0,
|
|
"completions/mean_length": 454.90234375,
|
|
"completions/mean_terminated_length": 454.90234375,
|
|
"completions/min_length": 129.0,
|
|
"completions/min_terminated_length": 129.0,
|
|
"epoch": 0.08746666666666666,
|
|
"grad_norm": 0.02759123221039772,
|
|
"kl": 0.09171295166015625,
|
|
"learning_rate": 3.277777777777778e-06,
|
|
"loss": 0.0137,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.037247899919748306,
|
|
"mask/share_reasoning": 0.8433917760848999,
|
|
"mask/share_step_conf": 0.11936035752296448,
|
|
"num_tokens": 19785959.0,
|
|
"reward": 0.908469557762146,
|
|
"reward_std": 0.21961763501167297,
|
|
"rewards/accuracy_reward_step": 0.578125,
|
|
"rewards/asymmetric_l2_reward": 0.8281220197677612,
|
|
"rewards/final_brier_reward_step": 0.6786607503890991,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 82
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6679799556732178,
|
|
"adv/mean_abs_reasoning": 0.5168710350990295,
|
|
"adv/mean_abs_step_conf": 0.7405879497528076,
|
|
"adv/ratio_final_to_reasoning": 1.2923532376799498,
|
|
"adv/ratio_step_to_reasoning": 1.4328292735748196,
|
|
"adv/std_final_conf": 0.8723222613334656,
|
|
"adv/std_reasoning": 0.7929161190986633,
|
|
"adv/std_step_conf": 0.9351591467857361,
|
|
"calib/answer_extract_rate": 0.9609375,
|
|
"calib/auroc": 0.7158919511860689,
|
|
"calib/avg_num_step_conf": 5.39453125,
|
|
"calib/ece": 0.2495102040816326,
|
|
"calib/final_conf_rate": 0.95703125,
|
|
"calib/format_rate": 0.91015625,
|
|
"calib/frac_conf_gt_0.9": 0.6775510204081633,
|
|
"calib/gap": 0.2795797339914986,
|
|
"calib/mean_conf": 0.7936734693877551,
|
|
"calib/mu_c": 0.91006993006993,
|
|
"calib/mu_w": 0.6304901960784314,
|
|
"calib/nonempty_final_conf_rate": 0.95703125,
|
|
"calib/nonempty_reasoning_rate": 0.98046875,
|
|
"calib/nonempty_step_conf_rate": 0.9375,
|
|
"calib/pce": 0.22975510204081628,
|
|
"calib/std_conf": 0.3341653866316157,
|
|
"calib/step_conf_rate": 0.9375,
|
|
"calib/step_q_c": 0.48273054054054054,
|
|
"calib/step_q_c_n": 740.0,
|
|
"calib/step_q_gap": 0.04936725929054059,
|
|
"calib/step_q_w": 0.43336328124999995,
|
|
"calib/step_q_w_n": 640.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2648.0,
|
|
"completions/max_terminated_length": 2648.0,
|
|
"completions/mean_length": 554.6796875,
|
|
"completions/mean_terminated_length": 556.8549194335938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 164.0,
|
|
"epoch": 0.08853333333333334,
|
|
"grad_norm": 0.03267595171928406,
|
|
"kl": 0.07877349853515625,
|
|
"learning_rate": 3.2500000000000002e-06,
|
|
"loss": -0.0906,
|
|
"mask/has_final_conf_rate": 0.95703125,
|
|
"mask/share_final_conf": 0.03222406283020973,
|
|
"mask/share_reasoning": 0.8548700213432312,
|
|
"mask/share_step_conf": 0.10899969935417175,
|
|
"num_tokens": 20035221.0,
|
|
"reward": 0.8798561096191406,
|
|
"reward_std": 0.2699333727359772,
|
|
"rewards/accuracy_reward_step": 0.5625,
|
|
"rewards/asymmetric_l2_reward": 0.7940636277198792,
|
|
"rewards/final_brier_reward_step": 0.6711171865463257,
|
|
"rewards/format_reward_step": 0.91015625,
|
|
"step": 83
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.711301326751709,
|
|
"adv/mean_abs_reasoning": 0.5254456996917725,
|
|
"adv/mean_abs_step_conf": 0.7322558164596558,
|
|
"adv/ratio_final_to_reasoning": 1.3537104351010196,
|
|
"adv/ratio_step_to_reasoning": 1.393589893093042,
|
|
"adv/std_final_conf": 0.8830074071884155,
|
|
"adv/std_reasoning": 0.792775571346283,
|
|
"adv/std_step_conf": 0.934762716293335,
|
|
"calib/answer_extract_rate": 0.96484375,
|
|
"calib/auroc": 0.7625246548323472,
|
|
"calib/avg_num_step_conf": 4.6953125,
|
|
"calib/ece": 0.29368421052631577,
|
|
"calib/final_conf_rate": 0.96484375,
|
|
"calib/format_rate": 0.94921875,
|
|
"calib/frac_conf_gt_0.9": 0.680161943319838,
|
|
"calib/gap": 0.274820512820513,
|
|
"calib/mean_conf": 0.785668016194332,
|
|
"calib/mu_c": 0.9158461538461539,
|
|
"calib/mu_w": 0.6410256410256409,
|
|
"calib/nonempty_final_conf_rate": 0.96484375,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.97265625,
|
|
"calib/pce": 0.27651821862348175,
|
|
"calib/std_conf": 0.3377123849800877,
|
|
"calib/step_conf_rate": 0.97265625,
|
|
"calib/step_q_c": 0.49211437403400315,
|
|
"calib/step_q_c_n": 647.0,
|
|
"calib/step_q_gap": 0.05764590556553467,
|
|
"calib/step_q_w": 0.4344684684684685,
|
|
"calib/step_q_w_n": 555.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2879.0,
|
|
"completions/max_terminated_length": 2879.0,
|
|
"completions/mean_length": 498.296875,
|
|
"completions/mean_terminated_length": 498.296875,
|
|
"completions/min_length": 163.0,
|
|
"completions/min_terminated_length": 163.0,
|
|
"epoch": 0.0896,
|
|
"grad_norm": 0.041336141526699066,
|
|
"kl": 0.08129119873046875,
|
|
"learning_rate": 3.2222222222222227e-06,
|
|
"loss": -0.0067,
|
|
"mask/has_final_conf_rate": 0.96484375,
|
|
"mask/share_final_conf": 0.036794595420360565,
|
|
"mask/share_reasoning": 0.8531485795974731,
|
|
"mask/share_step_conf": 0.11005677282810211,
|
|
"num_tokens": 20268705.0,
|
|
"reward": 0.8911169767379761,
|
|
"reward_std": 0.24814923107624054,
|
|
"rewards/accuracy_reward_step": 0.5078125,
|
|
"rewards/asymmetric_l2_reward": 0.8201553225517273,
|
|
"rewards/final_brier_reward_step": 0.6706722974777222,
|
|
"rewards/format_reward_step": 0.94921875,
|
|
"step": 84
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7375852465629578,
|
|
"adv/mean_abs_reasoning": 0.6083944439888,
|
|
"adv/mean_abs_step_conf": 0.7445007562637329,
|
|
"adv/ratio_final_to_reasoning": 1.212347111073447,
|
|
"adv/ratio_step_to_reasoning": 1.2237139303616626,
|
|
"adv/std_final_conf": 0.8906832933425903,
|
|
"adv/std_reasoning": 0.8099904656410217,
|
|
"adv/std_step_conf": 0.9348547458648682,
|
|
"calib/answer_extract_rate": 0.94140625,
|
|
"calib/auroc": 0.7101584022038567,
|
|
"calib/avg_num_step_conf": 5.140625,
|
|
"calib/ece": 0.31475795297372056,
|
|
"calib/final_conf_rate": 0.94140625,
|
|
"calib/format_rate": 0.921875,
|
|
"calib/frac_conf_gt_0.9": 0.6473029045643154,
|
|
"calib/gap": 0.2235241046831956,
|
|
"calib/mean_conf": 0.7624757952973721,
|
|
"calib/mu_c": 0.8737741046831956,
|
|
"calib/mu_w": 0.65025,
|
|
"calib/nonempty_final_conf_rate": 0.94140625,
|
|
"calib/nonempty_reasoning_rate": 0.96484375,
|
|
"calib/nonempty_step_conf_rate": 0.9453125,
|
|
"calib/pce": 0.2875795297372061,
|
|
"calib/std_conf": 0.3512132715214306,
|
|
"calib/step_conf_rate": 0.9453125,
|
|
"calib/step_q_c": 0.46668918918918917,
|
|
"calib/step_q_c_n": 592.0,
|
|
"calib/step_q_gap": 0.060110920312577754,
|
|
"calib/step_q_w": 0.4065782688766114,
|
|
"calib/step_q_w_n": 724.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2932.0,
|
|
"completions/max_terminated_length": 2932.0,
|
|
"completions/mean_length": 548.95703125,
|
|
"completions/mean_terminated_length": 553.279541015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 145.0,
|
|
"epoch": 0.09066666666666667,
|
|
"grad_norm": 0.03381429985165596,
|
|
"kl": 0.07662200927734375,
|
|
"learning_rate": 3.1944444444444443e-06,
|
|
"loss": -0.0631,
|
|
"mask/has_final_conf_rate": 0.94140625,
|
|
"mask/share_final_conf": 0.033809252083301544,
|
|
"mask/share_reasoning": 0.844788670539856,
|
|
"mask/share_step_conf": 0.11358959972858429,
|
|
"num_tokens": 20517062.0,
|
|
"reward": 0.848590612411499,
|
|
"reward_std": 0.26348742842674255,
|
|
"rewards/accuracy_reward_step": 0.47265625,
|
|
"rewards/asymmetric_l2_reward": 0.8029472827911377,
|
|
"rewards/final_brier_reward_step": 0.6153277158737183,
|
|
"rewards/format_reward_step": 0.921875,
|
|
"step": 85
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7561108469963074,
|
|
"adv/mean_abs_reasoning": 0.4853861927986145,
|
|
"adv/mean_abs_step_conf": 0.7464814782142639,
|
|
"adv/ratio_final_to_reasoning": 1.5577510407470858,
|
|
"adv/ratio_step_to_reasoning": 1.5379124690594097,
|
|
"adv/std_final_conf": 0.9270208477973938,
|
|
"adv/std_reasoning": 0.7754190564155579,
|
|
"adv/std_step_conf": 0.9350854158401489,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.6685927067283,
|
|
"calib/avg_num_step_conf": 4.6015625,
|
|
"calib/ece": 0.31136,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.95703125,
|
|
"calib/frac_conf_gt_0.9": 0.568,
|
|
"calib/gap": 0.23257960965588087,
|
|
"calib/mean_conf": 0.69152,
|
|
"calib/mu_c": 0.814322033898305,
|
|
"calib/mu_w": 0.5817424242424242,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.96484375,
|
|
"calib/pce": 0.26544,
|
|
"calib/std_conf": 0.3847857710466956,
|
|
"calib/step_conf_rate": 0.96484375,
|
|
"calib/step_q_c": 0.46849652777777784,
|
|
"calib/step_q_c_n": 576.0,
|
|
"calib/step_q_gap": 0.058679252030269524,
|
|
"calib/step_q_w": 0.4098172757475083,
|
|
"calib/step_q_w_n": 602.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1938.0,
|
|
"completions/max_terminated_length": 1938.0,
|
|
"completions/mean_length": 488.9609375,
|
|
"completions/mean_terminated_length": 490.8784484863281,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 148.0,
|
|
"epoch": 0.09173333333333333,
|
|
"grad_norm": 0.044561564922332764,
|
|
"kl": 0.08454132080078125,
|
|
"learning_rate": 3.1666666666666667e-06,
|
|
"loss": -0.0951,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.03566384315490723,
|
|
"mask/share_reasoning": 0.8496521711349487,
|
|
"mask/share_step_conf": 0.11077769100666046,
|
|
"num_tokens": 20747748.0,
|
|
"reward": 0.8705247640609741,
|
|
"reward_std": 0.25272929668426514,
|
|
"rewards/accuracy_reward_step": 0.4609375,
|
|
"rewards/asymmetric_l2_reward": 0.8211120367050171,
|
|
"rewards/final_brier_reward_step": 0.6363437175750732,
|
|
"rewards/format_reward_step": 0.95703125,
|
|
"step": 86
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7711803913116455,
|
|
"adv/mean_abs_reasoning": 0.6064971685409546,
|
|
"adv/mean_abs_step_conf": 0.7583389282226562,
|
|
"adv/ratio_final_to_reasoning": 1.271531725641635,
|
|
"adv/ratio_step_to_reasoning": 1.2503585631685408,
|
|
"adv/std_final_conf": 0.9249719381332397,
|
|
"adv/std_reasoning": 0.8746480345726013,
|
|
"adv/std_step_conf": 0.9346683621406555,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.6656976744186047,
|
|
"calib/avg_num_step_conf": 5.203125,
|
|
"calib/ece": 0.2269611780455154,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 0.5943775100401606,
|
|
"calib/gap": 0.2687181616832781,
|
|
"calib/mean_conf": 0.6965729585006694,
|
|
"calib/mu_c": 0.779670542635659,
|
|
"calib/mu_w": 0.5109523809523809,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
|
"calib/pce": 0.1163855421686747,
|
|
"calib/std_conf": 0.389896312445251,
|
|
"calib/step_conf_rate": 0.9765625,
|
|
"calib/step_q_c": 0.4399458997722096,
|
|
"calib/step_q_c_n": 878.0,
|
|
"calib/step_q_gap": 0.051287529728156755,
|
|
"calib/step_q_w": 0.38865837004405285,
|
|
"calib/step_q_w_n": 454.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2910.0,
|
|
"completions/max_terminated_length": 2910.0,
|
|
"completions/mean_length": 489.95703125,
|
|
"completions/mean_terminated_length": 491.8784484863281,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 120.0,
|
|
"epoch": 0.0928,
|
|
"grad_norm": 0.04757794737815857,
|
|
"kl": 0.0927886962890625,
|
|
"learning_rate": 3.138888888888889e-06,
|
|
"loss": -0.0186,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.036881767213344574,
|
|
"mask/share_reasoning": 0.8420805931091309,
|
|
"mask/share_step_conf": 0.11713138222694397,
|
|
"num_tokens": 20978673.0,
|
|
"reward": 0.947090744972229,
|
|
"reward_std": 0.24253800511360168,
|
|
"rewards/accuracy_reward_step": 0.671875,
|
|
"rewards/asymmetric_l2_reward": 0.8506828546524048,
|
|
"rewards/final_brier_reward_step": 0.7169361114501953,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"step": 87
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6935921907424927,
|
|
"adv/mean_abs_reasoning": 0.47695034742355347,
|
|
"adv/mean_abs_step_conf": 0.7630202770233154,
|
|
"adv/ratio_final_to_reasoning": 1.4542230538026035,
|
|
"adv/ratio_step_to_reasoning": 1.5997897499081155,
|
|
"adv/std_final_conf": 0.8939658999443054,
|
|
"adv/std_reasoning": 0.7392098903656006,
|
|
"adv/std_step_conf": 0.9342235922813416,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.7492699596147873,
|
|
"calib/avg_num_step_conf": 5.25390625,
|
|
"calib/ece": 0.21371054687500007,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.51171875,
|
|
"calib/gap": 0.3207307300403851,
|
|
"calib/mean_conf": 0.664726953125,
|
|
"calib/mu_c": 0.8037937931034482,
|
|
"calib/mu_w": 0.4830630630630631,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
|
"calib/pce": 0.15601562500000005,
|
|
"calib/std_conf": 0.3798028370295785,
|
|
"calib/step_conf_rate": 0.9765625,
|
|
"calib/step_q_c": 0.4338300492610837,
|
|
"calib/step_q_c_n": 812.0,
|
|
"calib/step_q_gap": 0.04416776033050207,
|
|
"calib/step_q_w": 0.38966228893058164,
|
|
"calib/step_q_w_n": 533.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1490.0,
|
|
"completions/max_terminated_length": 1490.0,
|
|
"completions/mean_length": 505.84375,
|
|
"completions/mean_terminated_length": 507.8274841308594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 147.0,
|
|
"epoch": 0.09386666666666667,
|
|
"grad_norm": 0.04795219376683235,
|
|
"kl": 0.08171844482421875,
|
|
"learning_rate": 3.1111111111111116e-06,
|
|
"loss": -0.0578,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.03347531333565712,
|
|
"mask/share_reasoning": 0.8500121831893921,
|
|
"mask/share_step_conf": 0.11260630190372467,
|
|
"num_tokens": 21218017.0,
|
|
"reward": 0.950875461101532,
|
|
"reward_std": 0.1858539879322052,
|
|
"rewards/accuracy_reward_step": 0.56640625,
|
|
"rewards/asymmetric_l2_reward": 0.8525054454803467,
|
|
"rewards/final_brier_reward_step": 0.740651547908783,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 88
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7229398488998413,
|
|
"adv/mean_abs_reasoning": 0.47888630628585815,
|
|
"adv/mean_abs_step_conf": 0.743857204914093,
|
|
"adv/ratio_final_to_reasoning": 1.5096273153158446,
|
|
"adv/ratio_step_to_reasoning": 1.5533064845459743,
|
|
"adv/std_final_conf": 0.891949474811554,
|
|
"adv/std_reasoning": 0.75757896900177,
|
|
"adv/std_step_conf": 0.9348465204238892,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.7704152467499685,
|
|
"calib/avg_num_step_conf": 5.07421875,
|
|
"calib/ece": 0.1756086956521739,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 0.43478260869565216,
|
|
"calib/gap": 0.37849211157389884,
|
|
"calib/mean_conf": 0.5794901185770751,
|
|
"calib/mu_c": 0.7500359712230216,
|
|
"calib/mu_w": 0.37154385964912273,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.97265625,
|
|
"calib/pce": 0.1028458498023715,
|
|
"calib/std_conf": 0.4025525435239658,
|
|
"calib/step_conf_rate": 0.97265625,
|
|
"calib/step_q_c": 0.44734042553191494,
|
|
"calib/step_q_c_n": 658.0,
|
|
"calib/step_q_gap": 0.09799409167856082,
|
|
"calib/step_q_w": 0.3493463338533541,
|
|
"calib/step_q_w_n": 641.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2286.0,
|
|
"completions/max_terminated_length": 2286.0,
|
|
"completions/mean_length": 525.1953125,
|
|
"completions/mean_terminated_length": 525.1953125,
|
|
"completions/min_length": 143.0,
|
|
"completions/min_terminated_length": 143.0,
|
|
"epoch": 0.09493333333333333,
|
|
"grad_norm": 0.033446215093135834,
|
|
"kl": 0.0828399658203125,
|
|
"learning_rate": 3.0833333333333336e-06,
|
|
"loss": -0.0353,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03558123856782913,
|
|
"mask/share_reasoning": 0.8521634936332703,
|
|
"mask/share_step_conf": 0.11225523054599762,
|
|
"num_tokens": 21461355.0,
|
|
"reward": 0.9454695582389832,
|
|
"reward_std": 0.22363021969795227,
|
|
"rewards/accuracy_reward_step": 0.54296875,
|
|
"rewards/asymmetric_l2_reward": 0.8446345329284668,
|
|
"rewards/final_brier_reward_step": 0.7455234527587891,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"step": 89
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7110995054244995,
|
|
"adv/mean_abs_reasoning": 0.510982096195221,
|
|
"adv/mean_abs_step_conf": 0.7581682205200195,
|
|
"adv/ratio_final_to_reasoning": 1.3916329177075974,
|
|
"adv/ratio_step_to_reasoning": 1.4837471335401955,
|
|
"adv/std_final_conf": 0.893814206123352,
|
|
"adv/std_reasoning": 0.7575870156288147,
|
|
"adv/std_step_conf": 0.933884859085083,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.7209821428571428,
|
|
"calib/avg_num_step_conf": 6.0859375,
|
|
"calib/ece": 0.21139442231075695,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.545816733067729,
|
|
"calib/gap": 0.3062506868131869,
|
|
"calib/mean_conf": 0.678406374501992,
|
|
"calib/mu_c": 0.7894375,
|
|
"calib/mu_w": 0.4831868131868131,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.12617529880478087,
|
|
"calib/std_conf": 0.3872137809409691,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4215166484118291,
|
|
"calib/step_q_c_n": 913.0,
|
|
"calib/step_q_gap": 0.06626083445834074,
|
|
"calib/step_q_w": 0.3552558139534884,
|
|
"calib/step_q_w_n": 645.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2538.0,
|
|
"completions/max_terminated_length": 2538.0,
|
|
"completions/mean_length": 533.96484375,
|
|
"completions/mean_terminated_length": 533.96484375,
|
|
"completions/min_length": 157.0,
|
|
"completions/min_terminated_length": 157.0,
|
|
"epoch": 0.096,
|
|
"grad_norm": 0.029829688370227814,
|
|
"kl": 0.0811309814453125,
|
|
"learning_rate": 3.055555555555556e-06,
|
|
"loss": 0.0606,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03403983637690544,
|
|
"mask/share_reasoning": 0.8410810828208923,
|
|
"mask/share_step_conf": 0.12487903982400894,
|
|
"num_tokens": 21701370.0,
|
|
"reward": 0.9619349241256714,
|
|
"reward_std": 0.1955302506685257,
|
|
"rewards/accuracy_reward_step": 0.625,
|
|
"rewards/asymmetric_l2_reward": 0.8681378364562988,
|
|
"rewards/final_brier_reward_step": 0.736200749874115,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 90
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7319117784500122,
|
|
"adv/mean_abs_reasoning": 0.5304526090621948,
|
|
"adv/mean_abs_step_conf": 0.7541947364807129,
|
|
"adv/ratio_final_to_reasoning": 1.3797873098295885,
|
|
"adv/ratio_step_to_reasoning": 1.4217947533787785,
|
|
"adv/std_final_conf": 0.8858980536460876,
|
|
"adv/std_reasoning": 0.7927713394165039,
|
|
"adv/std_step_conf": 0.9336501359939575,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.7143346346140675,
|
|
"calib/avg_num_step_conf": 6.0,
|
|
"calib/ece": 0.23464520000000003,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.52,
|
|
"calib/gap": 0.28583001849188405,
|
|
"calib/mean_conf": 0.6415948000000001,
|
|
"calib/mu_c": 0.7479235668789809,
|
|
"calib/mu_w": 0.4620935483870968,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.12412000000000004,
|
|
"calib/std_conf": 0.4015845626676404,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4017911318553092,
|
|
"calib/step_q_c_n": 857.0,
|
|
"calib/step_q_gap": 0.06388833362261404,
|
|
"calib/step_q_w": 0.33790279823269515,
|
|
"calib/step_q_w_n": 679.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2684.0,
|
|
"completions/max_terminated_length": 2684.0,
|
|
"completions/mean_length": 534.22265625,
|
|
"completions/mean_terminated_length": 540.5573120117188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 208.0,
|
|
"epoch": 0.09706666666666666,
|
|
"grad_norm": 0.04101370647549629,
|
|
"kl": 0.0859375,
|
|
"learning_rate": 3.0277777777777776e-06,
|
|
"loss": -0.0541,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.03051559254527092,
|
|
"mask/share_reasoning": 0.8473619222640991,
|
|
"mask/share_step_conf": 0.11040370911359787,
|
|
"num_tokens": 21945843.0,
|
|
"reward": 0.9472370147705078,
|
|
"reward_std": 0.20020480453968048,
|
|
"rewards/accuracy_reward_step": 0.61328125,
|
|
"rewards/asymmetric_l2_reward": 0.8600229024887085,
|
|
"rewards/final_brier_reward_step": 0.7172636389732361,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 91
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6581138372421265,
|
|
"adv/mean_abs_reasoning": 0.5041226744651794,
|
|
"adv/mean_abs_step_conf": 0.7552310228347778,
|
|
"adv/ratio_final_to_reasoning": 1.3054636710009428,
|
|
"adv/ratio_step_to_reasoning": 1.4981096092057309,
|
|
"adv/std_final_conf": 0.8489682674407959,
|
|
"adv/std_reasoning": 0.7752818465232849,
|
|
"adv/std_step_conf": 0.933874785900116,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.7361003611971104,
|
|
"calib/avg_num_step_conf": 5.03515625,
|
|
"calib/ece": 0.21250656167979007,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.5984251968503937,
|
|
"calib/gap": 0.2983836429308565,
|
|
"calib/mean_conf": 0.7195406824146982,
|
|
"calib/mu_c": 0.8393640350877193,
|
|
"calib/mu_w": 0.5409803921568628,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.1668110236220473,
|
|
"calib/std_conf": 0.36532456459468643,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.4126775956284153,
|
|
"calib/step_q_c_n": 732.0,
|
|
"calib/step_q_gap": 0.06470632094259843,
|
|
"calib/step_q_w": 0.34797127468581684,
|
|
"calib/step_q_w_n": 557.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1995.0,
|
|
"completions/max_terminated_length": 1995.0,
|
|
"completions/mean_length": 476.046875,
|
|
"completions/mean_terminated_length": 476.046875,
|
|
"completions/min_length": 165.0,
|
|
"completions/min_terminated_length": 165.0,
|
|
"epoch": 0.09813333333333334,
|
|
"grad_norm": 0.030025403946638107,
|
|
"kl": 0.0915374755859375,
|
|
"learning_rate": 3e-06,
|
|
"loss": -0.0822,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03648817166686058,
|
|
"mask/share_reasoning": 0.8472324013710022,
|
|
"mask/share_step_conf": 0.11627940833568573,
|
|
"num_tokens": 22174431.0,
|
|
"reward": 0.9668546915054321,
|
|
"reward_std": 0.18710312247276306,
|
|
"rewards/accuracy_reward_step": 0.59375,
|
|
"rewards/asymmetric_l2_reward": 0.870896577835083,
|
|
"rewards/final_brier_reward_step": 0.7471877336502075,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 92
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6915310025215149,
|
|
"adv/mean_abs_reasoning": 0.5947073698043823,
|
|
"adv/mean_abs_step_conf": 0.7633007764816284,
|
|
"adv/ratio_final_to_reasoning": 1.1628088663992526,
|
|
"adv/ratio_step_to_reasoning": 1.28348968793291,
|
|
"adv/std_final_conf": 0.8915781378746033,
|
|
"adv/std_reasoning": 0.8265567421913147,
|
|
"adv/std_step_conf": 0.9339955449104309,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.6820295846521542,
|
|
"calib/avg_num_step_conf": 6.15625,
|
|
"calib/ece": 0.27851960000000003,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.504,
|
|
"calib/gap": 0.23995874297526,
|
|
"calib/mean_conf": 0.6413204,
|
|
"calib/mu_c": 0.7497817518248175,
|
|
"calib/mu_w": 0.5098230088495576,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.18592000000000006,
|
|
"calib/std_conf": 0.40051339126655927,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.4214962078651686,
|
|
"calib/step_q_c_n": 712.0,
|
|
"calib/step_q_gap": 0.1148526893466501,
|
|
"calib/step_q_w": 0.3066435185185185,
|
|
"calib/step_q_w_n": 864.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2548.0,
|
|
"completions/max_terminated_length": 2548.0,
|
|
"completions/mean_length": 537.7578125,
|
|
"completions/mean_terminated_length": 539.86669921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 129.0,
|
|
"epoch": 0.0992,
|
|
"grad_norm": 0.027518408372998238,
|
|
"kl": 0.081146240234375,
|
|
"learning_rate": 2.9722222222222225e-06,
|
|
"loss": 0.0065,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.03318294137716293,
|
|
"mask/share_reasoning": 0.8384314775466919,
|
|
"mask/share_step_conf": 0.12447934597730637,
|
|
"num_tokens": 22417873.0,
|
|
"reward": 0.9200072288513184,
|
|
"reward_std": 0.20049725472927094,
|
|
"rewards/accuracy_reward_step": 0.53515625,
|
|
"rewards/asymmetric_l2_reward": 0.8553484678268433,
|
|
"rewards/final_brier_reward_step": 0.6831035017967224,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 93
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6092467904090881,
|
|
"adv/mean_abs_reasoning": 0.5352387428283691,
|
|
"adv/mean_abs_step_conf": 0.7489203214645386,
|
|
"adv/ratio_final_to_reasoning": 1.1382710959779132,
|
|
"adv/ratio_step_to_reasoning": 1.3992266656688734,
|
|
"adv/std_final_conf": 0.83155757188797,
|
|
"adv/std_reasoning": 0.7928059697151184,
|
|
"adv/std_step_conf": 0.9342983961105347,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.7352882966090515,
|
|
"calib/avg_num_step_conf": 5.47265625,
|
|
"calib/ece": 0.1942570281124499,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 0.6144578313253012,
|
|
"calib/gap": 0.34813497822931794,
|
|
"calib/mean_conf": 0.7179518072289156,
|
|
"calib/mu_c": 0.8661538461538462,
|
|
"calib/mu_w": 0.5180188679245282,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.1689558232931728,
|
|
"calib/std_conf": 0.37905852205075696,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.4239228723404256,
|
|
"calib/step_q_c_n": 752.0,
|
|
"calib/step_q_gap": 0.09478573828803727,
|
|
"calib/step_q_w": 0.3291371340523883,
|
|
"calib/step_q_w_n": 649.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2317.0,
|
|
"completions/max_terminated_length": 2317.0,
|
|
"completions/mean_length": 487.31640625,
|
|
"completions/mean_terminated_length": 491.1535339355469,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 174.0,
|
|
"epoch": 0.10026666666666667,
|
|
"grad_norm": 0.04939788579940796,
|
|
"kl": 0.08742523193359375,
|
|
"learning_rate": 2.944444444444445e-06,
|
|
"loss": -0.0123,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03488625958561897,
|
|
"mask/share_reasoning": 0.8387618064880371,
|
|
"mask/share_step_conf": 0.11853942275047302,
|
|
"num_tokens": 22651306.0,
|
|
"reward": 0.9398282766342163,
|
|
"reward_std": 0.20985865592956543,
|
|
"rewards/accuracy_reward_step": 0.55859375,
|
|
"rewards/asymmetric_l2_reward": 0.8465824127197266,
|
|
"rewards/final_brier_reward_step": 0.7291679382324219,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"step": 94
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5665971636772156,
|
|
"adv/mean_abs_reasoning": 0.4219573140144348,
|
|
"adv/mean_abs_step_conf": 0.7465494871139526,
|
|
"adv/ratio_final_to_reasoning": 1.342783131987215,
|
|
"adv/ratio_step_to_reasoning": 1.7692535768876703,
|
|
"adv/std_final_conf": 0.799198567867279,
|
|
"adv/std_reasoning": 0.7206243276596069,
|
|
"adv/std_step_conf": 0.9329851865768433,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.7522349936143038,
|
|
"calib/avg_num_step_conf": 5.78125,
|
|
"calib/ece": 0.1738554216867471,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.6506024096385542,
|
|
"calib/gap": 0.3717943805874839,
|
|
"calib/mean_conf": 0.7402811244979919,
|
|
"calib/mu_c": 0.870185185185185,
|
|
"calib/mu_w": 0.4983908045977011,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
|
"calib/pce": 0.13176706827309248,
|
|
"calib/std_conf": 0.3756979065059484,
|
|
"calib/step_conf_rate": 0.98046875,
|
|
"calib/step_q_c": 0.3911706315789474,
|
|
"calib/step_q_c_n": 950.0,
|
|
"calib/step_q_gap": 0.0785217636544191,
|
|
"calib/step_q_w": 0.3126488679245283,
|
|
"calib/step_q_w_n": 530.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2545.0,
|
|
"completions/max_terminated_length": 2545.0,
|
|
"completions/mean_length": 511.7265625,
|
|
"completions/mean_terminated_length": 511.7265625,
|
|
"completions/min_length": 119.0,
|
|
"completions/min_terminated_length": 119.0,
|
|
"epoch": 0.10133333333333333,
|
|
"grad_norm": 0.02649582363665104,
|
|
"kl": 0.07971954345703125,
|
|
"learning_rate": 2.916666666666667e-06,
|
|
"loss": -0.0095,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03568973019719124,
|
|
"mask/share_reasoning": 0.8352484107017517,
|
|
"mask/share_step_conf": 0.12906186282634735,
|
|
"num_tokens": 22888436.0,
|
|
"reward": 0.9772668480873108,
|
|
"reward_std": 0.18089798092842102,
|
|
"rewards/accuracy_reward_step": 0.6328125,
|
|
"rewards/asymmetric_l2_reward": 0.867957592010498,
|
|
"rewards/final_brier_reward_step": 0.7670449018478394,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"step": 95
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6147359609603882,
|
|
"adv/mean_abs_reasoning": 0.4808635711669922,
|
|
"adv/mean_abs_step_conf": 0.7362314462661743,
|
|
"adv/ratio_final_to_reasoning": 1.2783999408990485,
|
|
"adv/ratio_step_to_reasoning": 1.5310609711595289,
|
|
"adv/std_final_conf": 0.8461417555809021,
|
|
"adv/std_reasoning": 0.7393701672554016,
|
|
"adv/std_step_conf": 0.9344803690910339,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.7698014629049112,
|
|
"calib/avg_num_step_conf": 5.21484375,
|
|
"calib/ece": 0.17388888888888893,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.6746031746031746,
|
|
"calib/gap": 0.4222988505747126,
|
|
"calib/mean_conf": 0.7508730158730158,
|
|
"calib/mu_c": 0.8966666666666666,
|
|
"calib/mu_w": 0.474367816091954,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.13500000000000006,
|
|
"calib/std_conf": 0.37173556876008074,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.415,
|
|
"calib/step_q_c_n": 790.0,
|
|
"calib/step_q_gap": 0.07909174311926603,
|
|
"calib/step_q_w": 0.33590825688073395,
|
|
"calib/step_q_w_n": 545.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2628.0,
|
|
"completions/max_terminated_length": 2628.0,
|
|
"completions/mean_length": 462.59765625,
|
|
"completions/mean_terminated_length": 462.59765625,
|
|
"completions/min_length": 177.0,
|
|
"completions/min_terminated_length": 177.0,
|
|
"epoch": 0.1024,
|
|
"grad_norm": 0.026068033650517464,
|
|
"kl": 0.093292236328125,
|
|
"learning_rate": 2.888888888888889e-06,
|
|
"loss": 0.0366,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.03652406111359596,
|
|
"mask/share_reasoning": 0.8425166606903076,
|
|
"mask/share_step_conf": 0.12095930427312851,
|
|
"num_tokens": 23112677.0,
|
|
"reward": 0.9878829717636108,
|
|
"reward_std": 0.2013242542743683,
|
|
"rewards/accuracy_reward_step": 0.64453125,
|
|
"rewards/asymmetric_l2_reward": 0.849997878074646,
|
|
"rewards/final_brier_reward_step": 0.8007679581642151,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 96
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7184832096099854,
|
|
"adv/mean_abs_reasoning": 0.59392249584198,
|
|
"adv/mean_abs_step_conf": 0.7223066091537476,
|
|
"adv/ratio_final_to_reasoning": 1.2097255359748929,
|
|
"adv/ratio_step_to_reasoning": 1.2161630755032482,
|
|
"adv/std_final_conf": 0.9094932079315186,
|
|
"adv/std_reasoning": 0.8265097737312317,
|
|
"adv/std_step_conf": 0.934262752532959,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6376647834274953,
|
|
"calib/avg_num_step_conf": 5.92578125,
|
|
"calib/ece": 0.28857312252964423,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.5928853754940712,
|
|
"calib/gap": 0.2069691148775895,
|
|
"calib/mean_conf": 0.7016245059288537,
|
|
"calib/mu_c": 0.7981555555555556,
|
|
"calib/mu_w": 0.5911864406779661,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.22830039525691698,
|
|
"calib/std_conf": 0.3787427366765338,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.40408163265306124,
|
|
"calib/step_q_c_n": 735.0,
|
|
"calib/step_q_gap": 0.07610209301111748,
|
|
"calib/step_q_w": 0.32797953964194376,
|
|
"calib/step_q_w_n": 782.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2470.0,
|
|
"completions/max_terminated_length": 2470.0,
|
|
"completions/mean_length": 460.6875,
|
|
"completions/mean_terminated_length": 462.494140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 124.0,
|
|
"epoch": 0.10346666666666667,
|
|
"grad_norm": 0.0355035662651062,
|
|
"kl": 0.0928802490234375,
|
|
"learning_rate": 2.861111111111111e-06,
|
|
"loss": -0.0871,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03594241291284561,
|
|
"mask/share_reasoning": 0.8263546228408813,
|
|
"mask/share_step_conf": 0.13379667699337006,
|
|
"num_tokens": 23335685.0,
|
|
"reward": 0.9316617250442505,
|
|
"reward_std": 0.1955932229757309,
|
|
"rewards/accuracy_reward_step": 0.53125,
|
|
"rewards/asymmetric_l2_reward": 0.8849480152130127,
|
|
"rewards/final_brier_reward_step": 0.6744691133499146,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 97
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6633474826812744,
|
|
"adv/mean_abs_reasoning": 0.5149575471878052,
|
|
"adv/mean_abs_step_conf": 0.7673736214637756,
|
|
"adv/ratio_final_to_reasoning": 1.2881595508286654,
|
|
"adv/ratio_step_to_reasoning": 1.490168705467898,
|
|
"adv/std_final_conf": 0.8635463714599609,
|
|
"adv/std_reasoning": 0.7928749918937683,
|
|
"adv/std_step_conf": 0.9342027306556702,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.7269079083927212,
|
|
"calib/avg_num_step_conf": 4.8359375,
|
|
"calib/ece": 0.24089430894308933,
|
|
"calib/final_conf_rate": 0.9609375,
|
|
"calib/format_rate": 0.94921875,
|
|
"calib/frac_conf_gt_0.9": 0.6382113821138211,
|
|
"calib/gap": 0.33125164325745526,
|
|
"calib/mean_conf": 0.718780487804878,
|
|
"calib/mu_c": 0.8493959731543624,
|
|
"calib/mu_w": 0.5181443298969072,
|
|
"calib/nonempty_final_conf_rate": 0.9609375,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
|
"calib/pce": 0.1769918699186991,
|
|
"calib/std_conf": 0.3954385963986233,
|
|
"calib/step_conf_rate": 0.9765625,
|
|
"calib/step_q_c": 0.410972602739726,
|
|
"calib/step_q_c_n": 730.0,
|
|
"calib/step_q_gap": 0.04305724840901737,
|
|
"calib/step_q_w": 0.36791535433070865,
|
|
"calib/step_q_w_n": 508.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 3024.0,
|
|
"completions/max_terminated_length": 3024.0,
|
|
"completions/mean_length": 510.96875,
|
|
"completions/mean_terminated_length": 512.9725952148438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.10453333333333334,
|
|
"grad_norm": 0.054965581744909286,
|
|
"kl": 0.08089447021484375,
|
|
"learning_rate": 2.8333333333333335e-06,
|
|
"loss": -0.0625,
|
|
"mask/has_final_conf_rate": 0.9609375,
|
|
"mask/share_final_conf": 0.035400211811065674,
|
|
"mask/share_reasoning": 0.8491687774658203,
|
|
"mask/share_step_conf": 0.11152474582195282,
|
|
"num_tokens": 23572677.0,
|
|
"reward": 0.932797372341156,
|
|
"reward_std": 0.24011409282684326,
|
|
"rewards/accuracy_reward_step": 0.58203125,
|
|
"rewards/asymmetric_l2_reward": 0.8460032343864441,
|
|
"rewards/final_brier_reward_step": 0.7133413553237915,
|
|
"rewards/format_reward_step": 0.94921875,
|
|
"step": 98
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7435926198959351,
|
|
"adv/mean_abs_reasoning": 0.6365935802459717,
|
|
"adv/mean_abs_step_conf": 0.7237546443939209,
|
|
"adv/ratio_final_to_reasoning": 1.1680806137074462,
|
|
"adv/ratio_step_to_reasoning": 1.1369179125467639,
|
|
"adv/std_final_conf": 0.9068244099617004,
|
|
"adv/std_reasoning": 0.859096884727478,
|
|
"adv/std_step_conf": 0.9345636367797852,
|
|
"calib/answer_extract_rate": 0.96484375,
|
|
"calib/auroc": 0.6776324614352783,
|
|
"calib/avg_num_step_conf": 5.875,
|
|
"calib/ece": 0.2825910931174089,
|
|
"calib/final_conf_rate": 0.96484375,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 0.4534412955465587,
|
|
"calib/gap": 0.30777598926894706,
|
|
"calib/mean_conf": 0.5718218623481782,
|
|
"calib/mu_c": 0.7487619047619049,
|
|
"calib/mu_w": 0.4409859154929578,
|
|
"calib/nonempty_final_conf_rate": 0.96484375,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.2146558704453441,
|
|
"calib/std_conf": 0.41920147804862407,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.3843027210884354,
|
|
"calib/step_q_c_n": 588.0,
|
|
"calib/step_q_gap": 0.06500141104476725,
|
|
"calib/step_q_w": 0.31930131004366813,
|
|
"calib/step_q_w_n": 916.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 2887.0,
|
|
"completions/max_terminated_length": 2887.0,
|
|
"completions/mean_length": 586.73828125,
|
|
"completions/mean_terminated_length": 596.0516357421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 120.0,
|
|
"epoch": 0.1056,
|
|
"grad_norm": 0.060040753334760666,
|
|
"kl": 0.07159423828125,
|
|
"learning_rate": 2.805555555555556e-06,
|
|
"loss": -0.0674,
|
|
"mask/has_final_conf_rate": 0.96484375,
|
|
"mask/share_final_conf": 0.031188489869236946,
|
|
"mask/share_reasoning": 0.8459464311599731,
|
|
"mask/share_step_conf": 0.10724010318517685,
|
|
"num_tokens": 23828682.0,
|
|
"reward": 0.8999078273773193,
|
|
"reward_std": 0.24172960221767426,
|
|
"rewards/accuracy_reward_step": 0.41015625,
|
|
"rewards/asymmetric_l2_reward": 0.8456334471702576,
|
|
"rewards/final_brier_reward_step": 0.6799633502960205,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"step": 99
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7108124494552612,
|
|
"adv/mean_abs_reasoning": 0.5766913294792175,
|
|
"adv/mean_abs_step_conf": 0.7223371863365173,
|
|
"adv/ratio_final_to_reasoning": 1.232570030309216,
|
|
"adv/ratio_step_to_reasoning": 1.2525542684833249,
|
|
"adv/std_final_conf": 0.9088829755783081,
|
|
"adv/std_reasoning": 0.8098970055580139,
|
|
"adv/std_step_conf": 0.9337549805641174,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.7749548037190084,
|
|
"calib/avg_num_step_conf": 5.78125,
|
|
"calib/ece": 0.19120481927710847,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.5261044176706827,
|
|
"calib/gap": 0.4201213842975208,
|
|
"calib/mean_conf": 0.6380321285140562,
|
|
"calib/mu_c": 0.8421875000000001,
|
|
"calib/mu_w": 0.4220661157024793,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.15759036144578317,
|
|
"calib/std_conf": 0.4125205884747448,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.39832669322709163,
|
|
"calib/step_q_c_n": 753.0,
|
|
"calib/step_q_gap": 0.06741885278692661,
|
|
"calib/step_q_w": 0.330907840440165,
|
|
"calib/step_q_w_n": 727.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2536.0,
|
|
"completions/max_terminated_length": 2536.0,
|
|
"completions/mean_length": 584.765625,
|
|
"completions/mean_terminated_length": 584.765625,
|
|
"completions/min_length": 147.0,
|
|
"completions/min_terminated_length": 147.0,
|
|
"epoch": 0.10666666666666667,
|
|
"grad_norm": 0.024518176913261414,
|
|
"kl": 0.07038116455078125,
|
|
"learning_rate": 2.7777777777777783e-06,
|
|
"loss": 0.0789,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03139622509479523,
|
|
"mask/share_reasoning": 0.8543316721916199,
|
|
"mask/share_step_conf": 0.1142721176147461,
|
|
"num_tokens": 24085790.0,
|
|
"reward": 0.9550304412841797,
|
|
"reward_std": 0.22531947493553162,
|
|
"rewards/accuracy_reward_step": 0.5,
|
|
"rewards/asymmetric_l2_reward": 0.8710557818412781,
|
|
"rewards/final_brier_reward_step": 0.7460362911224365,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"step": 100
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6849584579467773,
|
|
"adv/mean_abs_reasoning": 0.5034070014953613,
|
|
"adv/mean_abs_step_conf": 0.7498930096626282,
|
|
"adv/ratio_final_to_reasoning": 1.3606454735673534,
|
|
"adv/ratio_step_to_reasoning": 1.4896356376353221,
|
|
"adv/std_final_conf": 0.8788121938705444,
|
|
"adv/std_reasoning": 0.7753551006317139,
|
|
"adv/std_step_conf": 0.9335606098175049,
|
|
"calib/answer_extract_rate": 0.95703125,
|
|
"calib/auroc": 0.6994607347489045,
|
|
"calib/avg_num_step_conf": 6.359375,
|
|
"calib/ece": 0.23389344262295084,
|
|
"calib/final_conf_rate": 0.953125,
|
|
"calib/format_rate": 0.953125,
|
|
"calib/frac_conf_gt_0.9": 0.4098360655737705,
|
|
"calib/gap": 0.30889046174587115,
|
|
"calib/mean_conf": 0.5250409836065575,
|
|
"calib/mu_c": 0.6883478260869564,
|
|
"calib/mu_w": 0.3794573643410853,
|
|
"calib/nonempty_final_conf_rate": 0.953125,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.1438114754098361,
|
|
"calib/std_conf": 0.4189100787257916,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.3757354925775978,
|
|
"calib/step_q_c_n": 741.0,
|
|
"calib/step_q_gap": 0.07503650723374206,
|
|
"calib/step_q_w": 0.30069898534385575,
|
|
"calib/step_q_w_n": 887.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2754.0,
|
|
"completions/max_terminated_length": 2754.0,
|
|
"completions/mean_length": 583.92578125,
|
|
"completions/mean_terminated_length": 586.2156982421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 167.0,
|
|
"epoch": 0.10773333333333333,
|
|
"grad_norm": 0.034379322081804276,
|
|
"kl": 0.0738525390625,
|
|
"learning_rate": 2.7500000000000004e-06,
|
|
"loss": -0.0217,
|
|
"mask/has_final_conf_rate": 0.953125,
|
|
"mask/share_final_conf": 0.030435508117079735,
|
|
"mask/share_reasoning": 0.8451032638549805,
|
|
"mask/share_step_conf": 0.12055499106645584,
|
|
"num_tokens": 24342267.0,
|
|
"reward": 0.9144086837768555,
|
|
"reward_std": 0.21262916922569275,
|
|
"rewards/accuracy_reward_step": 0.453125,
|
|
"rewards/asymmetric_l2_reward": 0.855229377746582,
|
|
"rewards/final_brier_reward_step": 0.6923378705978394,
|
|
"rewards/format_reward_step": 0.953125,
|
|
"step": 101
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5878695845603943,
|
|
"adv/mean_abs_reasoning": 0.3899923264980316,
|
|
"adv/mean_abs_step_conf": 0.7400535941123962,
|
|
"adv/ratio_final_to_reasoning": 1.5073875679534976,
|
|
"adv/ratio_step_to_reasoning": 1.8976106549525442,
|
|
"adv/std_final_conf": 0.8100722432136536,
|
|
"adv/std_reasoning": 0.681530773639679,
|
|
"adv/std_step_conf": 0.9334313869476318,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.7806785051683011,
|
|
"calib/avg_num_step_conf": 5.70703125,
|
|
"calib/ece": 0.17003968253968255,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.5595238095238095,
|
|
"calib/gap": 0.40557513914656784,
|
|
"calib/mean_conf": 0.6737698412698412,
|
|
"calib/mu_c": 0.8314935064935066,
|
|
"calib/mu_w": 0.4259183673469388,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.11634920634920634,
|
|
"calib/std_conf": 0.38713599724165704,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.40686440677966107,
|
|
"calib/step_q_c_n": 826.0,
|
|
"calib/step_q_gap": 0.0938722807954091,
|
|
"calib/step_q_w": 0.31299212598425197,
|
|
"calib/step_q_w_n": 635.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2741.0,
|
|
"completions/max_terminated_length": 2741.0,
|
|
"completions/mean_length": 480.0234375,
|
|
"completions/mean_terminated_length": 480.0234375,
|
|
"completions/min_length": 124.0,
|
|
"completions/min_terminated_length": 124.0,
|
|
"epoch": 0.1088,
|
|
"grad_norm": 0.06203492358326912,
|
|
"kl": 0.08522796630859375,
|
|
"learning_rate": 2.7222222222222224e-06,
|
|
"loss": 0.0534,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.03778192400932312,
|
|
"mask/share_reasoning": 0.8289196491241455,
|
|
"mask/share_step_conf": 0.13329845666885376,
|
|
"num_tokens": 24571849.0,
|
|
"reward": 0.9879881143569946,
|
|
"reward_std": 0.15715520083904266,
|
|
"rewards/accuracy_reward_step": 0.6015625,
|
|
"rewards/asymmetric_l2_reward": 0.8699907064437866,
|
|
"rewards/final_brier_reward_step": 0.7887980341911316,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 102
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6246525049209595,
|
|
"adv/mean_abs_reasoning": 0.3913930654525757,
|
|
"adv/mean_abs_step_conf": 0.733914852142334,
|
|
"adv/ratio_final_to_reasoning": 1.595972335888632,
|
|
"adv/ratio_step_to_reasoning": 1.8751350417864288,
|
|
"adv/std_final_conf": 0.8219296932220459,
|
|
"adv/std_reasoning": 0.6816326975822449,
|
|
"adv/std_step_conf": 0.9333863854408264,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.7566779346457214,
|
|
"calib/avg_num_step_conf": 5.890625,
|
|
"calib/ece": 0.19665322580645156,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.5806451612903226,
|
|
"calib/gap": 0.37074898919599664,
|
|
"calib/mean_conf": 0.7041532258064516,
|
|
"calib/mu_c": 0.8641134751773051,
|
|
"calib/mu_w": 0.49336448598130844,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.16612903225806447,
|
|
"calib/std_conf": 0.38403745500202563,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.4069795918367347,
|
|
"calib/step_q_c_n": 735.0,
|
|
"calib/step_q_gap": 0.09437933310452257,
|
|
"calib/step_q_w": 0.31260025873221214,
|
|
"calib/step_q_w_n": 773.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2898.0,
|
|
"completions/max_terminated_length": 2898.0,
|
|
"completions/mean_length": 587.85546875,
|
|
"completions/mean_terminated_length": 590.1608276367188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 154.0,
|
|
"epoch": 0.10986666666666667,
|
|
"grad_norm": 0.0305598396807909,
|
|
"kl": 0.06987762451171875,
|
|
"learning_rate": 2.6944444444444444e-06,
|
|
"loss": 0.0375,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.03288574516773224,
|
|
"mask/share_reasoning": 0.8510973453521729,
|
|
"mask/share_step_conf": 0.11211065948009491,
|
|
"num_tokens": 24826892.0,
|
|
"reward": 0.958093523979187,
|
|
"reward_std": 0.1841270923614502,
|
|
"rewards/accuracy_reward_step": 0.55078125,
|
|
"rewards/asymmetric_l2_reward": 0.8656498193740845,
|
|
"rewards/final_brier_reward_step": 0.7466309070587158,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"step": 103
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6745511293411255,
|
|
"adv/mean_abs_reasoning": 0.437224805355072,
|
|
"adv/mean_abs_step_conf": 0.7339380383491516,
|
|
"adv/ratio_final_to_reasoning": 1.54280160018213,
|
|
"adv/ratio_step_to_reasoning": 1.678628543851984,
|
|
"adv/std_final_conf": 0.8667935729026794,
|
|
"adv/std_reasoning": 0.7206018567085266,
|
|
"adv/std_step_conf": 0.9330757260322571,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.7567415025711777,
|
|
"calib/avg_num_step_conf": 6.078125,
|
|
"calib/ece": 0.18438735177865614,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.4505928853754941,
|
|
"calib/gap": 0.4076044149002886,
|
|
"calib/mean_conf": 0.5739525691699605,
|
|
"calib/mu_c": 0.7656716417910449,
|
|
"calib/mu_w": 0.35806722689075626,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.11434782608695651,
|
|
"calib/std_conf": 0.4134958809953477,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.407767253044655,
|
|
"calib/step_q_c_n": 739.0,
|
|
"calib/step_q_gap": 0.12217361779373703,
|
|
"calib/step_q_w": 0.28559363525091797,
|
|
"calib/step_q_w_n": 817.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2485.0,
|
|
"completions/max_terminated_length": 2485.0,
|
|
"completions/mean_length": 541.33203125,
|
|
"completions/mean_terminated_length": 541.33203125,
|
|
"completions/min_length": 187.0,
|
|
"completions/min_terminated_length": 187.0,
|
|
"epoch": 0.11093333333333333,
|
|
"grad_norm": 0.04134015738964081,
|
|
"kl": 0.07474517822265625,
|
|
"learning_rate": 2.666666666666667e-06,
|
|
"loss": -0.0286,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.032088808715343475,
|
|
"mask/share_reasoning": 0.8443004488945007,
|
|
"mask/share_step_conf": 0.12361074984073639,
|
|
"num_tokens": 25072153.0,
|
|
"reward": 0.9815112948417664,
|
|
"reward_std": 0.16471192240715027,
|
|
"rewards/accuracy_reward_step": 0.5234375,
|
|
"rewards/asymmetric_l2_reward": 0.8935236930847168,
|
|
"rewards/final_brier_reward_step": 0.7687175869941711,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 104
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7218550443649292,
|
|
"adv/mean_abs_reasoning": 0.6229248642921448,
|
|
"adv/mean_abs_step_conf": 0.7113521099090576,
|
|
"adv/ratio_final_to_reasoning": 1.1588155903604889,
|
|
"adv/ratio_step_to_reasoning": 1.1419549141249905,
|
|
"adv/std_final_conf": 0.9065027236938477,
|
|
"adv/std_reasoning": 0.8430431485176086,
|
|
"adv/std_step_conf": 0.9336177110671997,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.7497893577030267,
|
|
"calib/avg_num_step_conf": 5.77734375,
|
|
"calib/ece": 0.19215999999999994,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.496,
|
|
"calib/gap": 0.3931103765636139,
|
|
"calib/mean_conf": 0.6092000000000001,
|
|
"calib/mu_c": 0.7837410071942446,
|
|
"calib/mu_w": 0.39063063063063064,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.12267999999999994,
|
|
"calib/std_conf": 0.41203417334002773,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.3973221216041397,
|
|
"calib/step_q_c_n": 773.0,
|
|
"calib/step_q_gap": 0.08317197996108017,
|
|
"calib/step_q_w": 0.31415014164305954,
|
|
"calib/step_q_w_n": 706.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2826.0,
|
|
"completions/max_terminated_length": 2826.0,
|
|
"completions/mean_length": 542.796875,
|
|
"completions/mean_terminated_length": 542.796875,
|
|
"completions/min_length": 129.0,
|
|
"completions/min_terminated_length": 129.0,
|
|
"epoch": 0.112,
|
|
"grad_norm": 0.05769439414143562,
|
|
"kl": 0.07154083251953125,
|
|
"learning_rate": 2.6388888888888893e-06,
|
|
"loss": 0.0324,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.03391508758068085,
|
|
"mask/share_reasoning": 0.8441091179847717,
|
|
"mask/share_step_conf": 0.12197580933570862,
|
|
"num_tokens": 25316869.0,
|
|
"reward": 0.9692540168762207,
|
|
"reward_std": 0.21710465848445892,
|
|
"rewards/accuracy_reward_step": 0.54296875,
|
|
"rewards/asymmetric_l2_reward": 0.8789149522781372,
|
|
"rewards/final_brier_reward_step": 0.7564679384231567,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 105
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6788108348846436,
|
|
"adv/mean_abs_reasoning": 0.4150359034538269,
|
|
"adv/mean_abs_step_conf": 0.7503209114074707,
|
|
"adv/ratio_final_to_reasoning": 1.6355472604556531,
|
|
"adv/ratio_step_to_reasoning": 1.807845791565222,
|
|
"adv/std_final_conf": 0.8545103669166565,
|
|
"adv/std_reasoning": 0.7012789249420166,
|
|
"adv/std_step_conf": 0.9325715899467468,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.7854966677245319,
|
|
"calib/avg_num_step_conf": 5.3671875,
|
|
"calib/ece": 0.18345238095238095,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.5436507936507936,
|
|
"calib/gap": 0.39273310060298333,
|
|
"calib/mean_conf": 0.6619444444444444,
|
|
"calib/mu_c": 0.8411678832116789,
|
|
"calib/mu_w": 0.44843478260869557,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.15087301587301588,
|
|
"calib/std_conf": 0.39241896960675976,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.40268258426966297,
|
|
"calib/step_q_c_n": 712.0,
|
|
"calib/step_q_gap": 0.08248620964730641,
|
|
"calib/step_q_w": 0.32019637462235656,
|
|
"calib/step_q_w_n": 662.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2670.0,
|
|
"completions/max_terminated_length": 2670.0,
|
|
"completions/mean_length": 481.9765625,
|
|
"completions/mean_terminated_length": 481.9765625,
|
|
"completions/min_length": 152.0,
|
|
"completions/min_terminated_length": 152.0,
|
|
"epoch": 0.11306666666666666,
|
|
"grad_norm": 0.04111357033252716,
|
|
"kl": 0.0830841064453125,
|
|
"learning_rate": 2.6111111111111113e-06,
|
|
"loss": -0.0094,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.035246968269348145,
|
|
"mask/share_reasoning": 0.8441534042358398,
|
|
"mask/share_step_conf": 0.1205996572971344,
|
|
"num_tokens": 25544839.0,
|
|
"reward": 0.9766442775726318,
|
|
"reward_std": 0.1666399985551834,
|
|
"rewards/accuracy_reward_step": 0.53515625,
|
|
"rewards/asymmetric_l2_reward": 0.8827617168426514,
|
|
"rewards/final_brier_reward_step": 0.7666206955909729,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 106
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6700491905212402,
|
|
"adv/mean_abs_reasoning": 0.46548759937286377,
|
|
"adv/mean_abs_step_conf": 0.7599701881408691,
|
|
"adv/ratio_final_to_reasoning": 1.4394565857908472,
|
|
"adv/ratio_step_to_reasoning": 1.6326325108654927,
|
|
"adv/std_final_conf": 0.8719359636306763,
|
|
"adv/std_reasoning": 0.7206145524978638,
|
|
"adv/std_step_conf": 0.9324932098388672,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6578612753512619,
|
|
"calib/avg_num_step_conf": 6.25390625,
|
|
"calib/ece": 0.2519291338582677,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.6653543307086615,
|
|
"calib/gap": 0.2677951554453557,
|
|
"calib/mean_conf": 0.7520866141732283,
|
|
"calib/mu_c": 0.8648979591836735,
|
|
"calib/mu_w": 0.5971028037383178,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.21263779527559057,
|
|
"calib/std_conf": 0.3685606932704154,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.40989394285714287,
|
|
"calib/step_q_c_n": 875.0,
|
|
"calib/step_q_gap": 0.08921901172766628,
|
|
"calib/step_q_w": 0.3206749311294766,
|
|
"calib/step_q_w_n": 726.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2140.0,
|
|
"completions/max_terminated_length": 2140.0,
|
|
"completions/mean_length": 491.32421875,
|
|
"completions/mean_terminated_length": 491.32421875,
|
|
"completions/min_length": 160.0,
|
|
"completions/min_terminated_length": 160.0,
|
|
"epoch": 0.11413333333333334,
|
|
"grad_norm": 0.03381947800517082,
|
|
"kl": 0.08031463623046875,
|
|
"learning_rate": 2.5833333333333337e-06,
|
|
"loss": 0.0376,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03411445766687393,
|
|
"mask/share_reasoning": 0.8311688899993896,
|
|
"mask/share_step_conf": 0.13471662998199463,
|
|
"num_tokens": 25775234.0,
|
|
"reward": 0.9557232856750488,
|
|
"reward_std": 0.1572231650352478,
|
|
"rewards/accuracy_reward_step": 0.57421875,
|
|
"rewards/asymmetric_l2_reward": 0.8837653398513794,
|
|
"rewards/final_brier_reward_step": 0.7151812314987183,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 107
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5977965593338013,
|
|
"adv/mean_abs_reasoning": 0.40435507893562317,
|
|
"adv/mean_abs_step_conf": 0.7389947772026062,
|
|
"adv/ratio_final_to_reasoning": 1.4783950801542316,
|
|
"adv/ratio_step_to_reasoning": 1.8275887102688293,
|
|
"adv/std_final_conf": 0.8289056420326233,
|
|
"adv/std_reasoning": 0.6816290020942688,
|
|
"adv/std_step_conf": 0.9333050847053528,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.7069702328323018,
|
|
"calib/avg_num_step_conf": 6.2578125,
|
|
"calib/ece": 0.19730158730158717,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.7063492063492064,
|
|
"calib/gap": 0.33805481874447396,
|
|
"calib/mean_conf": 0.7677777777777778,
|
|
"calib/mu_c": 0.8724137931034484,
|
|
"calib/mu_w": 0.5343589743589744,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.13730158730158717,
|
|
"calib/std_conf": 0.37361466629570667,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.42418063314711363,
|
|
"calib/step_q_c_n": 1074.0,
|
|
"calib/step_q_gap": 0.08563896648044694,
|
|
"calib/step_q_w": 0.3385416666666667,
|
|
"calib/step_q_w_n": 528.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2421.0,
|
|
"completions/max_terminated_length": 2421.0,
|
|
"completions/mean_length": 530.125,
|
|
"completions/mean_terminated_length": 532.2039794921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 127.0,
|
|
"epoch": 0.1152,
|
|
"grad_norm": 0.053452033549547195,
|
|
"kl": 0.07540130615234375,
|
|
"learning_rate": 2.5555555555555557e-06,
|
|
"loss": -0.0121,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.03260500729084015,
|
|
"mask/share_reasoning": 0.8317328691482544,
|
|
"mask/share_step_conf": 0.13175587356090546,
|
|
"num_tokens": 26014178.0,
|
|
"reward": 0.9928351044654846,
|
|
"reward_std": 0.16699038445949554,
|
|
"rewards/accuracy_reward_step": 0.6796875,
|
|
"rewards/asymmetric_l2_reward": 0.8799116611480713,
|
|
"rewards/final_brier_reward_step": 0.7729461193084717,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 108
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6117569804191589,
|
|
"adv/mean_abs_reasoning": 0.3977644145488739,
|
|
"adv/mean_abs_step_conf": 0.7265390157699585,
|
|
"adv/ratio_final_to_reasoning": 1.5379882112204168,
|
|
"adv/ratio_step_to_reasoning": 1.826556095004038,
|
|
"adv/std_final_conf": 0.8428977131843567,
|
|
"adv/std_reasoning": 0.701329231262207,
|
|
"adv/std_step_conf": 0.9332946538925171,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.8309944119888238,
|
|
"calib/avg_num_step_conf": 6.8203125,
|
|
"calib/ece": 0.15553784860557762,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.549800796812749,
|
|
"calib/gap": 0.5199441198882397,
|
|
"calib/mean_conf": 0.6278884462151394,
|
|
"calib/mu_c": 0.8909677419354839,
|
|
"calib/mu_w": 0.3710236220472441,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.14470119521912345,
|
|
"calib/std_conf": 0.4216018631150103,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4085305105853051,
|
|
"calib/step_q_c_n": 803.0,
|
|
"calib/step_q_gap": 0.10964397824172079,
|
|
"calib/step_q_w": 0.2988865323435843,
|
|
"calib/step_q_w_n": 943.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2384.0,
|
|
"completions/max_terminated_length": 2384.0,
|
|
"completions/mean_length": 556.96484375,
|
|
"completions/mean_terminated_length": 556.96484375,
|
|
"completions/min_length": 165.0,
|
|
"completions/min_terminated_length": 165.0,
|
|
"epoch": 0.11626666666666667,
|
|
"grad_norm": 0.02547648921608925,
|
|
"kl": 0.07645416259765625,
|
|
"learning_rate": 2.5277777777777778e-06,
|
|
"loss": -0.0174,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.031178954988718033,
|
|
"mask/share_reasoning": 0.8346405029296875,
|
|
"mask/share_step_conf": 0.13418057560920715,
|
|
"num_tokens": 26261361.0,
|
|
"reward": 0.9844216108322144,
|
|
"reward_std": 0.1782597303390503,
|
|
"rewards/accuracy_reward_step": 0.484375,
|
|
"rewards/asymmetric_l2_reward": 0.8859130144119263,
|
|
"rewards/final_brier_reward_step": 0.791523814201355,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 109
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7054433226585388,
|
|
"adv/mean_abs_reasoning": 0.4843199551105499,
|
|
"adv/mean_abs_step_conf": 0.7825179100036621,
|
|
"adv/ratio_final_to_reasoning": 1.4565646432997288,
|
|
"adv/ratio_step_to_reasoning": 1.615704456829672,
|
|
"adv/std_final_conf": 0.8620911240577698,
|
|
"adv/std_reasoning": 0.7207316160202026,
|
|
"adv/std_step_conf": 0.9318588376045227,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.6559316569954868,
|
|
"calib/avg_num_step_conf": 5.3671875,
|
|
"calib/ece": 0.27661354581673303,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.5896414342629482,
|
|
"calib/gap": 0.260464861379755,
|
|
"calib/mean_conf": 0.6825896414342629,
|
|
"calib/mu_c": 0.7967375886524822,
|
|
"calib/mu_w": 0.5362727272727272,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.1987250996015936,
|
|
"calib/std_conf": 0.4021932734586809,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.44165925925925925,
|
|
"calib/step_q_c_n": 675.0,
|
|
"calib/step_q_gap": 0.09314709903036078,
|
|
"calib/step_q_w": 0.34851216022889847,
|
|
"calib/step_q_w_n": 699.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2391.0,
|
|
"completions/max_terminated_length": 2391.0,
|
|
"completions/mean_length": 491.0390625,
|
|
"completions/mean_terminated_length": 491.0390625,
|
|
"completions/min_length": 99.0,
|
|
"completions/min_terminated_length": 99.0,
|
|
"epoch": 0.11733333333333333,
|
|
"grad_norm": 0.04176861792802811,
|
|
"kl": 0.1174468994140625,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": 0.0367,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03552088141441345,
|
|
"mask/share_reasoning": 0.8433677554130554,
|
|
"mask/share_step_conf": 0.12111136317253113,
|
|
"num_tokens": 26491987.0,
|
|
"reward": 0.929660439491272,
|
|
"reward_std": 0.18326841294765472,
|
|
"rewards/accuracy_reward_step": 0.55078125,
|
|
"rewards/asymmetric_l2_reward": 0.8658431172370911,
|
|
"rewards/final_brier_reward_step": 0.6880090236663818,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 110
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.614783763885498,
|
|
"adv/mean_abs_reasoning": 0.5257919430732727,
|
|
"adv/mean_abs_step_conf": 0.7170236110687256,
|
|
"adv/ratio_final_to_reasoning": 1.169252918354102,
|
|
"adv/ratio_step_to_reasoning": 1.3637021649242036,
|
|
"adv/std_final_conf": 0.8417708277702332,
|
|
"adv/std_reasoning": 0.7755916118621826,
|
|
"adv/std_step_conf": 0.933610200881958,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.6853343013662464,
|
|
"calib/avg_num_step_conf": 5.546875,
|
|
"calib/ece": 0.2681854838709678,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.6370967741935484,
|
|
"calib/gap": 0.30863111345785776,
|
|
"calib/mean_conf": 0.7039919354838711,
|
|
"calib/mu_c": 0.8396402877697843,
|
|
"calib/mu_w": 0.5310091743119265,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.20584677419354847,
|
|
"calib/std_conf": 0.40234468043015986,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4302653631284916,
|
|
"calib/step_q_c_n": 716.0,
|
|
"calib/step_q_gap": 0.07023695403758251,
|
|
"calib/step_q_w": 0.3600284090909091,
|
|
"calib/step_q_w_n": 704.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2537.0,
|
|
"completions/max_terminated_length": 2537.0,
|
|
"completions/mean_length": 529.79296875,
|
|
"completions/mean_terminated_length": 531.87060546875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.1184,
|
|
"grad_norm": 0.03763270750641823,
|
|
"kl": 0.0719451904296875,
|
|
"learning_rate": 2.4722222222222226e-06,
|
|
"loss": -0.0694,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.03530872240662575,
|
|
"mask/share_reasoning": 0.8400354385375977,
|
|
"mask/share_step_conf": 0.12074960768222809,
|
|
"num_tokens": 26735022.0,
|
|
"reward": 0.9260072708129883,
|
|
"reward_std": 0.21023571491241455,
|
|
"rewards/accuracy_reward_step": 0.54296875,
|
|
"rewards/asymmetric_l2_reward": 0.8536633253097534,
|
|
"rewards/final_brier_reward_step": 0.6967886686325073,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"step": 111
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6220129728317261,
|
|
"adv/mean_abs_reasoning": 0.4993098974227905,
|
|
"adv/mean_abs_step_conf": 0.7455708384513855,
|
|
"adv/ratio_final_to_reasoning": 1.2457453297887198,
|
|
"adv/ratio_step_to_reasoning": 1.4932026028317913,
|
|
"adv/std_final_conf": 0.8272049427032471,
|
|
"adv/std_reasoning": 0.7575883865356445,
|
|
"adv/std_step_conf": 0.9338021874427795,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.8360327743902439,
|
|
"calib/avg_num_step_conf": 5.56640625,
|
|
"calib/ece": 0.16860557768924303,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.5059760956175299,
|
|
"calib/gap": 0.5284616361788618,
|
|
"calib/mean_conf": 0.5791235059760956,
|
|
"calib/mu_c": 0.8486178861788618,
|
|
"calib/mu_w": 0.32015625000000003,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.1288446215139442,
|
|
"calib/std_conf": 0.4347972333728267,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.45525581395348835,
|
|
"calib/step_q_c_n": 645.0,
|
|
"calib/step_q_gap": 0.12670453190220632,
|
|
"calib/step_q_w": 0.32855128205128203,
|
|
"calib/step_q_w_n": 780.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2709.0,
|
|
"completions/max_terminated_length": 2709.0,
|
|
"completions/mean_length": 565.5390625,
|
|
"completions/mean_terminated_length": 567.7568969726562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 193.0,
|
|
"epoch": 0.11946666666666667,
|
|
"grad_norm": 0.0486106239259243,
|
|
"kl": 0.07489013671875,
|
|
"learning_rate": 2.4444444444444447e-06,
|
|
"loss": -0.0159,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.030696198344230652,
|
|
"mask/share_reasoning": 0.8548277020454407,
|
|
"mask/share_step_conf": 0.11056986451148987,
|
|
"num_tokens": 26987720.0,
|
|
"reward": 0.973731517791748,
|
|
"reward_std": 0.18799945712089539,
|
|
"rewards/accuracy_reward_step": 0.48046875,
|
|
"rewards/asymmetric_l2_reward": 0.8586279153823853,
|
|
"rewards/final_brier_reward_step": 0.7974288463592529,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 112
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6730247139930725,
|
|
"adv/mean_abs_reasoning": 0.5346618890762329,
|
|
"adv/mean_abs_step_conf": 0.730544924736023,
|
|
"adv/ratio_final_to_reasoning": 1.258785650789319,
|
|
"adv/ratio_step_to_reasoning": 1.3663680536464433,
|
|
"adv/std_final_conf": 0.8757805824279785,
|
|
"adv/std_reasoning": 0.8097303509712219,
|
|
"adv/std_step_conf": 0.933262050151825,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.7831081081081082,
|
|
"calib/avg_num_step_conf": 6.359375,
|
|
"calib/ece": 0.1924557768924303,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.6454183266932271,
|
|
"calib/gap": 0.4057848133848132,
|
|
"calib/mean_conf": 0.7248350597609562,
|
|
"calib/mu_c": 0.9042857142857142,
|
|
"calib/mu_w": 0.49850090090090104,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.1797609561752988,
|
|
"calib/std_conf": 0.3822900440956289,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.40888641425389755,
|
|
"calib/step_q_c_n": 898.0,
|
|
"calib/step_q_gap": 0.06416038685663722,
|
|
"calib/step_q_w": 0.3447260273972603,
|
|
"calib/step_q_w_n": 730.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2549.0,
|
|
"completions/max_terminated_length": 2549.0,
|
|
"completions/mean_length": 490.23828125,
|
|
"completions/mean_terminated_length": 490.23828125,
|
|
"completions/min_length": 181.0,
|
|
"completions/min_terminated_length": 181.0,
|
|
"epoch": 0.12053333333333334,
|
|
"grad_norm": 0.041349541395902634,
|
|
"kl": 0.0941314697265625,
|
|
"learning_rate": 2.4166666666666667e-06,
|
|
"loss": 0.0675,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.035144634544849396,
|
|
"mask/share_reasoning": 0.8297086954116821,
|
|
"mask/share_step_conf": 0.1351466178894043,
|
|
"num_tokens": 27218421.0,
|
|
"reward": 0.982533872127533,
|
|
"reward_std": 0.20815324783325195,
|
|
"rewards/accuracy_reward_step": 0.55078125,
|
|
"rewards/asymmetric_l2_reward": 0.894577145576477,
|
|
"rewards/final_brier_reward_step": 0.7642405033111572,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 113
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5990077257156372,
|
|
"adv/mean_abs_reasoning": 0.44762060046195984,
|
|
"adv/mean_abs_step_conf": 0.7410391569137573,
|
|
"adv/ratio_final_to_reasoning": 1.3382041065523809,
|
|
"adv/ratio_step_to_reasoning": 1.6555072669778368,
|
|
"adv/std_final_conf": 0.8304715156555176,
|
|
"adv/std_reasoning": 0.7391616702079773,
|
|
"adv/std_step_conf": 0.9329032897949219,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.8231817271087959,
|
|
"calib/avg_num_step_conf": 5.91015625,
|
|
"calib/ece": 0.19153543307086618,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.7401574803149606,
|
|
"calib/gap": 0.44428170707273107,
|
|
"calib/mean_conf": 0.7983858267716535,
|
|
"calib/mu_c": 0.9610559006211181,
|
|
"calib/mu_w": 0.516774193548387,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.17803149606299218,
|
|
"calib/std_conf": 0.3566744815421641,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.44985092491838957,
|
|
"calib/step_q_c_n": 919.0,
|
|
"calib/step_q_gap": 0.11490142996889463,
|
|
"calib/step_q_w": 0.33494949494949494,
|
|
"calib/step_q_w_n": 594.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2880.0,
|
|
"completions/max_terminated_length": 2880.0,
|
|
"completions/mean_length": 496.80859375,
|
|
"completions/mean_terminated_length": 498.75689697265625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 150.0,
|
|
"epoch": 0.1216,
|
|
"grad_norm": 0.05756969749927521,
|
|
"kl": 0.081695556640625,
|
|
"learning_rate": 2.388888888888889e-06,
|
|
"loss": 0.0596,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.0352327898144722,
|
|
"mask/share_reasoning": 0.8273290395736694,
|
|
"mask/share_step_conf": 0.13353195786476135,
|
|
"num_tokens": 27450628.0,
|
|
"reward": 1.0205554962158203,
|
|
"reward_std": 0.18426382541656494,
|
|
"rewards/accuracy_reward_step": 0.62890625,
|
|
"rewards/asymmetric_l2_reward": 0.903445839881897,
|
|
"rewards/final_brier_reward_step": 0.8134465217590332,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 114
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6113198399543762,
|
|
"adv/mean_abs_reasoning": 0.49032288789749146,
|
|
"adv/mean_abs_step_conf": 0.7666841745376587,
|
|
"adv/ratio_final_to_reasoning": 1.2467699449554979,
|
|
"adv/ratio_step_to_reasoning": 1.5636312182472385,
|
|
"adv/std_final_conf": 0.8144198060035706,
|
|
"adv/std_reasoning": 0.739387035369873,
|
|
"adv/std_step_conf": 0.9330512881278992,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.6646076046600947,
|
|
"calib/avg_num_step_conf": 5.3671875,
|
|
"calib/ece": 0.3328063241106719,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.7747035573122529,
|
|
"calib/gap": 0.16428946357700702,
|
|
"calib/mean_conf": 0.8259288537549406,
|
|
"calib/mu_c": 0.8954109589041097,
|
|
"calib/mu_w": 0.7311214953271027,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.29083003952569164,
|
|
"calib/std_conf": 0.3312970480832067,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.45648578811369506,
|
|
"calib/step_q_c_n": 774.0,
|
|
"calib/step_q_gap": 0.07716912144702831,
|
|
"calib/step_q_w": 0.37931666666666675,
|
|
"calib/step_q_w_n": 600.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2616.0,
|
|
"completions/max_terminated_length": 2616.0,
|
|
"completions/mean_length": 466.1875,
|
|
"completions/mean_terminated_length": 466.1875,
|
|
"completions/min_length": 115.0,
|
|
"completions/min_terminated_length": 115.0,
|
|
"epoch": 0.12266666666666666,
|
|
"grad_norm": 0.0432070791721344,
|
|
"kl": 0.07665252685546875,
|
|
"learning_rate": 2.361111111111111e-06,
|
|
"loss": 0.0476,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03583553060889244,
|
|
"mask/share_reasoning": 0.835993766784668,
|
|
"mask/share_step_conf": 0.12817072868347168,
|
|
"num_tokens": 27675236.0,
|
|
"reward": 0.9144834280014038,
|
|
"reward_std": 0.20204773545265198,
|
|
"rewards/accuracy_reward_step": 0.5703125,
|
|
"rewards/asymmetric_l2_reward": 0.8649461269378662,
|
|
"rewards/final_brier_reward_step": 0.6530832052230835,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 115
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6029642820358276,
|
|
"adv/mean_abs_reasoning": 0.39965057373046875,
|
|
"adv/mean_abs_step_conf": 0.7622563242912292,
|
|
"adv/ratio_final_to_reasoning": 1.508728678674379,
|
|
"adv/ratio_step_to_reasoning": 1.9073069686253175,
|
|
"adv/std_final_conf": 0.8000175952911377,
|
|
"adv/std_reasoning": 0.661288321018219,
|
|
"adv/std_step_conf": 0.9327738285064697,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.7190315315315317,
|
|
"calib/avg_num_step_conf": 6.15625,
|
|
"calib/ece": 0.3134901960784313,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.803921568627451,
|
|
"calib/gap": 0.25926801801801813,
|
|
"calib/mean_conf": 0.8483921568627453,
|
|
"calib/mu_c": 0.9612500000000002,
|
|
"calib/mu_w": 0.701981981981982,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.2985882352941176,
|
|
"calib/std_conf": 0.32429374716187354,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4388196176226101,
|
|
"calib/step_q_c_n": 802.0,
|
|
"calib/step_q_gap": 0.10645527653733883,
|
|
"calib/step_q_w": 0.3323643410852713,
|
|
"calib/step_q_w_n": 774.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2569.0,
|
|
"completions/max_terminated_length": 2569.0,
|
|
"completions/mean_length": 524.25,
|
|
"completions/mean_terminated_length": 524.25,
|
|
"completions/min_length": 99.0,
|
|
"completions/min_terminated_length": 99.0,
|
|
"epoch": 0.12373333333333333,
|
|
"grad_norm": 0.028108853846788406,
|
|
"kl": 0.0692291259765625,
|
|
"learning_rate": 2.3333333333333336e-06,
|
|
"loss": 0.0478,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.034591950476169586,
|
|
"mask/share_reasoning": 0.838742733001709,
|
|
"mask/share_step_conf": 0.1266653686761856,
|
|
"num_tokens": 27913964.0,
|
|
"reward": 0.9537807106971741,
|
|
"reward_std": 0.19260621070861816,
|
|
"rewards/accuracy_reward_step": 0.5625,
|
|
"rewards/asymmetric_l2_reward": 0.9025558829307556,
|
|
"rewards/final_brier_reward_step": 0.6932867169380188,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 116
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6685183048248291,
|
|
"adv/mean_abs_reasoning": 0.4874965250492096,
|
|
"adv/mean_abs_step_conf": 0.740862250328064,
|
|
"adv/ratio_final_to_reasoning": 1.37132937461933,
|
|
"adv/ratio_step_to_reasoning": 1.519728269351825,
|
|
"adv/std_final_conf": 0.8593764305114746,
|
|
"adv/std_reasoning": 0.7574522495269775,
|
|
"adv/std_step_conf": 0.9332804679870605,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6033924680983505,
|
|
"calib/avg_num_step_conf": 5.78515625,
|
|
"calib/ece": 0.44433070866141733,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.8622047244094488,
|
|
"calib/gap": 0.1423678804855275,
|
|
"calib/mean_conf": 0.8918110236220473,
|
|
"calib/mu_c": 0.9674789915966386,
|
|
"calib/mu_w": 0.8251111111111111,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.4338188976377953,
|
|
"calib/std_conf": 0.27420153896899985,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.44812593703148423,
|
|
"calib/step_q_c_n": 667.0,
|
|
"calib/step_q_gap": 0.06114805005359725,
|
|
"calib/step_q_w": 0.386977886977887,
|
|
"calib/step_q_w_n": 814.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1846.0,
|
|
"completions/max_terminated_length": 1846.0,
|
|
"completions/mean_length": 502.703125,
|
|
"completions/mean_terminated_length": 502.703125,
|
|
"completions/min_length": 199.0,
|
|
"completions/min_terminated_length": 199.0,
|
|
"epoch": 0.1248,
|
|
"grad_norm": 0.04808332771062851,
|
|
"kl": 0.07646942138671875,
|
|
"learning_rate": 2.305555555555556e-06,
|
|
"loss": -0.0376,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03324298560619354,
|
|
"mask/share_reasoning": 0.8397763967514038,
|
|
"mask/share_step_conf": 0.12698057293891907,
|
|
"num_tokens": 28149256.0,
|
|
"reward": 0.8545684814453125,
|
|
"reward_std": 0.2120169848203659,
|
|
"rewards/accuracy_reward_step": 0.46484375,
|
|
"rewards/asymmetric_l2_reward": 0.8599258661270142,
|
|
"rewards/final_brier_reward_step": 0.5593671798706055,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 117
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5423208475112915,
|
|
"adv/mean_abs_reasoning": 0.4119876027107239,
|
|
"adv/mean_abs_step_conf": 0.757232129573822,
|
|
"adv/ratio_final_to_reasoning": 1.3163523463886384,
|
|
"adv/ratio_step_to_reasoning": 1.8379973683468112,
|
|
"adv/std_final_conf": 0.7762859463691711,
|
|
"adv/std_reasoning": 0.7013146877288818,
|
|
"adv/std_step_conf": 0.9321770668029785,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.6684493754982728,
|
|
"calib/avg_num_step_conf": 7.18359375,
|
|
"calib/ece": 0.3157258064516129,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.8266129032258065,
|
|
"calib/gap": 0.24094339622641503,
|
|
"calib/mean_conf": 0.8620161290322582,
|
|
"calib/mu_c": 0.965,
|
|
"calib/mu_w": 0.7240566037735849,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.30258064516129035,
|
|
"calib/std_conf": 0.3080400577184876,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.4405574516496018,
|
|
"calib/step_q_c_n": 879.0,
|
|
"calib/step_q_gap": 0.10321370164960181,
|
|
"calib/step_q_w": 0.33734375,
|
|
"calib/step_q_w_n": 960.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2737.0,
|
|
"completions/max_terminated_length": 2737.0,
|
|
"completions/mean_length": 588.1640625,
|
|
"completions/mean_terminated_length": 590.4706420898438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 135.0,
|
|
"epoch": 0.12586666666666665,
|
|
"grad_norm": 0.045879751443862915,
|
|
"kl": 0.06640625,
|
|
"learning_rate": 2.277777777777778e-06,
|
|
"loss": 0.0014,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.03145633265376091,
|
|
"mask/share_reasoning": 0.8349190950393677,
|
|
"mask/share_step_conf": 0.1297183483839035,
|
|
"num_tokens": 28403834.0,
|
|
"reward": 0.9131325483322144,
|
|
"reward_std": 0.18790775537490845,
|
|
"rewards/accuracy_reward_step": 0.5546875,
|
|
"rewards/asymmetric_l2_reward": 0.8495236039161682,
|
|
"rewards/final_brier_reward_step": 0.672835111618042,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"step": 118
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.579788863658905,
|
|
"adv/mean_abs_reasoning": 0.5056012868881226,
|
|
"adv/mean_abs_step_conf": 0.7397458553314209,
|
|
"adv/ratio_final_to_reasoning": 1.146731384382727,
|
|
"adv/ratio_step_to_reasoning": 1.4631012114000195,
|
|
"adv/std_final_conf": 0.797834575176239,
|
|
"adv/std_reasoning": 0.7394025921821594,
|
|
"adv/std_step_conf": 0.9339104890823364,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.7308721765243504,
|
|
"calib/avg_num_step_conf": 6.29296875,
|
|
"calib/ece": 0.2750200803212853,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.7309236947791165,
|
|
"calib/gap": 0.310268311790051,
|
|
"calib/mean_conf": 0.7846586345381525,
|
|
"calib/mu_c": 0.9229710144927536,
|
|
"calib/mu_w": 0.6127027027027027,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.25273092369477923,
|
|
"calib/std_conf": 0.3692910606631113,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.4636363636363636,
|
|
"calib/step_q_c_n": 770.0,
|
|
"calib/step_q_gap": 0.15116311750081074,
|
|
"calib/step_q_w": 0.3124732461355529,
|
|
"calib/step_q_w_n": 841.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2918.0,
|
|
"completions/max_terminated_length": 2918.0,
|
|
"completions/mean_length": 560.40625,
|
|
"completions/mean_terminated_length": 562.6039428710938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 130.0,
|
|
"epoch": 0.12693333333333334,
|
|
"grad_norm": 0.0336654931306839,
|
|
"kl": 0.0772705078125,
|
|
"learning_rate": 2.25e-06,
|
|
"loss": -0.0541,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03116082400083542,
|
|
"mask/share_reasoning": 0.8479986190795898,
|
|
"mask/share_step_conf": 0.11693429946899414,
|
|
"num_tokens": 28652362.0,
|
|
"reward": 0.9408432841300964,
|
|
"reward_std": 0.20290254056453705,
|
|
"rewards/accuracy_reward_step": 0.5390625,
|
|
"rewards/asymmetric_l2_reward": 0.8821717500686646,
|
|
"rewards/final_brier_reward_step": 0.6971710920333862,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 119
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6259399652481079,
|
|
"adv/mean_abs_reasoning": 0.38975387811660767,
|
|
"adv/mean_abs_step_conf": 0.7443583011627197,
|
|
"adv/ratio_final_to_reasoning": 1.6059877794489512,
|
|
"adv/ratio_step_to_reasoning": 1.9098162788261481,
|
|
"adv/std_final_conf": 0.81560218334198,
|
|
"adv/std_reasoning": 0.6612488627433777,
|
|
"adv/std_step_conf": 0.933025598526001,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.7535215776667947,
|
|
"calib/avg_num_step_conf": 5.359375,
|
|
"calib/ece": 0.23131474103585664,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.6932270916334662,
|
|
"calib/gap": 0.4136899731079523,
|
|
"calib/mean_conf": 0.7381673306772908,
|
|
"calib/mu_c": 0.9260583941605839,
|
|
"calib/mu_w": 0.5123684210526316,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.21183266932270922,
|
|
"calib/std_conf": 0.40169539594567233,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4514522821576764,
|
|
"calib/step_q_c_n": 723.0,
|
|
"calib/step_q_gap": 0.10832439309758396,
|
|
"calib/step_q_w": 0.34312788906009245,
|
|
"calib/step_q_w_n": 649.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2444.0,
|
|
"completions/max_terminated_length": 2444.0,
|
|
"completions/mean_length": 492.75390625,
|
|
"completions/mean_terminated_length": 494.6863098144531,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 164.0,
|
|
"epoch": 0.128,
|
|
"grad_norm": 0.03507012873888016,
|
|
"kl": 0.07172393798828125,
|
|
"learning_rate": 2.222222222222222e-06,
|
|
"loss": -0.0091,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03331432864069939,
|
|
"mask/share_reasoning": 0.845699667930603,
|
|
"mask/share_step_conf": 0.1170797273516655,
|
|
"num_tokens": 28885195.0,
|
|
"reward": 0.9636802673339844,
|
|
"reward_std": 0.18801307678222656,
|
|
"rewards/accuracy_reward_step": 0.53515625,
|
|
"rewards/asymmetric_l2_reward": 0.8802074193954468,
|
|
"rewards/final_brier_reward_step": 0.7440280914306641,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 120
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.693418025970459,
|
|
"adv/mean_abs_reasoning": 0.5543345212936401,
|
|
"adv/mean_abs_step_conf": 0.734613835811615,
|
|
"adv/ratio_final_to_reasoning": 1.250901755770581,
|
|
"adv/ratio_step_to_reasoning": 1.325217549318885,
|
|
"adv/std_final_conf": 0.8799854516983032,
|
|
"adv/std_reasoning": 0.7755146622657776,
|
|
"adv/std_step_conf": 0.9336560368537903,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.6444260520786912,
|
|
"calib/avg_num_step_conf": 6.58984375,
|
|
"calib/ece": 0.33027777777777784,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.7738095238095238,
|
|
"calib/gap": 0.20913541732985297,
|
|
"calib/mean_conf": 0.8230555555555555,
|
|
"calib/mu_c": 0.9168345323741007,
|
|
"calib/mu_w": 0.7076991150442478,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.30087301587301596,
|
|
"calib/std_conf": 0.33569725603172723,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.410047281323877,
|
|
"calib/step_q_c_n": 846.0,
|
|
"calib/step_q_gap": 0.07631363090770577,
|
|
"calib/step_q_w": 0.33373365041617126,
|
|
"calib/step_q_w_n": 841.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2541.0,
|
|
"completions/max_terminated_length": 2541.0,
|
|
"completions/mean_length": 566.36328125,
|
|
"completions/mean_terminated_length": 568.5843505859375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 175.0,
|
|
"epoch": 0.12906666666666666,
|
|
"grad_norm": 0.05059061199426651,
|
|
"kl": 0.06748199462890625,
|
|
"learning_rate": 2.1944444444444445e-06,
|
|
"loss": 0.1015,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.03129071742296219,
|
|
"mask/share_reasoning": 0.8439393043518066,
|
|
"mask/share_step_conf": 0.12086370587348938,
|
|
"num_tokens": 29135240.0,
|
|
"reward": 0.92536461353302,
|
|
"reward_std": 0.23206710815429688,
|
|
"rewards/accuracy_reward_step": 0.54296875,
|
|
"rewards/asymmetric_l2_reward": 0.8859968185424805,
|
|
"rewards/final_brier_reward_step": 0.6592636704444885,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 121
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.637088418006897,
|
|
"adv/mean_abs_reasoning": 0.4608699381351471,
|
|
"adv/mean_abs_step_conf": 0.7540398836135864,
|
|
"adv/ratio_final_to_reasoning": 1.3823605431606063,
|
|
"adv/ratio_step_to_reasoning": 1.63612295187817,
|
|
"adv/std_final_conf": 0.842920184135437,
|
|
"adv/std_reasoning": 0.7205365896224976,
|
|
"adv/std_step_conf": 0.9331433176994324,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.7871930050147871,
|
|
"calib/avg_num_step_conf": 5.69921875,
|
|
"calib/ece": 0.18192156862745107,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.6745098039215687,
|
|
"calib/gap": 0.4593088594573743,
|
|
"calib/mean_conf": 0.7199607843137256,
|
|
"calib/mu_c": 0.9018831168831168,
|
|
"calib/mu_w": 0.4425742574257425,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.14898039215686285,
|
|
"calib/std_conf": 0.40719171958838457,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4631063321385902,
|
|
"calib/step_q_c_n": 837.0,
|
|
"calib/step_q_gap": 0.13792948326399213,
|
|
"calib/step_q_w": 0.3251768488745981,
|
|
"calib/step_q_w_n": 622.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 3011.0,
|
|
"completions/max_terminated_length": 3011.0,
|
|
"completions/mean_length": 493.8046875,
|
|
"completions/mean_terminated_length": 493.8046875,
|
|
"completions/min_length": 110.0,
|
|
"completions/min_terminated_length": 110.0,
|
|
"epoch": 0.13013333333333332,
|
|
"grad_norm": 0.060518212616443634,
|
|
"kl": 0.072021484375,
|
|
"learning_rate": 2.166666666666667e-06,
|
|
"loss": -0.0451,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.034646786749362946,
|
|
"mask/share_reasoning": 0.8425935506820679,
|
|
"mask/share_step_conf": 0.12275967001914978,
|
|
"num_tokens": 29368998.0,
|
|
"reward": 1.011244773864746,
|
|
"reward_std": 0.18983519077301025,
|
|
"rewards/accuracy_reward_step": 0.6015625,
|
|
"rewards/asymmetric_l2_reward": 0.9048250913619995,
|
|
"rewards/final_brier_reward_step": 0.7981331944465637,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 122
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7194132804870605,
|
|
"adv/mean_abs_reasoning": 0.5484261512756348,
|
|
"adv/mean_abs_step_conf": 0.7414064407348633,
|
|
"adv/ratio_final_to_reasoning": 1.3117778552567398,
|
|
"adv/ratio_step_to_reasoning": 1.3518801738581538,
|
|
"adv/std_final_conf": 0.8892462849617004,
|
|
"adv/std_reasoning": 0.792913556098938,
|
|
"adv/std_step_conf": 0.9334316253662109,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.7523837706319458,
|
|
"calib/avg_num_step_conf": 6.34765625,
|
|
"calib/ece": 0.2340725806451614,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.5645161290322581,
|
|
"calib/gap": 0.3882784244098114,
|
|
"calib/mean_conf": 0.6297177419354838,
|
|
"calib/mu_c": 0.8035036496350366,
|
|
"calib/mu_w": 0.4152252252252252,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.15568548387096784,
|
|
"calib/std_conf": 0.431571959022196,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.3978266331658291,
|
|
"calib/step_q_c_n": 796.0,
|
|
"calib/step_q_gap": 0.07244665729128147,
|
|
"calib/step_q_w": 0.32537997587454764,
|
|
"calib/step_q_w_n": 829.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2897.0,
|
|
"completions/max_terminated_length": 2897.0,
|
|
"completions/mean_length": 589.12109375,
|
|
"completions/mean_terminated_length": 591.431396484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 162.0,
|
|
"epoch": 0.1312,
|
|
"grad_norm": 0.038368623703718185,
|
|
"kl": 0.0644683837890625,
|
|
"learning_rate": 2.138888888888889e-06,
|
|
"loss": 0.0179,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.02929634228348732,
|
|
"mask/share_reasoning": 0.8502581119537354,
|
|
"mask/share_step_conf": 0.1165393590927124,
|
|
"num_tokens": 29625101.0,
|
|
"reward": 0.9518745541572571,
|
|
"reward_std": 0.21453779935836792,
|
|
"rewards/accuracy_reward_step": 0.5390625,
|
|
"rewards/asymmetric_l2_reward": 0.8731791973114014,
|
|
"rewards/final_brier_reward_step": 0.7290074229240417,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"step": 123
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6313794851303101,
|
|
"adv/mean_abs_reasoning": 0.46017879247665405,
|
|
"adv/mean_abs_step_conf": 0.7264816761016846,
|
|
"adv/ratio_final_to_reasoning": 1.3720308181354128,
|
|
"adv/ratio_step_to_reasoning": 1.578694385701272,
|
|
"adv/std_final_conf": 0.85479336977005,
|
|
"adv/std_reasoning": 0.7573562860488892,
|
|
"adv/std_step_conf": 0.9332188367843628,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.7461487820934826,
|
|
"calib/avg_num_step_conf": 5.765625,
|
|
"calib/ece": 0.2508300395256916,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.5810276679841897,
|
|
"calib/gap": 0.3679802501645819,
|
|
"calib/mean_conf": 0.6370750988142293,
|
|
"calib/mu_c": 0.7796129032258065,
|
|
"calib/mu_w": 0.41163265306122454,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.13762845849802363,
|
|
"calib/std_conf": 0.43541918185282724,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4402631578947368,
|
|
"calib/step_q_c_n": 874.0,
|
|
"calib/step_q_gap": 0.0714923937751355,
|
|
"calib/step_q_w": 0.3687707641196013,
|
|
"calib/step_q_w_n": 602.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2694.0,
|
|
"completions/max_terminated_length": 2694.0,
|
|
"completions/mean_length": 528.83203125,
|
|
"completions/mean_terminated_length": 528.83203125,
|
|
"completions/min_length": 121.0,
|
|
"completions/min_terminated_length": 121.0,
|
|
"epoch": 0.13226666666666667,
|
|
"grad_norm": 0.10215196013450623,
|
|
"kl": 0.08013153076171875,
|
|
"learning_rate": 2.1111111111111114e-06,
|
|
"loss": -0.0337,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03226089105010033,
|
|
"mask/share_reasoning": 0.8445743322372437,
|
|
"mask/share_step_conf": 0.12316481024026871,
|
|
"num_tokens": 29867298.0,
|
|
"reward": 0.9671303033828735,
|
|
"reward_std": 0.17288029193878174,
|
|
"rewards/accuracy_reward_step": 0.60546875,
|
|
"rewards/asymmetric_l2_reward": 0.8771121501922607,
|
|
"rewards/final_brier_reward_step": 0.7383984327316284,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 124
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7052055597305298,
|
|
"adv/mean_abs_reasoning": 0.4439585208892822,
|
|
"adv/mean_abs_step_conf": 0.7640683650970459,
|
|
"adv/ratio_final_to_reasoning": 1.5884492053851116,
|
|
"adv/ratio_step_to_reasoning": 1.7210354777436407,
|
|
"adv/std_final_conf": 0.8930036425590515,
|
|
"adv/std_reasoning": 0.739206075668335,
|
|
"adv/std_step_conf": 0.932820737361908,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.6647173489278753,
|
|
"calib/avg_num_step_conf": 5.98828125,
|
|
"calib/ece": 0.31192771084337334,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.5943775100401606,
|
|
"calib/gap": 0.25868226120857696,
|
|
"calib/mean_conf": 0.635863453815261,
|
|
"calib/mu_c": 0.7542962962962962,
|
|
"calib/mu_w": 0.4956140350877193,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.20281124497991954,
|
|
"calib/std_conf": 0.4397463642706736,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.44329396325459314,
|
|
"calib/step_q_c_n": 762.0,
|
|
"calib/step_q_gap": 0.10667917726237525,
|
|
"calib/step_q_w": 0.3366147859922179,
|
|
"calib/step_q_w_n": 771.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2545.0,
|
|
"completions/max_terminated_length": 2545.0,
|
|
"completions/mean_length": 530.0,
|
|
"completions/mean_terminated_length": 534.1732177734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 173.0,
|
|
"epoch": 0.13333333333333333,
|
|
"grad_norm": 0.03873305022716522,
|
|
"kl": 0.06960296630859375,
|
|
"learning_rate": 2.0833333333333334e-06,
|
|
"loss": -0.0536,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03307991474866867,
|
|
"mask/share_reasoning": 0.8385022878646851,
|
|
"mask/share_step_conf": 0.12060528248548508,
|
|
"num_tokens": 30107786.0,
|
|
"reward": 0.8997880220413208,
|
|
"reward_std": 0.20577451586723328,
|
|
"rewards/accuracy_reward_step": 0.53125,
|
|
"rewards/asymmetric_l2_reward": 0.8392912149429321,
|
|
"rewards/final_brier_reward_step": 0.6595035195350647,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 125
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6224679350852966,
|
|
"adv/mean_abs_reasoning": 0.45161741971969604,
|
|
"adv/mean_abs_step_conf": 0.7560645341873169,
|
|
"adv/ratio_final_to_reasoning": 1.378308072066046,
|
|
"adv/ratio_step_to_reasoning": 1.67412615451499,
|
|
"adv/std_final_conf": 0.8212738633155823,
|
|
"adv/std_reasoning": 0.7205803990364075,
|
|
"adv/std_step_conf": 0.931928813457489,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.797762148337596,
|
|
"calib/avg_num_step_conf": 6.77734375,
|
|
"calib/ece": 0.20988047808764945,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.5617529880478087,
|
|
"calib/gap": 0.43679859335038357,
|
|
"calib/mean_conf": 0.6259760956175299,
|
|
"calib/mu_c": 0.8261029411764705,
|
|
"calib/mu_w": 0.38930434782608697,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.1470119521912351,
|
|
"calib/std_conf": 0.43423622491700775,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.42935049019607846,
|
|
"calib/step_q_c_n": 816.0,
|
|
"calib/step_q_gap": 0.1333330799675692,
|
|
"calib/step_q_w": 0.29601741022850925,
|
|
"calib/step_q_w_n": 919.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2574.0,
|
|
"completions/max_terminated_length": 2574.0,
|
|
"completions/mean_length": 555.703125,
|
|
"completions/mean_terminated_length": 557.8823852539062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 115.0,
|
|
"epoch": 0.1344,
|
|
"grad_norm": 0.051755405962467194,
|
|
"kl": 0.06482315063476562,
|
|
"learning_rate": 2.0555555555555555e-06,
|
|
"loss": -0.027,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03272949904203415,
|
|
"mask/share_reasoning": 0.8290582299232483,
|
|
"mask/share_step_conf": 0.13430599868297577,
|
|
"num_tokens": 30355510.0,
|
|
"reward": 0.9703141450881958,
|
|
"reward_std": 0.1568230241537094,
|
|
"rewards/accuracy_reward_step": 0.53125,
|
|
"rewards/asymmetric_l2_reward": 0.8804025650024414,
|
|
"rewards/final_brier_reward_step": 0.7578819990158081,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 126
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.650374174118042,
|
|
"adv/mean_abs_reasoning": 0.46083492040634155,
|
|
"adv/mean_abs_step_conf": 0.7511869668960571,
|
|
"adv/ratio_final_to_reasoning": 1.4112953366133236,
|
|
"adv/ratio_step_to_reasoning": 1.630056520529515,
|
|
"adv/std_final_conf": 0.8628984093666077,
|
|
"adv/std_reasoning": 0.739283561706543,
|
|
"adv/std_step_conf": 0.9332513809204102,
|
|
"calib/answer_extract_rate": 0.9609375,
|
|
"calib/auroc": 0.8018252933507171,
|
|
"calib/avg_num_step_conf": 6.26171875,
|
|
"calib/ece": 0.21907258064516133,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.95703125,
|
|
"calib/frac_conf_gt_0.9": 0.5362903225806451,
|
|
"calib/gap": 0.46292959582790094,
|
|
"calib/mean_conf": 0.5784274193548387,
|
|
"calib/mu_c": 0.7986923076923077,
|
|
"calib/mu_w": 0.33576271186440676,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.13665322580645164,
|
|
"calib/std_conf": 0.4548098091224347,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4438493150684931,
|
|
"calib/step_q_c_n": 730.0,
|
|
"calib/step_q_gap": 0.14875194966184935,
|
|
"calib/step_q_w": 0.2950973654066438,
|
|
"calib/step_q_w_n": 873.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 3006.0,
|
|
"completions/max_terminated_length": 3006.0,
|
|
"completions/mean_length": 549.046875,
|
|
"completions/mean_terminated_length": 549.046875,
|
|
"completions/min_length": 136.0,
|
|
"completions/min_terminated_length": 136.0,
|
|
"epoch": 0.13546666666666668,
|
|
"grad_norm": 0.035358961671590805,
|
|
"kl": 0.07138442993164062,
|
|
"learning_rate": 2.027777777777778e-06,
|
|
"loss": 0.1926,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.03422444313764572,
|
|
"mask/share_reasoning": 0.831305980682373,
|
|
"mask/share_step_conf": 0.13446959853172302,
|
|
"num_tokens": 30599738.0,
|
|
"reward": 0.9445489645004272,
|
|
"reward_std": 0.20476898550987244,
|
|
"rewards/accuracy_reward_step": 0.5078125,
|
|
"rewards/asymmetric_l2_reward": 0.860126793384552,
|
|
"rewards/final_brier_reward_step": 0.7360023260116577,
|
|
"rewards/format_reward_step": 0.95703125,
|
|
"step": 127
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.704669177532196,
|
|
"adv/mean_abs_reasoning": 0.5308483839035034,
|
|
"adv/mean_abs_step_conf": 0.761113166809082,
|
|
"adv/ratio_final_to_reasoning": 1.3274396209903305,
|
|
"adv/ratio_step_to_reasoning": 1.43376751232125,
|
|
"adv/std_final_conf": 0.85466468334198,
|
|
"adv/std_reasoning": 0.7577895522117615,
|
|
"adv/std_step_conf": 0.9339450001716614,
|
|
"calib/answer_extract_rate": 0.95703125,
|
|
"calib/auroc": 0.7072817820849318,
|
|
"calib/avg_num_step_conf": 5.4296875,
|
|
"calib/ece": 0.27745901639344256,
|
|
"calib/final_conf_rate": 0.953125,
|
|
"calib/format_rate": 0.9453125,
|
|
"calib/frac_conf_gt_0.9": 0.5778688524590164,
|
|
"calib/gap": 0.3277353792314422,
|
|
"calib/mean_conf": 0.6243442622950819,
|
|
"calib/mu_c": 0.781496062992126,
|
|
"calib/mu_w": 0.45376068376068385,
|
|
"calib/nonempty_final_conf_rate": 0.953125,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
|
"calib/pce": 0.190655737704918,
|
|
"calib/std_conf": 0.44240121191419035,
|
|
"calib/step_conf_rate": 0.9765625,
|
|
"calib/step_q_c": 0.4389984101748808,
|
|
"calib/step_q_c_n": 629.0,
|
|
"calib/step_q_gap": 0.10517449427474934,
|
|
"calib/step_q_w": 0.33382391590013144,
|
|
"calib/step_q_w_n": 761.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 3005.0,
|
|
"completions/max_terminated_length": 3005.0,
|
|
"completions/mean_length": 533.0390625,
|
|
"completions/mean_terminated_length": 539.3596801757812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 140.0,
|
|
"epoch": 0.13653333333333334,
|
|
"grad_norm": 0.026687778532505035,
|
|
"kl": 0.0691680908203125,
|
|
"learning_rate": 2.0000000000000003e-06,
|
|
"loss": -0.0484,
|
|
"mask/has_final_conf_rate": 0.953125,
|
|
"mask/share_final_conf": 0.034037213772535324,
|
|
"mask/share_reasoning": 0.8381680846214294,
|
|
"mask/share_step_conf": 0.11607595533132553,
|
|
"num_tokens": 30842860.0,
|
|
"reward": 0.8984044790267944,
|
|
"reward_std": 0.2292974293231964,
|
|
"rewards/accuracy_reward_step": 0.49609375,
|
|
"rewards/asymmetric_l2_reward": 0.8419756889343262,
|
|
"rewards/final_brier_reward_step": 0.666551947593689,
|
|
"rewards/format_reward_step": 0.9453125,
|
|
"step": 128
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5893256664276123,
|
|
"adv/mean_abs_reasoning": 0.3948642611503601,
|
|
"adv/mean_abs_step_conf": 0.7625530958175659,
|
|
"adv/ratio_final_to_reasoning": 1.4924765910967146,
|
|
"adv/ratio_step_to_reasoning": 1.9311778016982748,
|
|
"adv/std_final_conf": 0.8301159143447876,
|
|
"adv/std_reasoning": 0.6815221309661865,
|
|
"adv/std_step_conf": 0.9312430024147034,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6886455219030286,
|
|
"calib/avg_num_step_conf": 6.2890625,
|
|
"calib/ece": 0.2615294117647058,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.6627450980392157,
|
|
"calib/gap": 0.2917977382035617,
|
|
"calib/mean_conf": 0.7284313725490196,
|
|
"calib/mu_c": 0.8405732484076434,
|
|
"calib/mu_w": 0.5487755102040817,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.18713725490196073,
|
|
"calib/std_conf": 0.4012772380298454,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.43962121212121213,
|
|
"calib/step_q_c_n": 924.0,
|
|
"calib/step_q_gap": 0.10281363194628501,
|
|
"calib/step_q_w": 0.3368075801749271,
|
|
"calib/step_q_w_n": 686.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2646.0,
|
|
"completions/max_terminated_length": 2646.0,
|
|
"completions/mean_length": 483.7578125,
|
|
"completions/mean_terminated_length": 483.7578125,
|
|
"completions/min_length": 140.0,
|
|
"completions/min_terminated_length": 140.0,
|
|
"epoch": 0.1376,
|
|
"grad_norm": 0.042509667575359344,
|
|
"kl": 0.07884979248046875,
|
|
"learning_rate": 1.9722222222222224e-06,
|
|
"loss": 0.0618,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.0361248143017292,
|
|
"mask/share_reasoning": 0.8258543610572815,
|
|
"mask/share_step_conf": 0.1380208432674408,
|
|
"num_tokens": 31069086.0,
|
|
"reward": 0.970730721950531,
|
|
"reward_std": 0.1526256501674652,
|
|
"rewards/accuracy_reward_step": 0.61328125,
|
|
"rewards/asymmetric_l2_reward": 0.8946923017501831,
|
|
"rewards/final_brier_reward_step": 0.7248941659927368,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 129
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5929268002510071,
|
|
"adv/mean_abs_reasoning": 0.2917104959487915,
|
|
"adv/mean_abs_step_conf": 0.7566444873809814,
|
|
"adv/ratio_final_to_reasoning": 2.0325864461013183,
|
|
"adv/ratio_step_to_reasoning": 2.5938198929729532,
|
|
"adv/std_final_conf": 0.8083614706993103,
|
|
"adv/std_reasoning": 0.5727423429489136,
|
|
"adv/std_step_conf": 0.9330140948295593,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.7233320147679325,
|
|
"calib/avg_num_step_conf": 5.33203125,
|
|
"calib/ece": 0.2249212598425196,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.6889763779527559,
|
|
"calib/gap": 0.385006592827004,
|
|
"calib/mean_conf": 0.7233464566929134,
|
|
"calib/mu_c": 0.8688607594936707,
|
|
"calib/mu_w": 0.4838541666666667,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.16311023622047235,
|
|
"calib/std_conf": 0.41253229657299734,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.48749683944374206,
|
|
"calib/step_q_c_n": 791.0,
|
|
"calib/step_q_gap": 0.103942832475101,
|
|
"calib/step_q_w": 0.38355400696864106,
|
|
"calib/step_q_w_n": 574.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2842.0,
|
|
"completions/max_terminated_length": 2842.0,
|
|
"completions/mean_length": 471.3984375,
|
|
"completions/mean_terminated_length": 471.3984375,
|
|
"completions/min_length": 94.0,
|
|
"completions/min_terminated_length": 94.0,
|
|
"epoch": 0.13866666666666666,
|
|
"grad_norm": 0.06835480779409409,
|
|
"kl": 0.07553863525390625,
|
|
"learning_rate": 1.944444444444445e-06,
|
|
"loss": -0.0023,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03732430934906006,
|
|
"mask/share_reasoning": 0.8383286595344543,
|
|
"mask/share_step_conf": 0.1243470311164856,
|
|
"num_tokens": 31295052.0,
|
|
"reward": 0.9831303358078003,
|
|
"reward_std": 0.14991185069084167,
|
|
"rewards/accuracy_reward_step": 0.6171875,
|
|
"rewards/asymmetric_l2_reward": 0.8848813772201538,
|
|
"rewards/final_brier_reward_step": 0.7595043182373047,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 130
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6311858892440796,
|
|
"adv/mean_abs_reasoning": 0.2988106906414032,
|
|
"adv/mean_abs_step_conf": 0.7364592552185059,
|
|
"adv/ratio_final_to_reasoning": 2.1123269983722013,
|
|
"adv/ratio_step_to_reasoning": 2.4646348952163697,
|
|
"adv/std_final_conf": 0.8326376676559448,
|
|
"adv/std_reasoning": 0.6184049248695374,
|
|
"adv/std_step_conf": 0.9334642291069031,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.7967821142414061,
|
|
"calib/avg_num_step_conf": 5.234375,
|
|
"calib/ece": 0.2596825396825397,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.5992063492063492,
|
|
"calib/gap": 0.44134401654174205,
|
|
"calib/mean_conf": 0.6542063492063492,
|
|
"calib/mu_c": 0.9099056603773585,
|
|
"calib/mu_w": 0.4685616438356165,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.24662698412698414,
|
|
"calib/std_conf": 0.43570204828566333,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4780145719489981,
|
|
"calib/step_q_c_n": 549.0,
|
|
"calib/step_q_gap": 0.13446210671511694,
|
|
"calib/step_q_w": 0.34355246523388117,
|
|
"calib/step_q_w_n": 791.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2566.0,
|
|
"completions/max_terminated_length": 2566.0,
|
|
"completions/mean_length": 479.078125,
|
|
"completions/mean_terminated_length": 479.078125,
|
|
"completions/min_length": 169.0,
|
|
"completions/min_terminated_length": 169.0,
|
|
"epoch": 0.13973333333333332,
|
|
"grad_norm": 0.06330462545156479,
|
|
"kl": 0.06920623779296875,
|
|
"learning_rate": 1.916666666666667e-06,
|
|
"loss": -0.0458,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.03542075306177139,
|
|
"mask/share_reasoning": 0.8427149057388306,
|
|
"mask/share_step_conf": 0.12186426669359207,
|
|
"num_tokens": 31523904.0,
|
|
"reward": 0.9323045015335083,
|
|
"reward_std": 0.1720176488161087,
|
|
"rewards/accuracy_reward_step": 0.4140625,
|
|
"rewards/asymmetric_l2_reward": 0.8692620396614075,
|
|
"rewards/final_brier_reward_step": 0.7156593799591064,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 131
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5419154167175293,
|
|
"adv/mean_abs_reasoning": 0.39009517431259155,
|
|
"adv/mean_abs_step_conf": 0.7669232487678528,
|
|
"adv/ratio_final_to_reasoning": 1.3891876967523853,
|
|
"adv/ratio_step_to_reasoning": 1.9659900949025861,
|
|
"adv/std_final_conf": 0.7634062170982361,
|
|
"adv/std_reasoning": 0.6612535119056702,
|
|
"adv/std_step_conf": 0.932680070400238,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.7523074894514769,
|
|
"calib/avg_num_step_conf": 5.609375,
|
|
"calib/ece": 0.24881889763779533,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.7007874015748031,
|
|
"calib/gap": 0.348175105485232,
|
|
"calib/mean_conf": 0.7357480314960629,
|
|
"calib/mu_c": 0.8673417721518987,
|
|
"calib/mu_w": 0.5191666666666667,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.1812598425196851,
|
|
"calib/std_conf": 0.40682372449389625,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4939951573849879,
|
|
"calib/step_q_c_n": 826.0,
|
|
"calib/step_q_gap": 0.158306632794824,
|
|
"calib/step_q_w": 0.3356885245901639,
|
|
"calib/step_q_w_n": 610.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2357.0,
|
|
"completions/max_terminated_length": 2357.0,
|
|
"completions/mean_length": 509.984375,
|
|
"completions/mean_terminated_length": 509.984375,
|
|
"completions/min_length": 140.0,
|
|
"completions/min_terminated_length": 140.0,
|
|
"epoch": 0.1408,
|
|
"grad_norm": 0.027949035167694092,
|
|
"kl": 0.0719757080078125,
|
|
"learning_rate": 1.888888888888889e-06,
|
|
"loss": 0.0022,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03615806996822357,
|
|
"mask/share_reasoning": 0.8362313508987427,
|
|
"mask/share_step_conf": 0.12761051952838898,
|
|
"num_tokens": 31760052.0,
|
|
"reward": 0.9794524908065796,
|
|
"reward_std": 0.16386666893959045,
|
|
"rewards/accuracy_reward_step": 0.6171875,
|
|
"rewards/asymmetric_l2_reward": 0.8927135467529297,
|
|
"rewards/final_brier_reward_step": 0.7443163394927979,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 132
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7018519043922424,
|
|
"adv/mean_abs_reasoning": 0.4902651607990265,
|
|
"adv/mean_abs_step_conf": 0.7454802989959717,
|
|
"adv/ratio_final_to_reasoning": 1.4315761357555474,
|
|
"adv/ratio_step_to_reasoning": 1.5205655196485908,
|
|
"adv/std_final_conf": 0.8919135332107544,
|
|
"adv/std_reasoning": 0.7752693295478821,
|
|
"adv/std_step_conf": 0.9336416721343994,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.7900599270453361,
|
|
"calib/avg_num_step_conf": 6.7890625,
|
|
"calib/ece": 0.266600790513834,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.5296442687747036,
|
|
"calib/gap": 0.39868290776446064,
|
|
"calib/mean_conf": 0.5900790513833992,
|
|
"calib/mu_c": 0.8296039603960396,
|
|
"calib/mu_w": 0.4309210526315789,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.22873517786561268,
|
|
"calib/std_conf": 0.44594793302387903,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.40628205128205136,
|
|
"calib/step_q_c_n": 702.0,
|
|
"calib/step_q_gap": 0.07311602811602819,
|
|
"calib/step_q_w": 0.3331660231660232,
|
|
"calib/step_q_w_n": 1036.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2896.0,
|
|
"completions/max_terminated_length": 2896.0,
|
|
"completions/mean_length": 595.953125,
|
|
"completions/mean_terminated_length": 598.2902221679688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 180.0,
|
|
"epoch": 0.14186666666666667,
|
|
"grad_norm": 0.05951628088951111,
|
|
"kl": 0.0603179931640625,
|
|
"learning_rate": 1.8611111111111113e-06,
|
|
"loss": -0.0138,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.028608456254005432,
|
|
"mask/share_reasoning": 0.8479064702987671,
|
|
"mask/share_step_conf": 0.11957882344722748,
|
|
"num_tokens": 32018960.0,
|
|
"reward": 0.9371469020843506,
|
|
"reward_std": 0.21386194229125977,
|
|
"rewards/accuracy_reward_step": 0.39453125,
|
|
"rewards/asymmetric_l2_reward": 0.8900238871574402,
|
|
"rewards/final_brier_reward_step": 0.707707405090332,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 133
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7257549166679382,
|
|
"adv/mean_abs_reasoning": 0.49450692534446716,
|
|
"adv/mean_abs_step_conf": 0.752888560295105,
|
|
"adv/ratio_final_to_reasoning": 1.4676334738131052,
|
|
"adv/ratio_step_to_reasoning": 1.522503572160597,
|
|
"adv/std_final_conf": 0.8783618807792664,
|
|
"adv/std_reasoning": 0.7394267320632935,
|
|
"adv/std_step_conf": 0.9331688284873962,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.6966345244847797,
|
|
"calib/avg_num_step_conf": 5.9453125,
|
|
"calib/ece": 0.30107142857142855,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.6111111111111112,
|
|
"calib/gap": 0.30402155416903004,
|
|
"calib/mean_conf": 0.6541666666666667,
|
|
"calib/mu_c": 0.8025581395348838,
|
|
"calib/mu_w": 0.4985365853658537,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.22166666666666668,
|
|
"calib/std_conf": 0.4401944240359945,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.42251948051948046,
|
|
"calib/step_q_c_n": 770.0,
|
|
"calib/step_q_gap": 0.09624288477479959,
|
|
"calib/step_q_w": 0.3262765957446809,
|
|
"calib/step_q_w_n": 752.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2497.0,
|
|
"completions/max_terminated_length": 2497.0,
|
|
"completions/mean_length": 585.1484375,
|
|
"completions/mean_terminated_length": 587.4431762695312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 176.0,
|
|
"epoch": 0.14293333333333333,
|
|
"grad_norm": 0.02902313508093357,
|
|
"kl": 0.0628509521484375,
|
|
"learning_rate": 1.8333333333333333e-06,
|
|
"loss": -0.0718,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.030042003840208054,
|
|
"mask/share_reasoning": 0.8552829027175903,
|
|
"mask/share_step_conf": 0.11076889932155609,
|
|
"num_tokens": 32277710.0,
|
|
"reward": 0.925879716873169,
|
|
"reward_std": 0.22603853046894073,
|
|
"rewards/accuracy_reward_step": 0.50390625,
|
|
"rewards/asymmetric_l2_reward": 0.8767973184585571,
|
|
"rewards/final_brier_reward_step": 0.6773058176040649,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 134
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6498154997825623,
|
|
"adv/mean_abs_reasoning": 0.3868643045425415,
|
|
"adv/mean_abs_step_conf": 0.7391627430915833,
|
|
"adv/ratio_final_to_reasoning": 1.6796987784927708,
|
|
"adv/ratio_step_to_reasoning": 1.9106511880583732,
|
|
"adv/std_final_conf": 0.8298484683036804,
|
|
"adv/std_reasoning": 0.6614306569099426,
|
|
"adv/std_step_conf": 0.9338327646255493,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.6963808760683761,
|
|
"calib/avg_num_step_conf": 6.21484375,
|
|
"calib/ece": 0.2699592741935484,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.625,
|
|
"calib/gap": 0.33560432692307685,
|
|
"calib/mean_conf": 0.6729439516129033,
|
|
"calib/mu_c": 0.8136812499999999,
|
|
"calib/mu_w": 0.47807692307692307,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.18112903225806457,
|
|
"calib/std_conf": 0.4334902361093346,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4100843373493976,
|
|
"calib/step_q_c_n": 830.0,
|
|
"calib/step_q_gap": 0.08741679464243307,
|
|
"calib/step_q_w": 0.3226675427069645,
|
|
"calib/step_q_w_n": 761.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2746.0,
|
|
"completions/max_terminated_length": 2746.0,
|
|
"completions/mean_length": 573.79296875,
|
|
"completions/mean_terminated_length": 573.79296875,
|
|
"completions/min_length": 171.0,
|
|
"completions/min_terminated_length": 171.0,
|
|
"epoch": 0.144,
|
|
"grad_norm": 0.03258584067225456,
|
|
"kl": 0.058887481689453125,
|
|
"learning_rate": 1.8055555555555557e-06,
|
|
"loss": 0.0267,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.03373143821954727,
|
|
"mask/share_reasoning": 0.8400686383247375,
|
|
"mask/share_step_conf": 0.12619991600513458,
|
|
"num_tokens": 32530481.0,
|
|
"reward": 0.9410494565963745,
|
|
"reward_std": 0.21206125617027283,
|
|
"rewards/accuracy_reward_step": 0.5625,
|
|
"rewards/asymmetric_l2_reward": 0.8774411678314209,
|
|
"rewards/final_brier_reward_step": 0.6991890668869019,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"step": 135
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6550817489624023,
|
|
"adv/mean_abs_reasoning": 0.5279697775840759,
|
|
"adv/mean_abs_step_conf": 0.7136498093605042,
|
|
"adv/ratio_final_to_reasoning": 1.240756150778128,
|
|
"adv/ratio_step_to_reasoning": 1.3516868572024652,
|
|
"adv/std_final_conf": 0.8621448874473572,
|
|
"adv/std_reasoning": 0.7927603721618652,
|
|
"adv/std_step_conf": 0.9325664043426514,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.7687752016129032,
|
|
"calib/avg_num_step_conf": 6.453125,
|
|
"calib/ece": 0.25138888888888883,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.5317460317460317,
|
|
"calib/gap": 0.3933266129032257,
|
|
"calib/mean_conf": 0.5791666666666666,
|
|
"calib/mu_c": 0.7789516129032257,
|
|
"calib/mu_w": 0.385625,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.16924603174603167,
|
|
"calib/std_conf": 0.4533461351483745,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.42105726872246696,
|
|
"calib/step_q_c_n": 681.0,
|
|
"calib/step_q_gap": 0.16384820590063381,
|
|
"calib/step_q_w": 0.25720906282183315,
|
|
"calib/step_q_w_n": 971.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2577.0,
|
|
"completions/max_terminated_length": 2577.0,
|
|
"completions/mean_length": 528.12890625,
|
|
"completions/mean_terminated_length": 530.2000122070312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 169.0,
|
|
"epoch": 0.14506666666666668,
|
|
"grad_norm": 0.04807325452566147,
|
|
"kl": 0.07080078125,
|
|
"learning_rate": 1.777777777777778e-06,
|
|
"loss": 0.0226,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.032773517072200775,
|
|
"mask/share_reasoning": 0.830197274684906,
|
|
"mask/share_step_conf": 0.13312296569347382,
|
|
"num_tokens": 32774170.0,
|
|
"reward": 0.9568853974342346,
|
|
"reward_std": 0.19861853122711182,
|
|
"rewards/accuracy_reward_step": 0.484375,
|
|
"rewards/asymmetric_l2_reward": 0.9026017189025879,
|
|
"rewards/final_brier_reward_step": 0.7182003855705261,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 136
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6266046762466431,
|
|
"adv/mean_abs_reasoning": 0.4091912508010864,
|
|
"adv/mean_abs_step_conf": 0.7550874352455139,
|
|
"adv/ratio_final_to_reasoning": 1.5313247167917683,
|
|
"adv/ratio_step_to_reasoning": 1.8453166673707118,
|
|
"adv/std_final_conf": 0.8365187644958496,
|
|
"adv/std_reasoning": 0.6816372275352478,
|
|
"adv/std_step_conf": 0.9328610301017761,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.7627720120522263,
|
|
"calib/avg_num_step_conf": 6.84375,
|
|
"calib/ece": 0.22254032258064518,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.6008064516129032,
|
|
"calib/gap": 0.42354670237696684,
|
|
"calib/mean_conf": 0.6422983870967742,
|
|
"calib/mu_c": 0.8182068965517242,
|
|
"calib/mu_w": 0.39466019417475734,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.14008064516129035,
|
|
"calib/std_conf": 0.44154781788756414,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.40336353340883346,
|
|
"calib/step_q_c_n": 883.0,
|
|
"calib/step_q_gap": 0.13861094422586456,
|
|
"calib/step_q_w": 0.2647525891829689,
|
|
"calib/step_q_w_n": 869.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 3003.0,
|
|
"completions/max_terminated_length": 3003.0,
|
|
"completions/mean_length": 539.8125,
|
|
"completions/mean_terminated_length": 546.2134399414062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 140.0,
|
|
"epoch": 0.14613333333333334,
|
|
"grad_norm": 0.04263077676296234,
|
|
"kl": 0.0669097900390625,
|
|
"learning_rate": 1.75e-06,
|
|
"loss": -0.0123,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.03168744221329689,
|
|
"mask/share_reasoning": 0.8275086879730225,
|
|
"mask/share_step_conf": 0.12908512353897095,
|
|
"num_tokens": 33019346.0,
|
|
"reward": 0.9610379934310913,
|
|
"reward_std": 0.19014747440814972,
|
|
"rewards/accuracy_reward_step": 0.56640625,
|
|
"rewards/asymmetric_l2_reward": 0.8790087103843689,
|
|
"rewards/final_brier_reward_step": 0.7368171215057373,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"step": 137
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6476581692695618,
|
|
"adv/mean_abs_reasoning": 0.5334905385971069,
|
|
"adv/mean_abs_step_conf": 0.7643733620643616,
|
|
"adv/ratio_final_to_reasoning": 1.2140012285366404,
|
|
"adv/ratio_step_to_reasoning": 1.4327777284942962,
|
|
"adv/std_final_conf": 0.8558839559555054,
|
|
"adv/std_reasoning": 0.7754238247871399,
|
|
"adv/std_step_conf": 0.9321677684783936,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.7955010224948875,
|
|
"calib/avg_num_step_conf": 5.7734375,
|
|
"calib/ece": 0.18932806324110668,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.5968379446640316,
|
|
"calib/gap": 0.4857866394001363,
|
|
"calib/mean_conf": 0.6537549407114623,
|
|
"calib/mu_c": 0.8265644171779141,
|
|
"calib/mu_w": 0.3407777777777778,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.09940711462450587,
|
|
"calib/std_conf": 0.42963415269661276,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.4080204778156996,
|
|
"calib/step_q_c_n": 879.0,
|
|
"calib/step_q_gap": 0.1297233158791387,
|
|
"calib/step_q_w": 0.2782971619365609,
|
|
"calib/step_q_w_n": 599.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2661.0,
|
|
"completions/max_terminated_length": 2661.0,
|
|
"completions/mean_length": 496.4921875,
|
|
"completions/mean_terminated_length": 498.4392395019531,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 136.0,
|
|
"epoch": 0.1472,
|
|
"grad_norm": 0.04252735897898674,
|
|
"kl": 0.07735443115234375,
|
|
"learning_rate": 1.7222222222222224e-06,
|
|
"loss": -0.0624,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03474820777773857,
|
|
"mask/share_reasoning": 0.8353586792945862,
|
|
"mask/share_step_conf": 0.12598684430122375,
|
|
"num_tokens": 33250784.0,
|
|
"reward": 1.007904052734375,
|
|
"reward_std": 0.1891259253025055,
|
|
"rewards/accuracy_reward_step": 0.63671875,
|
|
"rewards/asymmetric_l2_reward": 0.8937262892723083,
|
|
"rewards/final_brier_reward_step": 0.7978628873825073,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 138
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6581767797470093,
|
|
"adv/mean_abs_reasoning": 0.42416638135910034,
|
|
"adv/mean_abs_step_conf": 0.7499033212661743,
|
|
"adv/ratio_final_to_reasoning": 1.5516948270112787,
|
|
"adv/ratio_step_to_reasoning": 1.7679461509027616,
|
|
"adv/std_final_conf": 0.8394871950149536,
|
|
"adv/std_reasoning": 0.7013220191001892,
|
|
"adv/std_step_conf": 0.9318543672561646,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.7805037191700379,
|
|
"calib/avg_num_step_conf": 5.53515625,
|
|
"calib/ece": 0.19721568627450975,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.5450980392156862,
|
|
"calib/gap": 0.4600561137935535,
|
|
"calib/mean_conf": 0.6047450980392157,
|
|
"calib/mu_c": 0.779746835443038,
|
|
"calib/mu_w": 0.31969072164948453,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.09117647058823526,
|
|
"calib/std_conf": 0.4437459016386166,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4019053398058252,
|
|
"calib/step_q_c_n": 824.0,
|
|
"calib/step_q_gap": 0.10600314756299212,
|
|
"calib/step_q_w": 0.2959021922428331,
|
|
"calib/step_q_w_n": 593.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2967.0,
|
|
"completions/max_terminated_length": 2967.0,
|
|
"completions/mean_length": 459.67578125,
|
|
"completions/mean_terminated_length": 459.67578125,
|
|
"completions/min_length": 151.0,
|
|
"completions/min_terminated_length": 151.0,
|
|
"epoch": 0.14826666666666666,
|
|
"grad_norm": 0.07648757100105286,
|
|
"kl": 0.086883544921875,
|
|
"learning_rate": 1.6944444444444446e-06,
|
|
"loss": 0.0081,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.03702244907617569,
|
|
"mask/share_reasoning": 0.8356660604476929,
|
|
"mask/share_step_conf": 0.12731149792671204,
|
|
"num_tokens": 33471557.0,
|
|
"reward": 1.0029046535491943,
|
|
"reward_std": 0.16366134583950043,
|
|
"rewards/accuracy_reward_step": 0.6171875,
|
|
"rewards/asymmetric_l2_reward": 0.9031118750572205,
|
|
"rewards/final_brier_reward_step": 0.7808222770690918,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 139
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5114564895629883,
|
|
"adv/mean_abs_reasoning": 0.33423930406570435,
|
|
"adv/mean_abs_step_conf": 0.7621335983276367,
|
|
"adv/ratio_final_to_reasoning": 1.5302104909315117,
|
|
"adv/ratio_step_to_reasoning": 2.280203402343781,
|
|
"adv/std_final_conf": 0.7727295160293579,
|
|
"adv/std_reasoning": 0.6610760688781738,
|
|
"adv/std_step_conf": 0.9312514066696167,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.8144219396806622,
|
|
"calib/avg_num_step_conf": 5.375,
|
|
"calib/ece": 0.1538976377952756,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.6850393700787402,
|
|
"calib/gap": 0.5125340035481964,
|
|
"calib/mean_conf": 0.7275984251968504,
|
|
"calib/mu_c": 0.8809550561797753,
|
|
"calib/mu_w": 0.3684210526315789,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0903543307086614,
|
|
"calib/std_conf": 0.4108740205767719,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.44658747300215984,
|
|
"calib/step_q_c_n": 926.0,
|
|
"calib/step_q_gap": 0.15103191744660427,
|
|
"calib/step_q_w": 0.29555555555555557,
|
|
"calib/step_q_w_n": 450.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2630.0,
|
|
"completions/max_terminated_length": 2630.0,
|
|
"completions/mean_length": 500.43359375,
|
|
"completions/mean_terminated_length": 500.43359375,
|
|
"completions/min_length": 133.0,
|
|
"completions/min_terminated_length": 133.0,
|
|
"epoch": 0.14933333333333335,
|
|
"grad_norm": 0.041224293410778046,
|
|
"kl": 0.0767364501953125,
|
|
"learning_rate": 1.6666666666666667e-06,
|
|
"loss": 0.0673,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03436052054166794,
|
|
"mask/share_reasoning": 0.8470271825790405,
|
|
"mask/share_step_conf": 0.11861232668161392,
|
|
"num_tokens": 33704684.0,
|
|
"reward": 1.0392565727233887,
|
|
"reward_std": 0.12018037587404251,
|
|
"rewards/accuracy_reward_step": 0.6953125,
|
|
"rewards/asymmetric_l2_reward": 0.9118223190307617,
|
|
"rewards/final_brier_reward_step": 0.8291909694671631,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 140
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.49427223205566406,
|
|
"adv/mean_abs_reasoning": 0.4047102928161621,
|
|
"adv/mean_abs_step_conf": 0.7349098920822144,
|
|
"adv/ratio_final_to_reasoning": 1.2212988916498475,
|
|
"adv/ratio_step_to_reasoning": 1.8158912810652037,
|
|
"adv/std_final_conf": 0.7135685682296753,
|
|
"adv/std_reasoning": 0.6816495060920715,
|
|
"adv/std_step_conf": 0.9325038194656372,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.881882576310793,
|
|
"calib/avg_num_step_conf": 6.0390625,
|
|
"calib/ece": 0.10734939759036147,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.642570281124498,
|
|
"calib/gap": 0.6617306849715201,
|
|
"calib/mean_conf": 0.6774698795180722,
|
|
"calib/mu_c": 0.895389221556886,
|
|
"calib/mu_w": 0.2336585365853659,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.057068273092369504,
|
|
"calib/std_conf": 0.43568395210389493,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.39104166666666673,
|
|
"calib/step_q_c_n": 1008.0,
|
|
"calib/step_q_gap": 0.12533534696406445,
|
|
"calib/step_q_w": 0.2657063197026023,
|
|
"calib/step_q_w_n": 538.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2210.0,
|
|
"completions/max_terminated_length": 2210.0,
|
|
"completions/mean_length": 538.55078125,
|
|
"completions/mean_terminated_length": 540.6627807617188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 123.0,
|
|
"epoch": 0.1504,
|
|
"grad_norm": 0.03272243216633797,
|
|
"kl": 0.063079833984375,
|
|
"learning_rate": 1.638888888888889e-06,
|
|
"loss": 0.0454,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.032417528331279755,
|
|
"mask/share_reasoning": 0.8407843112945557,
|
|
"mask/share_step_conf": 0.12289191037416458,
|
|
"num_tokens": 33949649.0,
|
|
"reward": 1.0384899377822876,
|
|
"reward_std": 0.14921121299266815,
|
|
"rewards/accuracy_reward_step": 0.65234375,
|
|
"rewards/asymmetric_l2_reward": 0.8945099115371704,
|
|
"rewards/final_brier_reward_step": 0.857469916343689,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 141
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6672189831733704,
|
|
"adv/mean_abs_reasoning": 0.5137597918510437,
|
|
"adv/mean_abs_step_conf": 0.7419067621231079,
|
|
"adv/ratio_final_to_reasoning": 1.2986983289786518,
|
|
"adv/ratio_step_to_reasoning": 1.4440732301180388,
|
|
"adv/std_final_conf": 0.8465292453765869,
|
|
"adv/std_reasoning": 0.7576342821121216,
|
|
"adv/std_step_conf": 0.9329071640968323,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.7659574468085107,
|
|
"calib/avg_num_step_conf": 6.24609375,
|
|
"calib/ece": 0.2413095238095237,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.5396825396825397,
|
|
"calib/gap": 0.40900709219858167,
|
|
"calib/mean_conf": 0.5888492063492063,
|
|
"calib/mu_c": 0.7690070921985817,
|
|
"calib/mu_w": 0.36,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.1353174603174602,
|
|
"calib/std_conf": 0.45452646853028067,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3878585308056873,
|
|
"calib/step_q_c_n": 844.0,
|
|
"calib/step_q_gap": 0.09809694140171377,
|
|
"calib/step_q_w": 0.2897615894039735,
|
|
"calib/step_q_w_n": 755.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2580.0,
|
|
"completions/max_terminated_length": 2580.0,
|
|
"completions/mean_length": 547.94921875,
|
|
"completions/mean_terminated_length": 547.94921875,
|
|
"completions/min_length": 167.0,
|
|
"completions/min_terminated_length": 167.0,
|
|
"epoch": 0.15146666666666667,
|
|
"grad_norm": 0.045452363789081573,
|
|
"kl": 0.07000732421875,
|
|
"learning_rate": 1.6111111111111113e-06,
|
|
"loss": 0.0256,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.03237912803888321,
|
|
"mask/share_reasoning": 0.842139482498169,
|
|
"mask/share_step_conf": 0.12548136711120605,
|
|
"num_tokens": 34195084.0,
|
|
"reward": 0.9666558504104614,
|
|
"reward_std": 0.18624988198280334,
|
|
"rewards/accuracy_reward_step": 0.55078125,
|
|
"rewards/asymmetric_l2_reward": 0.8902691602706909,
|
|
"rewards/final_brier_reward_step": 0.736011266708374,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 142
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5834546089172363,
|
|
"adv/mean_abs_reasoning": 0.4283401668071747,
|
|
"adv/mean_abs_step_conf": 0.7408386468887329,
|
|
"adv/ratio_final_to_reasoning": 1.3621291070278947,
|
|
"adv/ratio_step_to_reasoning": 1.7295567969049124,
|
|
"adv/std_final_conf": 0.8025461435317993,
|
|
"adv/std_reasoning": 0.7204607129096985,
|
|
"adv/std_step_conf": 0.9318289160728455,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.8720285790598292,
|
|
"calib/avg_num_step_conf": 6.3671875,
|
|
"calib/ece": 0.1430645161290321,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.6008064516129032,
|
|
"calib/gap": 0.5630608974358975,
|
|
"calib/mean_conf": 0.6624193548387097,
|
|
"calib/mu_c": 0.8985416666666668,
|
|
"calib/mu_w": 0.33548076923076925,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.11241935483870953,
|
|
"calib/std_conf": 0.4251126434551085,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.4168470906630582,
|
|
"calib/step_q_c_n": 739.0,
|
|
"calib/step_q_gap": 0.19198738247001668,
|
|
"calib/step_q_w": 0.22485970819304152,
|
|
"calib/step_q_w_n": 891.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 3056.0,
|
|
"completions/max_terminated_length": 3056.0,
|
|
"completions/mean_length": 567.859375,
|
|
"completions/mean_terminated_length": 567.859375,
|
|
"completions/min_length": 161.0,
|
|
"completions/min_terminated_length": 161.0,
|
|
"epoch": 0.15253333333333333,
|
|
"grad_norm": 0.05021341145038605,
|
|
"kl": 0.06475830078125,
|
|
"learning_rate": 1.5833333333333333e-06,
|
|
"loss": 0.0437,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.0319594144821167,
|
|
"mask/share_reasoning": 0.8436037302017212,
|
|
"mask/share_step_conf": 0.12443678081035614,
|
|
"num_tokens": 34447792.0,
|
|
"reward": 1.0080523490905762,
|
|
"reward_std": 0.16858679056167603,
|
|
"rewards/accuracy_reward_step": 0.5625,
|
|
"rewards/asymmetric_l2_reward": 0.892905592918396,
|
|
"rewards/final_brier_reward_step": 0.8169492483139038,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"step": 143
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5916790962219238,
|
|
"adv/mean_abs_reasoning": 0.46679311990737915,
|
|
"adv/mean_abs_step_conf": 0.7288160920143127,
|
|
"adv/ratio_final_to_reasoning": 1.2675403106612293,
|
|
"adv/ratio_step_to_reasoning": 1.5613256942581417,
|
|
"adv/std_final_conf": 0.8130344152450562,
|
|
"adv/std_reasoning": 0.7393408417701721,
|
|
"adv/std_step_conf": 0.9324739575386047,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.7388970588235293,
|
|
"calib/avg_num_step_conf": 5.75,
|
|
"calib/ece": 0.24780000000000002,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.6,
|
|
"calib/gap": 0.39191911764705895,
|
|
"calib/mean_conf": 0.63188,
|
|
"calib/mu_c": 0.7572941176470589,
|
|
"calib/mu_w": 0.36537499999999995,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.09984000000000003,
|
|
"calib/std_conf": 0.4510858738643896,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.3856683937823834,
|
|
"calib/step_q_c_n": 965.0,
|
|
"calib/step_q_gap": 0.09519502100131827,
|
|
"calib/step_q_w": 0.2904733727810651,
|
|
"calib/step_q_w_n": 507.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2804.0,
|
|
"completions/max_terminated_length": 2804.0,
|
|
"completions/mean_length": 542.84765625,
|
|
"completions/mean_terminated_length": 544.9765014648438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 144.0,
|
|
"epoch": 0.1536,
|
|
"grad_norm": 0.04553085193037987,
|
|
"kl": 0.0696868896484375,
|
|
"learning_rate": 1.5555555555555558e-06,
|
|
"loss": -0.0059,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.033757805824279785,
|
|
"mask/share_reasoning": 0.8401012420654297,
|
|
"mask/share_step_conf": 0.12223471701145172,
|
|
"num_tokens": 34690889.0,
|
|
"reward": 0.9717831611633301,
|
|
"reward_std": 0.16983790695667267,
|
|
"rewards/accuracy_reward_step": 0.6640625,
|
|
"rewards/asymmetric_l2_reward": 0.8857837915420532,
|
|
"rewards/final_brier_reward_step": 0.7296574115753174,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 144
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6282063722610474,
|
|
"adv/mean_abs_reasoning": 0.5565600395202637,
|
|
"adv/mean_abs_step_conf": 0.7417995929718018,
|
|
"adv/ratio_final_to_reasoning": 1.128730644770222,
|
|
"adv/ratio_step_to_reasoning": 1.3328294169506105,
|
|
"adv/std_final_conf": 0.8449404835700989,
|
|
"adv/std_reasoning": 0.8097800612449646,
|
|
"adv/std_step_conf": 0.9331483244895935,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.6935672514619883,
|
|
"calib/avg_num_step_conf": 6.8046875,
|
|
"calib/ece": 0.2536254980079681,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.7171314741035857,
|
|
"calib/gap": 0.255464912280702,
|
|
"calib/mean_conf": 0.7645418326693229,
|
|
"calib/mu_c": 0.8459649122807019,
|
|
"calib/mu_w": 0.5904999999999999,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.16844621513944222,
|
|
"calib/std_conf": 0.3863622472481406,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.40837301587301583,
|
|
"calib/step_q_c_n": 1008.0,
|
|
"calib/step_q_gap": 0.13369999134985505,
|
|
"calib/step_q_w": 0.2746730245231608,
|
|
"calib/step_q_w_n": 734.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2707.0,
|
|
"completions/max_terminated_length": 2707.0,
|
|
"completions/mean_length": 501.26953125,
|
|
"completions/mean_terminated_length": 503.2353210449219,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 167.0,
|
|
"epoch": 0.15466666666666667,
|
|
"grad_norm": 0.051369134336709976,
|
|
"kl": 0.08301544189453125,
|
|
"learning_rate": 1.527777777777778e-06,
|
|
"loss": 0.1476,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03623100742697716,
|
|
"mask/share_reasoning": 0.8176732063293457,
|
|
"mask/share_step_conf": 0.14218956232070923,
|
|
"num_tokens": 34921918.0,
|
|
"reward": 0.9706292152404785,
|
|
"reward_std": 0.20070935785770416,
|
|
"rewards/accuracy_reward_step": 0.66796875,
|
|
"rewards/asymmetric_l2_reward": 0.888382613658905,
|
|
"rewards/final_brier_reward_step": 0.7231882810592651,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 145
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.679091215133667,
|
|
"adv/mean_abs_reasoning": 0.48408961296081543,
|
|
"adv/mean_abs_step_conf": 0.7339562177658081,
|
|
"adv/ratio_final_to_reasoning": 1.4028212895958914,
|
|
"adv/ratio_step_to_reasoning": 1.5161577487208304,
|
|
"adv/std_final_conf": 0.8739967942237854,
|
|
"adv/std_reasoning": 0.7575613856315613,
|
|
"adv/std_step_conf": 0.9337763786315918,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.7498391248391247,
|
|
"calib/avg_num_step_conf": 5.89453125,
|
|
"calib/ece": 0.2546613545816732,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.5976095617529881,
|
|
"calib/gap": 0.42930566280566274,
|
|
"calib/mean_conf": 0.6447808764940238,
|
|
"calib/mu_c": 0.8842342342342342,
|
|
"calib/mu_w": 0.45492857142857146,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.22860557768924294,
|
|
"calib/std_conf": 0.43954187916360593,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.42036423841059606,
|
|
"calib/step_q_c_n": 604.0,
|
|
"calib/step_q_gap": 0.12298302294098279,
|
|
"calib/step_q_w": 0.29738121546961327,
|
|
"calib/step_q_w_n": 905.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2266.0,
|
|
"completions/max_terminated_length": 2266.0,
|
|
"completions/mean_length": 533.35546875,
|
|
"completions/mean_terminated_length": 535.4470825195312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 172.0,
|
|
"epoch": 0.15573333333333333,
|
|
"grad_norm": 0.03525509685277939,
|
|
"kl": 0.0661773681640625,
|
|
"learning_rate": 1.5e-06,
|
|
"loss": -0.069,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03156570717692375,
|
|
"mask/share_reasoning": 0.8492813110351562,
|
|
"mask/share_step_conf": 0.1152467131614685,
|
|
"num_tokens": 35165673.0,
|
|
"reward": 0.9413388967514038,
|
|
"reward_std": 0.20831407606601715,
|
|
"rewards/accuracy_reward_step": 0.43359375,
|
|
"rewards/asymmetric_l2_reward": 0.8832393884658813,
|
|
"rewards/final_brier_reward_step": 0.7166258096694946,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 146
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6118881702423096,
|
|
"adv/mean_abs_reasoning": 0.44582316279411316,
|
|
"adv/mean_abs_step_conf": 0.7450541257858276,
|
|
"adv/ratio_final_to_reasoning": 1.3724907571141327,
|
|
"adv/ratio_step_to_reasoning": 1.671187564854954,
|
|
"adv/std_final_conf": 0.8005033135414124,
|
|
"adv/std_reasoning": 0.7206440567970276,
|
|
"adv/std_step_conf": 0.9336183667182922,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.6944263787721123,
|
|
"calib/avg_num_step_conf": 5.6640625,
|
|
"calib/ece": 0.2990725806451613,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.6330645161290323,
|
|
"calib/gap": 0.32943548387096777,
|
|
"calib/mean_conf": 0.6661693548387098,
|
|
"calib/mu_c": 0.8308870967741936,
|
|
"calib/mu_w": 0.5014516129032258,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.23262096774193552,
|
|
"calib/std_conf": 0.44329803601898965,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.43249639249639243,
|
|
"calib/step_q_c_n": 693.0,
|
|
"calib/step_q_gap": 0.11671039513840037,
|
|
"calib/step_q_w": 0.31578599735799207,
|
|
"calib/step_q_w_n": 757.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2607.0,
|
|
"completions/max_terminated_length": 2607.0,
|
|
"completions/mean_length": 531.06640625,
|
|
"completions/mean_terminated_length": 535.248046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 169.0,
|
|
"epoch": 0.1568,
|
|
"grad_norm": 0.04022669792175293,
|
|
"kl": 0.08133697509765625,
|
|
"learning_rate": 1.4722222222222225e-06,
|
|
"loss": 0.0078,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.03174225613474846,
|
|
"mask/share_reasoning": 0.8448315262794495,
|
|
"mask/share_step_conf": 0.11561372131109238,
|
|
"num_tokens": 35405306.0,
|
|
"reward": 0.9095112681388855,
|
|
"reward_std": 0.21041998267173767,
|
|
"rewards/accuracy_reward_step": 0.484375,
|
|
"rewards/asymmetric_l2_reward": 0.8593860864639282,
|
|
"rewards/final_brier_reward_step": 0.6690112948417664,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"step": 147
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.524472713470459,
|
|
"adv/mean_abs_reasoning": 0.4304084777832031,
|
|
"adv/mean_abs_step_conf": 0.7623563408851624,
|
|
"adv/ratio_final_to_reasoning": 1.218546428666389,
|
|
"adv/ratio_step_to_reasoning": 1.7712391373228513,
|
|
"adv/std_final_conf": 0.7791754007339478,
|
|
"adv/std_reasoning": 0.7013992667198181,
|
|
"adv/std_step_conf": 0.9334371089935303,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.752401059778109,
|
|
"calib/avg_num_step_conf": 5.72265625,
|
|
"calib/ece": 0.2162248995983935,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.7469879518072289,
|
|
"calib/gap": 0.34049180327868855,
|
|
"calib/mean_conf": 0.773574297188755,
|
|
"calib/mu_c": 0.8638251366120219,
|
|
"calib/mu_w": 0.5233333333333333,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.12742971887550195,
|
|
"calib/std_conf": 0.3913611243091092,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4259635666347075,
|
|
"calib/step_q_c_n": 1043.0,
|
|
"calib/step_q_gap": 0.13167446710864117,
|
|
"calib/step_q_w": 0.29428909952606636,
|
|
"calib/step_q_w_n": 422.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2980.0,
|
|
"completions/max_terminated_length": 2980.0,
|
|
"completions/mean_length": 516.078125,
|
|
"completions/mean_terminated_length": 520.1417236328125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 115.0,
|
|
"epoch": 0.15786666666666666,
|
|
"grad_norm": 0.0584280788898468,
|
|
"kl": 0.07340240478515625,
|
|
"learning_rate": 1.4444444444444445e-06,
|
|
"loss": -0.0201,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03611718863248825,
|
|
"mask/share_reasoning": 0.8259831070899963,
|
|
"mask/share_step_conf": 0.13008719682693481,
|
|
"num_tokens": 35642534.0,
|
|
"reward": 0.9886473417282104,
|
|
"reward_std": 0.18284042179584503,
|
|
"rewards/accuracy_reward_step": 0.71484375,
|
|
"rewards/asymmetric_l2_reward": 0.8780118227005005,
|
|
"rewards/final_brier_reward_step": 0.7617827653884888,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 148
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.566154956817627,
|
|
"adv/mean_abs_reasoning": 0.4284716844558716,
|
|
"adv/mean_abs_step_conf": 0.7544533014297485,
|
|
"adv/ratio_final_to_reasoning": 1.3213357553290908,
|
|
"adv/ratio_step_to_reasoning": 1.7608008388882228,
|
|
"adv/std_final_conf": 0.7966720461845398,
|
|
"adv/std_reasoning": 0.7013741135597229,
|
|
"adv/std_step_conf": 0.9313024282455444,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.8105348988910633,
|
|
"calib/avg_num_step_conf": 6.25390625,
|
|
"calib/ece": 0.16796812749003978,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.7091633466135459,
|
|
"calib/gap": 0.5252994129158515,
|
|
"calib/mean_conf": 0.7432669322709163,
|
|
"calib/mu_c": 0.9630136986301372,
|
|
"calib/mu_w": 0.4377142857142857,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.16478087649402384,
|
|
"calib/std_conf": 0.4068463942424941,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4335380835380835,
|
|
"calib/step_q_c_n": 814.0,
|
|
"calib/step_q_gap": 0.15961178112385216,
|
|
"calib/step_q_w": 0.2739263024142313,
|
|
"calib/step_q_w_n": 787.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2226.0,
|
|
"completions/max_terminated_length": 2226.0,
|
|
"completions/mean_length": 548.18359375,
|
|
"completions/mean_terminated_length": 550.3333740234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 86.0,
|
|
"epoch": 0.15893333333333334,
|
|
"grad_norm": 0.0576673299074173,
|
|
"kl": 0.062652587890625,
|
|
"learning_rate": 1.4166666666666667e-06,
|
|
"loss": -0.0267,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03428906947374344,
|
|
"mask/share_reasoning": 0.8384957313537598,
|
|
"mask/share_step_conf": 0.12330888956785202,
|
|
"num_tokens": 35887325.0,
|
|
"reward": 1.0077321529388428,
|
|
"reward_std": 0.17122933268547058,
|
|
"rewards/accuracy_reward_step": 0.5703125,
|
|
"rewards/asymmetric_l2_reward": 0.9006611108779907,
|
|
"rewards/final_brier_reward_step": 0.8046468496322632,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 149
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6269816160202026,
|
|
"adv/mean_abs_reasoning": 0.46332746744155884,
|
|
"adv/mean_abs_step_conf": 0.7745290994644165,
|
|
"adv/ratio_final_to_reasoning": 1.3532148643859239,
|
|
"adv/ratio_step_to_reasoning": 1.6716667020439722,
|
|
"adv/std_final_conf": 0.8205464482307434,
|
|
"adv/std_reasoning": 0.720609188079834,
|
|
"adv/std_step_conf": 0.9327038526535034,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.7517542652724271,
|
|
"calib/avg_num_step_conf": 5.65625,
|
|
"calib/ece": 0.24335999999999997,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.732,
|
|
"calib/gap": 0.3722977435332966,
|
|
"calib/mean_conf": 0.7546400000000001,
|
|
"calib/mu_c": 0.8916455696202531,
|
|
"calib/mu_w": 0.5193478260869565,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.18299999999999997,
|
|
"calib/std_conf": 0.41221071116602487,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.42653179190751445,
|
|
"calib/step_q_c_n": 865.0,
|
|
"calib/step_q_gap": 0.10047690339979576,
|
|
"calib/step_q_w": 0.3260548885077187,
|
|
"calib/step_q_w_n": 583.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2716.0,
|
|
"completions/max_terminated_length": 2716.0,
|
|
"completions/mean_length": 478.453125,
|
|
"completions/mean_terminated_length": 478.453125,
|
|
"completions/min_length": 142.0,
|
|
"completions/min_terminated_length": 142.0,
|
|
"epoch": 0.16,
|
|
"grad_norm": 0.025151947513222694,
|
|
"kl": 0.0756683349609375,
|
|
"learning_rate": 1.3888888888888892e-06,
|
|
"loss": -0.0234,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.038418009877204895,
|
|
"mask/share_reasoning": 0.8237306475639343,
|
|
"mask/share_step_conf": 0.1378513127565384,
|
|
"num_tokens": 36114769.0,
|
|
"reward": 0.9638096690177917,
|
|
"reward_std": 0.19389088451862335,
|
|
"rewards/accuracy_reward_step": 0.6171875,
|
|
"rewards/asymmetric_l2_reward": 0.8756263256072998,
|
|
"rewards/final_brier_reward_step": 0.734024167060852,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 150
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6179725527763367,
|
|
"adv/mean_abs_reasoning": 0.427360862493515,
|
|
"adv/mean_abs_step_conf": 0.7608951330184937,
|
|
"adv/ratio_final_to_reasoning": 1.4460204642293704,
|
|
"adv/ratio_step_to_reasoning": 1.7804511357893467,
|
|
"adv/std_final_conf": 0.8229371905326843,
|
|
"adv/std_reasoning": 0.7013833522796631,
|
|
"adv/std_step_conf": 0.9335458874702454,
|
|
"calib/answer_extract_rate": 0.953125,
|
|
"calib/auroc": 0.8082643515714383,
|
|
"calib/avg_num_step_conf": 6.76171875,
|
|
"calib/ece": 0.22418032786885236,
|
|
"calib/final_conf_rate": 0.953125,
|
|
"calib/format_rate": 0.953125,
|
|
"calib/frac_conf_gt_0.9": 0.569672131147541,
|
|
"calib/gap": 0.5015182717544922,
|
|
"calib/mean_conf": 0.6102459016393442,
|
|
"calib/mu_c": 0.8712820512820512,
|
|
"calib/mu_w": 0.36976377952755907,
|
|
"calib/nonempty_final_conf_rate": 0.953125,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.17745901639344253,
|
|
"calib/std_conf": 0.4581940012822868,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.40887905604719765,
|
|
"calib/step_q_c_n": 678.0,
|
|
"calib/step_q_gap": 0.17155711872526036,
|
|
"calib/step_q_w": 0.2373219373219373,
|
|
"calib/step_q_w_n": 1053.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 2391.0,
|
|
"completions/max_terminated_length": 2391.0,
|
|
"completions/mean_length": 582.28515625,
|
|
"completions/mean_terminated_length": 591.52783203125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 149.0,
|
|
"epoch": 0.16106666666666666,
|
|
"grad_norm": 0.031244348734617233,
|
|
"kl": 0.061309814453125,
|
|
"learning_rate": 1.3611111111111112e-06,
|
|
"loss": -0.0475,
|
|
"mask/has_final_conf_rate": 0.953125,
|
|
"mask/share_final_conf": 0.030964188277721405,
|
|
"mask/share_reasoning": 0.8384581804275513,
|
|
"mask/share_step_conf": 0.11495261639356613,
|
|
"num_tokens": 36370858.0,
|
|
"reward": 0.9330952167510986,
|
|
"reward_std": 0.18629847466945648,
|
|
"rewards/accuracy_reward_step": 0.45703125,
|
|
"rewards/asymmetric_l2_reward": 0.8467037081718445,
|
|
"rewards/final_brier_reward_step": 0.7374554872512817,
|
|
"rewards/format_reward_step": 0.953125,
|
|
"step": 151
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6945838928222656,
|
|
"adv/mean_abs_reasoning": 0.5879529118537903,
|
|
"adv/mean_abs_step_conf": 0.7506691217422485,
|
|
"adv/ratio_final_to_reasoning": 1.1813597293570202,
|
|
"adv/ratio_step_to_reasoning": 1.276750410803173,
|
|
"adv/std_final_conf": 0.8834863901138306,
|
|
"adv/std_reasoning": 0.8265911340713501,
|
|
"adv/std_step_conf": 0.9328155517578125,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.7469903038979632,
|
|
"calib/avg_num_step_conf": 6.3828125,
|
|
"calib/ece": 0.3006854838709677,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 0.5967741935483871,
|
|
"calib/gap": 0.33690440554434803,
|
|
"calib/mean_conf": 0.6297177419354838,
|
|
"calib/mu_c": 0.7940944881889762,
|
|
"calib/mu_w": 0.4571900826446282,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
|
"calib/pce": 0.2091532258064516,
|
|
"calib/std_conf": 0.45284085469977603,
|
|
"calib/step_conf_rate": 0.98046875,
|
|
"calib/step_q_c": 0.3859042553191489,
|
|
"calib/step_q_c_n": 752.0,
|
|
"calib/step_q_gap": 0.10618770203116701,
|
|
"calib/step_q_w": 0.2797165532879819,
|
|
"calib/step_q_w_n": 882.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2350.0,
|
|
"completions/max_terminated_length": 2350.0,
|
|
"completions/mean_length": 544.1328125,
|
|
"completions/mean_terminated_length": 548.4172973632812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 157.0,
|
|
"epoch": 0.16213333333333332,
|
|
"grad_norm": 0.04397697374224663,
|
|
"kl": 0.09905242919921875,
|
|
"learning_rate": 1.3333333333333334e-06,
|
|
"loss": -0.0666,
|
|
"mask/has_final_conf_rate": 0.96875,
|
|
"mask/share_final_conf": 0.033129818737506866,
|
|
"mask/share_reasoning": 0.8305137157440186,
|
|
"mask/share_step_conf": 0.1285439282655716,
|
|
"num_tokens": 36615548.0,
|
|
"reward": 0.9151645302772522,
|
|
"reward_std": 0.22851672768592834,
|
|
"rewards/accuracy_reward_step": 0.5,
|
|
"rewards/asymmetric_l2_reward": 0.8679298162460327,
|
|
"rewards/final_brier_reward_step": 0.6702117323875427,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"step": 152
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6541010141372681,
|
|
"adv/mean_abs_reasoning": 0.4219273328781128,
|
|
"adv/mean_abs_step_conf": 0.7459571361541748,
|
|
"adv/ratio_final_to_reasoning": 1.550269354856482,
|
|
"adv/ratio_step_to_reasoning": 1.7679753787595183,
|
|
"adv/std_final_conf": 0.8594304323196411,
|
|
"adv/std_reasoning": 0.7204716801643372,
|
|
"adv/std_step_conf": 0.932479202747345,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.6297800673667524,
|
|
"calib/avg_num_step_conf": 5.5625,
|
|
"calib/ece": 0.3261200000000001,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.648,
|
|
"calib/gap": 0.2313539396341061,
|
|
"calib/mean_conf": 0.68924,
|
|
"calib/mu_c": 0.7845578231292518,
|
|
"calib/mu_w": 0.5532038834951457,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.21368000000000006,
|
|
"calib/std_conf": 0.43288130289953614,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4063384188626907,
|
|
"calib/step_q_c_n": 721.0,
|
|
"calib/step_q_gap": 0.13020755115287558,
|
|
"calib/step_q_w": 0.2761308677098151,
|
|
"calib/step_q_w_n": 703.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 3011.0,
|
|
"completions/max_terminated_length": 3011.0,
|
|
"completions/mean_length": 520.55078125,
|
|
"completions/mean_terminated_length": 522.5921630859375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 165.0,
|
|
"epoch": 0.1632,
|
|
"grad_norm": 0.0666937530040741,
|
|
"kl": 0.0721282958984375,
|
|
"learning_rate": 1.3055555555555556e-06,
|
|
"loss": 0.0571,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.03248149901628494,
|
|
"mask/share_reasoning": 0.8496717214584351,
|
|
"mask/share_step_conf": 0.11394055187702179,
|
|
"num_tokens": 36856129.0,
|
|
"reward": 0.9188251495361328,
|
|
"reward_std": 0.19821104407310486,
|
|
"rewards/accuracy_reward_step": 0.578125,
|
|
"rewards/asymmetric_l2_reward": 0.8702654838562012,
|
|
"rewards/final_brier_reward_step": 0.6564472913742065,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 153
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.593313455581665,
|
|
"adv/mean_abs_reasoning": 0.40208834409713745,
|
|
"adv/mean_abs_step_conf": 0.7636384963989258,
|
|
"adv/ratio_final_to_reasoning": 1.4755798428176545,
|
|
"adv/ratio_step_to_reasoning": 1.8991808830311285,
|
|
"adv/std_final_conf": 0.8072009682655334,
|
|
"adv/std_reasoning": 0.6816769242286682,
|
|
"adv/std_step_conf": 0.9328876733779907,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.7256451612903225,
|
|
"calib/avg_num_step_conf": 5.28515625,
|
|
"calib/ece": 0.2687550200803214,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.6024096385542169,
|
|
"calib/gap": 0.3908619354838709,
|
|
"calib/mean_conf": 0.6415261044176708,
|
|
"calib/mu_c": 0.8377419354838709,
|
|
"calib/mu_w": 0.44688,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.2061445783132531,
|
|
"calib/std_conf": 0.4517632571194984,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.4189130434782608,
|
|
"calib/step_q_c_n": 644.0,
|
|
"calib/step_q_gap": 0.12159287422579251,
|
|
"calib/step_q_w": 0.2973201692524683,
|
|
"calib/step_q_w_n": 709.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2620.0,
|
|
"completions/max_terminated_length": 2620.0,
|
|
"completions/mean_length": 517.84375,
|
|
"completions/mean_terminated_length": 519.87451171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 149.0,
|
|
"epoch": 0.16426666666666667,
|
|
"grad_norm": 0.04594266042113304,
|
|
"kl": 0.075714111328125,
|
|
"learning_rate": 1.2777777777777779e-06,
|
|
"loss": 0.0992,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.034510090947151184,
|
|
"mask/share_reasoning": 0.8454740047454834,
|
|
"mask/share_step_conf": 0.11610963195562363,
|
|
"num_tokens": 37093137.0,
|
|
"reward": 0.9312186241149902,
|
|
"reward_std": 0.18028542399406433,
|
|
"rewards/accuracy_reward_step": 0.484375,
|
|
"rewards/asymmetric_l2_reward": 0.8699989318847656,
|
|
"rewards/final_brier_reward_step": 0.7010320425033569,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 154
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6469031572341919,
|
|
"adv/mean_abs_reasoning": 0.47271543741226196,
|
|
"adv/mean_abs_step_conf": 0.7635318040847778,
|
|
"adv/ratio_final_to_reasoning": 1.3684832481364857,
|
|
"adv/ratio_step_to_reasoning": 1.6152038703548637,
|
|
"adv/std_final_conf": 0.8352489471435547,
|
|
"adv/std_reasoning": 0.7392789125442505,
|
|
"adv/std_step_conf": 0.9331855773925781,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.7134146341463414,
|
|
"calib/avg_num_step_conf": 5.77734375,
|
|
"calib/ece": 0.29729411764705893,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.5058823529411764,
|
|
"calib/gap": 0.32256097560975616,
|
|
"calib/mean_conf": 0.5472549019607844,
|
|
"calib/mu_c": 0.7142276422764228,
|
|
"calib/mu_w": 0.3916666666666666,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.18109803921568635,
|
|
"calib/std_conf": 0.46258641346990637,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.36075822603719593,
|
|
"calib/step_q_c_n": 699.0,
|
|
"calib/step_q_gap": 0.08251463629360617,
|
|
"calib/step_q_w": 0.27824358974358976,
|
|
"calib/step_q_w_n": 780.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2952.0,
|
|
"completions/max_terminated_length": 2952.0,
|
|
"completions/mean_length": 478.0625,
|
|
"completions/mean_terminated_length": 478.0625,
|
|
"completions/min_length": 139.0,
|
|
"completions/min_terminated_length": 139.0,
|
|
"epoch": 0.16533333333333333,
|
|
"grad_norm": 0.0336977019906044,
|
|
"kl": 0.08032989501953125,
|
|
"learning_rate": 1.25e-06,
|
|
"loss": 0.0138,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.03487030789256096,
|
|
"mask/share_reasoning": 0.8355661034584045,
|
|
"mask/share_step_conf": 0.12956362962722778,
|
|
"num_tokens": 37322737.0,
|
|
"reward": 0.9273616671562195,
|
|
"reward_std": 0.1708342432975769,
|
|
"rewards/accuracy_reward_step": 0.48046875,
|
|
"rewards/asymmetric_l2_reward": 0.8782615661621094,
|
|
"rewards/final_brier_reward_step": 0.6827117204666138,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 155
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6737991571426392,
|
|
"adv/mean_abs_reasoning": 0.4827231764793396,
|
|
"adv/mean_abs_step_conf": 0.7515172958374023,
|
|
"adv/ratio_final_to_reasoning": 1.3958293075067996,
|
|
"adv/ratio_step_to_reasoning": 1.5568287011169994,
|
|
"adv/std_final_conf": 0.8648738265037537,
|
|
"adv/std_reasoning": 0.7206152677536011,
|
|
"adv/std_step_conf": 0.9330052733421326,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.7186783804430862,
|
|
"calib/avg_num_step_conf": 5.82421875,
|
|
"calib/ece": 0.25689243027888453,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.5577689243027888,
|
|
"calib/gap": 0.3968054494525084,
|
|
"calib/mean_conf": 0.6011155378486056,
|
|
"calib/mu_c": 0.7892424242424243,
|
|
"calib/mu_w": 0.3924369747899159,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.1660557768924303,
|
|
"calib/std_conf": 0.4584387486378078,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.3498605830164765,
|
|
"calib/step_q_c_n": 789.0,
|
|
"calib/step_q_gap": 0.0351169932728867,
|
|
"calib/step_q_w": 0.3147435897435898,
|
|
"calib/step_q_w_n": 702.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2925.0,
|
|
"completions/max_terminated_length": 2925.0,
|
|
"completions/mean_length": 514.51171875,
|
|
"completions/mean_terminated_length": 514.51171875,
|
|
"completions/min_length": 150.0,
|
|
"completions/min_terminated_length": 150.0,
|
|
"epoch": 0.1664,
|
|
"grad_norm": 0.04300342872738838,
|
|
"kl": 0.08170700073242188,
|
|
"learning_rate": 1.2222222222222223e-06,
|
|
"loss": -0.0075,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03448965772986412,
|
|
"mask/share_reasoning": 0.8360552787780762,
|
|
"mask/share_step_conf": 0.129455104470253,
|
|
"num_tokens": 37559212.0,
|
|
"reward": 0.9496381282806396,
|
|
"reward_std": 0.16709250211715698,
|
|
"rewards/accuracy_reward_step": 0.515625,
|
|
"rewards/asymmetric_l2_reward": 0.8816511631011963,
|
|
"rewards/final_brier_reward_step": 0.7184062004089355,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 156
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5912591218948364,
|
|
"adv/mean_abs_reasoning": 0.49544456601142883,
|
|
"adv/mean_abs_step_conf": 0.7444217205047607,
|
|
"adv/ratio_final_to_reasoning": 1.1933910722944479,
|
|
"adv/ratio_step_to_reasoning": 1.5025328191561769,
|
|
"adv/std_final_conf": 0.82242351770401,
|
|
"adv/std_reasoning": 0.7575790882110596,
|
|
"adv/std_step_conf": 0.933133602142334,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.790162701668034,
|
|
"calib/avg_num_step_conf": 6.3203125,
|
|
"calib/ece": 0.20235059760956173,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.6135458167330677,
|
|
"calib/gap": 0.49839896089691005,
|
|
"calib/mean_conf": 0.6459362549800797,
|
|
"calib/mu_c": 0.8286163522012578,
|
|
"calib/mu_w": 0.3302173913043478,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.10741035856573701,
|
|
"calib/std_conf": 0.45350540384328225,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3897894736842106,
|
|
"calib/step_q_c_n": 1045.0,
|
|
"calib/step_q_gap": 0.11057481399834668,
|
|
"calib/step_q_w": 0.2792146596858639,
|
|
"calib/step_q_w_n": 573.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2485.0,
|
|
"completions/max_terminated_length": 2485.0,
|
|
"completions/mean_length": 497.61328125,
|
|
"completions/mean_terminated_length": 501.531494140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 163.0,
|
|
"epoch": 0.16746666666666668,
|
|
"grad_norm": 0.04095279052853584,
|
|
"kl": 0.0734100341796875,
|
|
"learning_rate": 1.1944444444444446e-06,
|
|
"loss": -0.0316,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03470167517662048,
|
|
"mask/share_reasoning": 0.8203328847885132,
|
|
"mask/share_step_conf": 0.13715294003486633,
|
|
"num_tokens": 37790329.0,
|
|
"reward": 0.9915717244148254,
|
|
"reward_std": 0.1898421347141266,
|
|
"rewards/accuracy_reward_step": 0.62109375,
|
|
"rewards/asymmetric_l2_reward": 0.8848938345909119,
|
|
"rewards/final_brier_reward_step": 0.7779370546340942,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 157
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5748130083084106,
|
|
"adv/mean_abs_reasoning": 0.41429877281188965,
|
|
"adv/mean_abs_step_conf": 0.7429898977279663,
|
|
"adv/ratio_final_to_reasoning": 1.3874359424409923,
|
|
"adv/ratio_step_to_reasoning": 1.7933673630873082,
|
|
"adv/std_final_conf": 0.7997155785560608,
|
|
"adv/std_reasoning": 0.6816416382789612,
|
|
"adv/std_step_conf": 0.9338192939758301,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.7263106796116505,
|
|
"calib/avg_num_step_conf": 5.40234375,
|
|
"calib/ece": 0.27177865612648217,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.6086956521739131,
|
|
"calib/gap": 0.3565825242718448,
|
|
"calib/mean_conf": 0.6508300395256919,
|
|
"calib/mu_c": 0.7960000000000002,
|
|
"calib/mu_w": 0.43941747572815537,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.16486166007905134,
|
|
"calib/std_conf": 0.4487590517510003,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.42103761348897534,
|
|
"calib/step_q_c_n": 771.0,
|
|
"calib/step_q_gap": 0.1080310775412629,
|
|
"calib/step_q_w": 0.31300653594771244,
|
|
"calib/step_q_w_n": 612.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2833.0,
|
|
"completions/max_terminated_length": 2833.0,
|
|
"completions/mean_length": 500.37109375,
|
|
"completions/mean_terminated_length": 500.37109375,
|
|
"completions/min_length": 131.0,
|
|
"completions/min_terminated_length": 131.0,
|
|
"epoch": 0.16853333333333334,
|
|
"grad_norm": 0.05039157345890999,
|
|
"kl": 0.0789794921875,
|
|
"learning_rate": 1.1666666666666668e-06,
|
|
"loss": 0.0354,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.037866123020648956,
|
|
"mask/share_reasoning": 0.836431622505188,
|
|
"mask/share_step_conf": 0.12570224702358246,
|
|
"num_tokens": 38023664.0,
|
|
"reward": 0.9502644538879395,
|
|
"reward_std": 0.16423243284225464,
|
|
"rewards/accuracy_reward_step": 0.5859375,
|
|
"rewards/asymmetric_l2_reward": 0.8681695461273193,
|
|
"rewards/final_brier_reward_step": 0.7175155878067017,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 158
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6193236708641052,
|
|
"adv/mean_abs_reasoning": 0.4926747977733612,
|
|
"adv/mean_abs_step_conf": 0.7591203451156616,
|
|
"adv/ratio_final_to_reasoning": 1.2570638353395227,
|
|
"adv/ratio_step_to_reasoning": 1.540814242064945,
|
|
"adv/std_final_conf": 0.815812349319458,
|
|
"adv/std_reasoning": 0.7574940323829651,
|
|
"adv/std_step_conf": 0.9329251050949097,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.6892206410426918,
|
|
"calib/avg_num_step_conf": 5.359375,
|
|
"calib/ece": 0.3099196787148594,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.5140562248995983,
|
|
"calib/gap": 0.3123513765128343,
|
|
"calib/mean_conf": 0.5535341365461848,
|
|
"calib/mu_c": 0.6827397260273973,
|
|
"calib/mu_w": 0.370388349514563,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.13855421686746988,
|
|
"calib/std_conf": 0.46807771451615177,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.42608767123287666,
|
|
"calib/step_q_c_n": 730.0,
|
|
"calib/step_q_gap": 0.1379412537873938,
|
|
"calib/step_q_w": 0.28814641744548286,
|
|
"calib/step_q_w_n": 642.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 3024.0,
|
|
"completions/max_terminated_length": 3024.0,
|
|
"completions/mean_length": 501.59375,
|
|
"completions/mean_terminated_length": 501.59375,
|
|
"completions/min_length": 130.0,
|
|
"completions/min_terminated_length": 130.0,
|
|
"epoch": 0.1696,
|
|
"grad_norm": 0.049436114728450775,
|
|
"kl": 0.080230712890625,
|
|
"learning_rate": 1.138888888888889e-06,
|
|
"loss": 0.0599,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03590218350291252,
|
|
"mask/share_reasoning": 0.8452440500259399,
|
|
"mask/share_step_conf": 0.11885374784469604,
|
|
"num_tokens": 38256856.0,
|
|
"reward": 0.9326581358909607,
|
|
"reward_std": 0.1965138167142868,
|
|
"rewards/accuracy_reward_step": 0.5703125,
|
|
"rewards/asymmetric_l2_reward": 0.8867565393447876,
|
|
"rewards/final_brier_reward_step": 0.6699659824371338,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 159
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.651262104511261,
|
|
"adv/mean_abs_reasoning": 0.4572453498840332,
|
|
"adv/mean_abs_step_conf": 0.7492605447769165,
|
|
"adv/ratio_final_to_reasoning": 1.4243165177654282,
|
|
"adv/ratio_step_to_reasoning": 1.6386400538943575,
|
|
"adv/std_final_conf": 0.8251065611839294,
|
|
"adv/std_reasoning": 0.7206948399543762,
|
|
"adv/std_step_conf": 0.933678925037384,
|
|
"calib/answer_extract_rate": 0.96484375,
|
|
"calib/auroc": 0.7278619864379737,
|
|
"calib/avg_num_step_conf": 5.7578125,
|
|
"calib/ece": 0.30607287449392717,
|
|
"calib/final_conf_rate": 0.96484375,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.5060728744939271,
|
|
"calib/gap": 0.35182289589150384,
|
|
"calib/mean_conf": 0.5299595141700404,
|
|
"calib/mu_c": 0.6852173913043479,
|
|
"calib/mu_w": 0.33339449541284405,
|
|
"calib/nonempty_final_conf_rate": 0.96484375,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.13866396761133604,
|
|
"calib/std_conf": 0.4773497160335752,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.40606488011283504,
|
|
"calib/step_q_c_n": 709.0,
|
|
"calib/step_q_gap": 0.13901912847884806,
|
|
"calib/step_q_w": 0.26704575163398697,
|
|
"calib/step_q_w_n": 765.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2483.0,
|
|
"completions/max_terminated_length": 2483.0,
|
|
"completions/mean_length": 499.25,
|
|
"completions/mean_terminated_length": 501.2078857421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 153.0,
|
|
"epoch": 0.17066666666666666,
|
|
"grad_norm": 0.03691410645842552,
|
|
"kl": 0.0764007568359375,
|
|
"learning_rate": 1.111111111111111e-06,
|
|
"loss": 0.0152,
|
|
"mask/has_final_conf_rate": 0.96484375,
|
|
"mask/share_final_conf": 0.03438437357544899,
|
|
"mask/share_reasoning": 0.8391258716583252,
|
|
"mask/share_step_conf": 0.12258350849151611,
|
|
"num_tokens": 38489504.0,
|
|
"reward": 0.9193039536476135,
|
|
"reward_std": 0.19498516619205475,
|
|
"rewards/accuracy_reward_step": 0.5390625,
|
|
"rewards/asymmetric_l2_reward": 0.8641306161880493,
|
|
"rewards/final_brier_reward_step": 0.6736960411071777,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"step": 160
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5909967422485352,
|
|
"adv/mean_abs_reasoning": 0.3688772916793823,
|
|
"adv/mean_abs_step_conf": 0.748254120349884,
|
|
"adv/ratio_final_to_reasoning": 1.602149971221901,
|
|
"adv/ratio_step_to_reasoning": 2.028463495118711,
|
|
"adv/std_final_conf": 0.8158046007156372,
|
|
"adv/std_reasoning": 0.6814883947372437,
|
|
"adv/std_step_conf": 0.9323292374610901,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.7071225071225071,
|
|
"calib/avg_num_step_conf": 5.46484375,
|
|
"calib/ece": 0.32039370078740165,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.5118110236220472,
|
|
"calib/gap": 0.3662433862433863,
|
|
"calib/mean_conf": 0.5425196850393701,
|
|
"calib/mu_c": 0.6362433862433863,
|
|
"calib/mu_w": 0.26999999999999996,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.05940944881889766,
|
|
"calib/std_conf": 0.47380262428341774,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.38986328125,
|
|
"calib/step_q_c_n": 1024.0,
|
|
"calib/step_q_gap": 0.11586328125,
|
|
"calib/step_q_w": 0.274,
|
|
"calib/step_q_w_n": 375.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2926.0,
|
|
"completions/max_terminated_length": 2926.0,
|
|
"completions/mean_length": 456.0859375,
|
|
"completions/mean_terminated_length": 456.0859375,
|
|
"completions/min_length": 150.0,
|
|
"completions/min_terminated_length": 150.0,
|
|
"epoch": 0.17173333333333332,
|
|
"grad_norm": 0.10700822621583939,
|
|
"kl": 0.079345703125,
|
|
"learning_rate": 1.0833333333333335e-06,
|
|
"loss": 0.0686,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03930460289120674,
|
|
"mask/share_reasoning": 0.8258973360061646,
|
|
"mask/share_step_conf": 0.1347980797290802,
|
|
"num_tokens": 38710182.0,
|
|
"reward": 0.9591859579086304,
|
|
"reward_std": 0.14740516245365143,
|
|
"rewards/accuracy_reward_step": 0.73828125,
|
|
"rewards/asymmetric_l2_reward": 0.8936820030212402,
|
|
"rewards/final_brier_reward_step": 0.6785961389541626,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 161
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5586308240890503,
|
|
"adv/mean_abs_reasoning": 0.4160280227661133,
|
|
"adv/mean_abs_step_conf": 0.7616361975669861,
|
|
"adv/ratio_final_to_reasoning": 1.3427721055297923,
|
|
"adv/ratio_step_to_reasoning": 1.8307329215541093,
|
|
"adv/std_final_conf": 0.7841950058937073,
|
|
"adv/std_reasoning": 0.701278567314148,
|
|
"adv/std_step_conf": 0.9325926899909973,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.8157858707557503,
|
|
"calib/avg_num_step_conf": 5.265625,
|
|
"calib/ece": 0.20877952755905524,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.5669291338582677,
|
|
"calib/gap": 0.5165840635268346,
|
|
"calib/mean_conf": 0.6009055118110236,
|
|
"calib/mu_c": 0.7798795180722891,
|
|
"calib/mu_w": 0.2632954545454545,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.07807086614173239,
|
|
"calib/std_conf": 0.4637407457415819,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.4075116279069767,
|
|
"calib/step_q_c_n": 860.0,
|
|
"calib/step_q_gap": 0.1085157262676324,
|
|
"calib/step_q_w": 0.2989959016393443,
|
|
"calib/step_q_w_n": 488.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1455.0,
|
|
"completions/max_terminated_length": 1455.0,
|
|
"completions/mean_length": 442.27734375,
|
|
"completions/mean_terminated_length": 444.01177978515625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 147.0,
|
|
"epoch": 0.1728,
|
|
"grad_norm": 0.08423297107219696,
|
|
"kl": 0.08339691162109375,
|
|
"learning_rate": 1.0555555555555557e-06,
|
|
"loss": 0.0107,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03754296526312828,
|
|
"mask/share_reasoning": 0.8310139179229736,
|
|
"mask/share_step_conf": 0.1275368630886078,
|
|
"num_tokens": 38927549.0,
|
|
"reward": 0.999754786491394,
|
|
"reward_std": 0.14653810858726501,
|
|
"rewards/accuracy_reward_step": 0.6484375,
|
|
"rewards/asymmetric_l2_reward": 0.8925575613975525,
|
|
"rewards/final_brier_reward_step": 0.7796082496643066,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 162
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5742782354354858,
|
|
"adv/mean_abs_reasoning": 0.4988449811935425,
|
|
"adv/mean_abs_step_conf": 0.7395628690719604,
|
|
"adv/ratio_final_to_reasoning": 1.1512158227220426,
|
|
"adv/ratio_step_to_reasoning": 1.4825504855285372,
|
|
"adv/std_final_conf": 0.8037339448928833,
|
|
"adv/std_reasoning": 0.7575864791870117,
|
|
"adv/std_step_conf": 0.9324069023132324,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.7783159227603672,
|
|
"calib/avg_num_step_conf": 6.00390625,
|
|
"calib/ece": 0.22789682539682532,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.46825396825396826,
|
|
"calib/gap": 0.4833561253561253,
|
|
"calib/mean_conf": 0.5043253968253968,
|
|
"calib/mu_c": 0.7287407407407407,
|
|
"calib/mu_w": 0.24538461538461537,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.09825396825396818,
|
|
"calib/std_conf": 0.47483825699640314,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4153507565337002,
|
|
"calib/step_q_c_n": 727.0,
|
|
"calib/step_q_gap": 0.15149890468184835,
|
|
"calib/step_q_w": 0.26385185185185184,
|
|
"calib/step_q_w_n": 810.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2386.0,
|
|
"completions/max_terminated_length": 2386.0,
|
|
"completions/mean_length": 545.7578125,
|
|
"completions/mean_terminated_length": 547.8980712890625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 129.0,
|
|
"epoch": 0.17386666666666667,
|
|
"grad_norm": 0.02793508768081665,
|
|
"kl": 0.07421112060546875,
|
|
"learning_rate": 1.0277777777777777e-06,
|
|
"loss": -0.0248,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.03470218926668167,
|
|
"mask/share_reasoning": 0.8333349227905273,
|
|
"mask/share_step_conf": 0.1280566155910492,
|
|
"num_tokens": 39172095.0,
|
|
"reward": 0.9705761671066284,
|
|
"reward_std": 0.16870234906673431,
|
|
"rewards/accuracy_reward_step": 0.52734375,
|
|
"rewards/asymmetric_l2_reward": 0.8863610029220581,
|
|
"rewards/final_brier_reward_step": 0.7532289028167725,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 163
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6500420570373535,
|
|
"adv/mean_abs_reasoning": 0.44920510053634644,
|
|
"adv/mean_abs_step_conf": 0.7497342824935913,
|
|
"adv/ratio_final_to_reasoning": 1.4470941141612368,
|
|
"adv/ratio_step_to_reasoning": 1.6690244202445965,
|
|
"adv/std_final_conf": 0.8430293202400208,
|
|
"adv/std_reasoning": 0.7015290856361389,
|
|
"adv/std_step_conf": 0.9337033033370972,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.770204987596292,
|
|
"calib/avg_num_step_conf": 5.8984375,
|
|
"calib/ece": 0.25626506024096396,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.5301204819277109,
|
|
"calib/gap": 0.42616921269095165,
|
|
"calib/mean_conf": 0.5546586345381527,
|
|
"calib/mu_c": 0.7446376811594202,
|
|
"calib/mu_w": 0.31846846846846855,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.12835341365461853,
|
|
"calib/std_conf": 0.47459640847724605,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3912232030264817,
|
|
"calib/step_q_c_n": 793.0,
|
|
"calib/step_q_gap": 0.10328735923289734,
|
|
"calib/step_q_w": 0.2879358437935844,
|
|
"calib/step_q_w_n": 717.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2351.0,
|
|
"completions/max_terminated_length": 2351.0,
|
|
"completions/mean_length": 551.2109375,
|
|
"completions/mean_terminated_length": 557.7470703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 123.0,
|
|
"epoch": 0.17493333333333333,
|
|
"grad_norm": 0.06633436679840088,
|
|
"kl": 0.07451629638671875,
|
|
"learning_rate": 1.0000000000000002e-06,
|
|
"loss": -0.0535,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.030875276774168015,
|
|
"mask/share_reasoning": 0.8452262282371521,
|
|
"mask/share_step_conf": 0.11217975616455078,
|
|
"num_tokens": 39419341.0,
|
|
"reward": 0.9514139890670776,
|
|
"reward_std": 0.2097827047109604,
|
|
"rewards/accuracy_reward_step": 0.5390625,
|
|
"rewards/asymmetric_l2_reward": 0.8823947906494141,
|
|
"rewards/final_brier_reward_step": 0.7180894613265991,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 164
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5945655107498169,
|
|
"adv/mean_abs_reasoning": 0.49014484882354736,
|
|
"adv/mean_abs_step_conf": 0.7411804795265198,
|
|
"adv/ratio_final_to_reasoning": 1.2130404148424725,
|
|
"adv/ratio_step_to_reasoning": 1.5121662123054271,
|
|
"adv/std_final_conf": 0.8082362413406372,
|
|
"adv/std_reasoning": 0.7575728297233582,
|
|
"adv/std_step_conf": 0.9333819150924683,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.7232782898105479,
|
|
"calib/avg_num_step_conf": 5.5,
|
|
"calib/ece": 0.2599600000000001,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.572,
|
|
"calib/gap": 0.4444726062467999,
|
|
"calib/mean_conf": 0.60276,
|
|
"calib/mu_c": 0.8267741935483872,
|
|
"calib/mu_w": 0.3823015873015873,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.18336000000000005,
|
|
"calib/std_conf": 0.47151583472880315,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4217050691244239,
|
|
"calib/step_q_c_n": 651.0,
|
|
"calib/step_q_gap": 0.1091423214361808,
|
|
"calib/step_q_w": 0.3125627476882431,
|
|
"calib/step_q_w_n": 757.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2518.0,
|
|
"completions/max_terminated_length": 2518.0,
|
|
"completions/mean_length": 535.36328125,
|
|
"completions/mean_terminated_length": 535.36328125,
|
|
"completions/min_length": 126.0,
|
|
"completions/min_terminated_length": 126.0,
|
|
"epoch": 0.176,
|
|
"grad_norm": 0.06460738927125931,
|
|
"kl": 0.12464141845703125,
|
|
"learning_rate": 9.722222222222224e-07,
|
|
"loss": -0.0664,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.03398456051945686,
|
|
"mask/share_reasoning": 0.8485789895057678,
|
|
"mask/share_step_conf": 0.11743646115064621,
|
|
"num_tokens": 39661970.0,
|
|
"reward": 0.9417548775672913,
|
|
"reward_std": 0.1901148110628128,
|
|
"rewards/accuracy_reward_step": 0.484375,
|
|
"rewards/asymmetric_l2_reward": 0.8748019337654114,
|
|
"rewards/final_brier_reward_step": 0.7173015475273132,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 165
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.4994466304779053,
|
|
"adv/mean_abs_reasoning": 0.37650659680366516,
|
|
"adv/mean_abs_step_conf": 0.7547671794891357,
|
|
"adv/ratio_final_to_reasoning": 1.326528232753247,
|
|
"adv/ratio_step_to_reasoning": 2.004658579415861,
|
|
"adv/std_final_conf": 0.7379820942878723,
|
|
"adv/std_reasoning": 0.6612639427185059,
|
|
"adv/std_step_conf": 0.9327965378761292,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.8125730994152047,
|
|
"calib/avg_num_step_conf": 6.0859375,
|
|
"calib/ece": 0.18629482071713144,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.6374501992031872,
|
|
"calib/gap": 0.5553486842105264,
|
|
"calib/mean_conf": 0.6624701195219124,
|
|
"calib/mu_c": 0.8394736842105264,
|
|
"calib/mu_w": 0.284125,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.08374501992031869,
|
|
"calib/std_conf": 0.4574895489015776,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.4180082559339525,
|
|
"calib/step_q_c_n": 969.0,
|
|
"calib/step_q_gap": 0.15486733912580308,
|
|
"calib/step_q_w": 0.26314091680814944,
|
|
"calib/step_q_w_n": 589.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2201.0,
|
|
"completions/max_terminated_length": 2201.0,
|
|
"completions/mean_length": 531.1875,
|
|
"completions/mean_terminated_length": 533.2706298828125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 173.0,
|
|
"epoch": 0.17706666666666668,
|
|
"grad_norm": 0.0442763976752758,
|
|
"kl": 0.06806182861328125,
|
|
"learning_rate": 9.444444444444445e-07,
|
|
"loss": -0.0328,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.032991744577884674,
|
|
"mask/share_reasoning": 0.8350811004638672,
|
|
"mask/share_step_conf": 0.12802088260650635,
|
|
"num_tokens": 39904138.0,
|
|
"reward": 1.0148489475250244,
|
|
"reward_std": 0.15825651586055756,
|
|
"rewards/accuracy_reward_step": 0.66796875,
|
|
"rewards/asymmetric_l2_reward": 0.9015299081802368,
|
|
"rewards/final_brier_reward_step": 0.7984804511070251,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 166
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.48053890466690063,
|
|
"adv/mean_abs_reasoning": 0.4025387167930603,
|
|
"adv/mean_abs_step_conf": 0.7606292963027954,
|
|
"adv/ratio_final_to_reasoning": 1.1937706476913603,
|
|
"adv/ratio_step_to_reasoning": 1.8895804666009919,
|
|
"adv/std_final_conf": 0.7448135614395142,
|
|
"adv/std_reasoning": 0.6815594434738159,
|
|
"adv/std_step_conf": 0.9320181608200073,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6876629324314802,
|
|
"calib/avg_num_step_conf": 5.83984375,
|
|
"calib/ece": 0.23704724409448819,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.7637795275590551,
|
|
"calib/gap": 0.3711963644049886,
|
|
"calib/mean_conf": 0.770984251968504,
|
|
"calib/mu_c": 0.8922807017543862,
|
|
"calib/mu_w": 0.5210843373493976,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.16740157480314963,
|
|
"calib/std_conf": 0.4104494077074939,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4270010235414535,
|
|
"calib/step_q_c_n": 977.0,
|
|
"calib/step_q_gap": 0.05418249072292064,
|
|
"calib/step_q_w": 0.37281853281853283,
|
|
"calib/step_q_w_n": 518.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2715.0,
|
|
"completions/max_terminated_length": 2715.0,
|
|
"completions/mean_length": 474.296875,
|
|
"completions/mean_terminated_length": 476.1568908691406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 166.0,
|
|
"epoch": 0.17813333333333334,
|
|
"grad_norm": 0.051055651158094406,
|
|
"kl": 0.07321929931640625,
|
|
"learning_rate": 9.166666666666666e-07,
|
|
"loss": -0.0208,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03513041511178017,
|
|
"mask/share_reasoning": 0.8342991471290588,
|
|
"mask/share_step_conf": 0.1266641467809677,
|
|
"num_tokens": 40131166.0,
|
|
"reward": 0.9845085144042969,
|
|
"reward_std": 0.1467195451259613,
|
|
"rewards/accuracy_reward_step": 0.66796875,
|
|
"rewards/asymmetric_l2_reward": 0.8823486566543579,
|
|
"rewards/final_brier_reward_step": 0.7554183006286621,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 167
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5689985752105713,
|
|
"adv/mean_abs_reasoning": 0.5295567512512207,
|
|
"adv/mean_abs_step_conf": 0.7315965294837952,
|
|
"adv/ratio_final_to_reasoning": 1.0744808254566836,
|
|
"adv/ratio_step_to_reasoning": 1.3815262061246523,
|
|
"adv/std_final_conf": 0.7971473336219788,
|
|
"adv/std_reasoning": 0.7928540110588074,
|
|
"adv/std_step_conf": 0.9335496425628662,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.712508809020437,
|
|
"calib/avg_num_step_conf": 6.125,
|
|
"calib/ece": 0.2544621513944223,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.6892430278884463,
|
|
"calib/gap": 0.3601057082452431,
|
|
"calib/mean_conf": 0.7120717131474104,
|
|
"calib/mu_c": 0.8354545454545454,
|
|
"calib/mu_w": 0.4753488372093024,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.15458167330677292,
|
|
"calib/std_conf": 0.4352474348213658,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4282789651293588,
|
|
"calib/step_q_c_n": 889.0,
|
|
"calib/step_q_gap": 0.17523036424570637,
|
|
"calib/step_q_w": 0.25304860088365244,
|
|
"calib/step_q_w_n": 679.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2918.0,
|
|
"completions/max_terminated_length": 2918.0,
|
|
"completions/mean_length": 566.99609375,
|
|
"completions/mean_terminated_length": 569.2196655273438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 149.0,
|
|
"epoch": 0.1792,
|
|
"grad_norm": 0.031764764338731766,
|
|
"kl": 0.06189727783203125,
|
|
"learning_rate": 8.88888888888889e-07,
|
|
"loss": 0.0167,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03189847618341446,
|
|
"mask/share_reasoning": 0.8458576798439026,
|
|
"mask/share_step_conf": 0.11833761632442474,
|
|
"num_tokens": 40380989.0,
|
|
"reward": 0.9734334945678711,
|
|
"reward_std": 0.20216943323612213,
|
|
"rewards/accuracy_reward_step": 0.64453125,
|
|
"rewards/asymmetric_l2_reward": 0.8927955031394958,
|
|
"rewards/final_brier_reward_step": 0.7298526763916016,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 168
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5428816080093384,
|
|
"adv/mean_abs_reasoning": 0.45098912715911865,
|
|
"adv/mean_abs_step_conf": 0.7654911279678345,
|
|
"adv/ratio_final_to_reasoning": 1.2037576414071687,
|
|
"adv/ratio_step_to_reasoning": 1.6973604946749699,
|
|
"adv/std_final_conf": 0.7792088985443115,
|
|
"adv/std_reasoning": 0.7014132142066956,
|
|
"adv/std_step_conf": 0.9325076341629028,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.709286971830986,
|
|
"calib/avg_num_step_conf": 5.203125,
|
|
"calib/ece": 0.29669291338582693,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.6929133858267716,
|
|
"calib/gap": 0.3395787223340041,
|
|
"calib/mean_conf": 0.718503937007874,
|
|
"calib/mu_c": 0.8682394366197184,
|
|
"calib/mu_w": 0.5286607142857143,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.2280708661417324,
|
|
"calib/std_conf": 0.4331410250972485,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4691484049930652,
|
|
"calib/step_q_c_n": 721.0,
|
|
"calib/step_q_gap": 0.12826460793905542,
|
|
"calib/step_q_w": 0.3408837970540098,
|
|
"calib/step_q_w_n": 611.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1644.0,
|
|
"completions/max_terminated_length": 1644.0,
|
|
"completions/mean_length": 477.62890625,
|
|
"completions/mean_terminated_length": 479.5019836425781,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 141.0,
|
|
"epoch": 0.18026666666666666,
|
|
"grad_norm": 0.036118652671575546,
|
|
"kl": 0.0745849609375,
|
|
"learning_rate": 8.611111111111112e-07,
|
|
"loss": 0.0798,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03544265776872635,
|
|
"mask/share_reasoning": 0.8431687355041504,
|
|
"mask/share_step_conf": 0.11748235672712326,
|
|
"num_tokens": 40607446.0,
|
|
"reward": 0.9495621919631958,
|
|
"reward_std": 0.17833659052848816,
|
|
"rewards/accuracy_reward_step": 0.5546875,
|
|
"rewards/asymmetric_l2_reward": 0.8874062895774841,
|
|
"rewards/final_brier_reward_step": 0.7023429870605469,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 169
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5604931116104126,
|
|
"adv/mean_abs_reasoning": 0.47254762053489685,
|
|
"adv/mean_abs_step_conf": 0.7200212478637695,
|
|
"adv/ratio_final_to_reasoning": 1.1861092665665451,
|
|
"adv/ratio_step_to_reasoning": 1.523700927853042,
|
|
"adv/std_final_conf": 0.7950620651245117,
|
|
"adv/std_reasoning": 0.7573960423469543,
|
|
"adv/std_step_conf": 0.9339142441749573,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.7524666666666667,
|
|
"calib/avg_num_step_conf": 6.33984375,
|
|
"calib/ece": 0.23223999999999984,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.732,
|
|
"calib/gap": 0.4186000000000002,
|
|
"calib/mean_conf": 0.75776,
|
|
"calib/mu_c": 0.9252000000000002,
|
|
"calib/mu_w": 0.5066,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.19499999999999984,
|
|
"calib/std_conf": 0.4107035212899933,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4403399122807018,
|
|
"calib/step_q_c_n": 912.0,
|
|
"calib/step_q_gap": 0.1492147364719817,
|
|
"calib/step_q_w": 0.2911251758087201,
|
|
"calib/step_q_w_n": 711.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 3006.0,
|
|
"completions/max_terminated_length": 3006.0,
|
|
"completions/mean_length": 536.828125,
|
|
"completions/mean_terminated_length": 536.828125,
|
|
"completions/min_length": 150.0,
|
|
"completions/min_terminated_length": 150.0,
|
|
"epoch": 0.18133333333333335,
|
|
"grad_norm": 0.04905329644680023,
|
|
"kl": 0.06963348388671875,
|
|
"learning_rate": 8.333333333333333e-07,
|
|
"loss": 0.0278,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.03186079114675522,
|
|
"mask/share_reasoning": 0.8381197452545166,
|
|
"mask/share_step_conf": 0.13001945614814758,
|
|
"num_tokens": 40849026.0,
|
|
"reward": 0.972845196723938,
|
|
"reward_std": 0.2081618309020996,
|
|
"rewards/accuracy_reward_step": 0.5859375,
|
|
"rewards/asymmetric_l2_reward": 0.8838129043579102,
|
|
"rewards/final_brier_reward_step": 0.7493773698806763,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 170
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6511637568473816,
|
|
"adv/mean_abs_reasoning": 0.5285821557044983,
|
|
"adv/mean_abs_step_conf": 0.7396926879882812,
|
|
"adv/ratio_final_to_reasoning": 1.2319064308546428,
|
|
"adv/ratio_step_to_reasoning": 1.3993901988658948,
|
|
"adv/std_final_conf": 0.8735871315002441,
|
|
"adv/std_reasoning": 0.79267817735672,
|
|
"adv/std_step_conf": 0.9336190223693848,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.708744492025073,
|
|
"calib/avg_num_step_conf": 5.5546875,
|
|
"calib/ece": 0.2965354330708661,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.6496062992125984,
|
|
"calib/gap": 0.3646049773474835,
|
|
"calib/mean_conf": 0.6776377952755905,
|
|
"calib/mu_c": 0.8541984732824428,
|
|
"calib/mu_w": 0.4895934959349593,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.22921259842519678,
|
|
"calib/std_conf": 0.4501171443201061,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4538608458390178,
|
|
"calib/step_q_c_n": 733.0,
|
|
"calib/step_q_gap": 0.15528319707268978,
|
|
"calib/step_q_w": 0.29857764876632803,
|
|
"calib/step_q_w_n": 689.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1665.0,
|
|
"completions/max_terminated_length": 1665.0,
|
|
"completions/mean_length": 484.640625,
|
|
"completions/mean_terminated_length": 486.54119873046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 160.0,
|
|
"epoch": 0.1824,
|
|
"grad_norm": 0.04068433865904808,
|
|
"kl": 0.06620025634765625,
|
|
"learning_rate": 8.055555555555557e-07,
|
|
"loss": -0.0137,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03494654595851898,
|
|
"mask/share_reasoning": 0.8401431441307068,
|
|
"mask/share_step_conf": 0.12100405246019363,
|
|
"num_tokens": 41079990.0,
|
|
"reward": 0.9433212876319885,
|
|
"reward_std": 0.1982666552066803,
|
|
"rewards/accuracy_reward_step": 0.51171875,
|
|
"rewards/asymmetric_l2_reward": 0.8885831832885742,
|
|
"rewards/final_brier_reward_step": 0.6980593800544739,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 171
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5560652017593384,
|
|
"adv/mean_abs_reasoning": 0.41541504859924316,
|
|
"adv/mean_abs_step_conf": 0.755997896194458,
|
|
"adv/ratio_final_to_reasoning": 1.3385774146467728,
|
|
"adv/ratio_step_to_reasoning": 1.819861602856328,
|
|
"adv/std_final_conf": 0.7791886925697327,
|
|
"adv/std_reasoning": 0.6816006898880005,
|
|
"adv/std_step_conf": 0.9320200085639954,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.6219101553531798,
|
|
"calib/avg_num_step_conf": 5.6953125,
|
|
"calib/ece": 0.2877777777777778,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.8412698412698413,
|
|
"calib/gap": 0.13524450906864627,
|
|
"calib/mean_conf": 0.8542857142857143,
|
|
"calib/mu_c": 0.893463687150838,
|
|
"calib/mu_w": 0.7582191780821917,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.2158730158730159,
|
|
"calib/std_conf": 0.3380638818457493,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.42098344693281403,
|
|
"calib/step_q_c_n": 1027.0,
|
|
"calib/step_q_gap": 0.06116906178200193,
|
|
"calib/step_q_w": 0.3598143851508121,
|
|
"calib/step_q_w_n": 431.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2329.0,
|
|
"completions/max_terminated_length": 2329.0,
|
|
"completions/mean_length": 474.8515625,
|
|
"completions/mean_terminated_length": 476.7137451171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 150.0,
|
|
"epoch": 0.18346666666666667,
|
|
"grad_norm": 0.029070017859339714,
|
|
"kl": 0.08162689208984375,
|
|
"learning_rate": 7.777777777777779e-07,
|
|
"loss": 0.0266,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03529661148786545,
|
|
"mask/share_reasoning": 0.8278791904449463,
|
|
"mask/share_step_conf": 0.13291800022125244,
|
|
"num_tokens": 41304904.0,
|
|
"reward": 0.9602549076080322,
|
|
"reward_std": 0.18681581318378448,
|
|
"rewards/accuracy_reward_step": 0.69921875,
|
|
"rewards/asymmetric_l2_reward": 0.8800837993621826,
|
|
"rewards/final_brier_reward_step": 0.7037070393562317,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 172
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5546171069145203,
|
|
"adv/mean_abs_reasoning": 0.4558177590370178,
|
|
"adv/mean_abs_step_conf": 0.7512186169624329,
|
|
"adv/ratio_final_to_reasoning": 1.2167518617226116,
|
|
"adv/ratio_step_to_reasoning": 1.6480679000956278,
|
|
"adv/std_final_conf": 0.793694019317627,
|
|
"adv/std_reasoning": 0.7205135226249695,
|
|
"adv/std_step_conf": 0.9342923164367676,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.6639427641394277,
|
|
"calib/avg_num_step_conf": 5.72265625,
|
|
"calib/ece": 0.34315789473684216,
|
|
"calib/final_conf_rate": 0.96484375,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.8987854251012146,
|
|
"calib/gap": 0.16451105384511044,
|
|
"calib/mean_conf": 0.9097165991902834,
|
|
"calib/mu_c": 0.976986301369863,
|
|
"calib/mu_w": 0.8124752475247525,
|
|
"calib/nonempty_final_conf_rate": 0.96484375,
|
|
"calib/nonempty_reasoning_rate": 0.96875,
|
|
"calib/nonempty_step_conf_rate": 0.96875,
|
|
"calib/pce": 0.3308906882591094,
|
|
"calib/std_conf": 0.2742769675103127,
|
|
"calib/step_conf_rate": 0.96875,
|
|
"calib/step_q_c": 0.4677382319173364,
|
|
"calib/step_q_c_n": 871.0,
|
|
"calib/step_q_gap": 0.05031398949309396,
|
|
"calib/step_q_w": 0.4174242424242424,
|
|
"calib/step_q_w_n": 594.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2478.0,
|
|
"completions/max_terminated_length": 2478.0,
|
|
"completions/mean_length": 515.125,
|
|
"completions/mean_terminated_length": 517.1451416015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 139.0,
|
|
"epoch": 0.18453333333333333,
|
|
"grad_norm": 0.03989225625991821,
|
|
"kl": 0.07109832763671875,
|
|
"learning_rate": 7.5e-07,
|
|
"loss": 0.0486,
|
|
"mask/has_final_conf_rate": 0.96484375,
|
|
"mask/share_final_conf": 0.035245005041360855,
|
|
"mask/share_reasoning": 0.8358505964279175,
|
|
"mask/share_step_conf": 0.12499810010194778,
|
|
"num_tokens": 41539936.0,
|
|
"reward": 0.8893845081329346,
|
|
"reward_std": 0.1888009011745453,
|
|
"rewards/accuracy_reward_step": 0.5703125,
|
|
"rewards/asymmetric_l2_reward": 0.8339041471481323,
|
|
"rewards/final_brier_reward_step": 0.6378335952758789,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"step": 173
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6846530437469482,
|
|
"adv/mean_abs_reasoning": 0.6147565841674805,
|
|
"adv/mean_abs_step_conf": 0.7412719130516052,
|
|
"adv/ratio_final_to_reasoning": 1.1136977811699624,
|
|
"adv/ratio_step_to_reasoning": 1.2057974361599642,
|
|
"adv/std_final_conf": 0.8761252164840698,
|
|
"adv/std_reasoning": 0.8266335725784302,
|
|
"adv/std_step_conf": 0.9345950484275818,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.6985884485884486,
|
|
"calib/avg_num_step_conf": 5.8828125,
|
|
"calib/ece": 0.3396385542168675,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.6224899598393574,
|
|
"calib/gap": 0.318115773115773,
|
|
"calib/mean_conf": 0.6447791164658635,
|
|
"calib/mu_c": 0.8134188034188033,
|
|
"calib/mu_w": 0.49530303030303036,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.2572690763052209,
|
|
"calib/std_conf": 0.46009433663111154,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.40963017751479286,
|
|
"calib/step_q_c_n": 676.0,
|
|
"calib/step_q_gap": 0.07976270763527482,
|
|
"calib/step_q_w": 0.32986746987951804,
|
|
"calib/step_q_w_n": 830.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2518.0,
|
|
"completions/max_terminated_length": 2518.0,
|
|
"completions/mean_length": 582.76171875,
|
|
"completions/mean_terminated_length": 582.76171875,
|
|
"completions/min_length": 218.0,
|
|
"completions/min_terminated_length": 218.0,
|
|
"epoch": 0.1856,
|
|
"grad_norm": 0.03356742113828659,
|
|
"kl": 0.06365203857421875,
|
|
"learning_rate": 7.222222222222222e-07,
|
|
"loss": -0.0127,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03057742491364479,
|
|
"mask/share_reasoning": 0.8542848825454712,
|
|
"mask/share_step_conf": 0.11513769626617432,
|
|
"num_tokens": 41793355.0,
|
|
"reward": 0.8853936195373535,
|
|
"reward_std": 0.238206684589386,
|
|
"rewards/accuracy_reward_step": 0.45703125,
|
|
"rewards/asymmetric_l2_reward": 0.8359798192977905,
|
|
"rewards/final_brier_reward_step": 0.6488698720932007,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 174
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6600933074951172,
|
|
"adv/mean_abs_reasoning": 0.49963003396987915,
|
|
"adv/mean_abs_step_conf": 0.7545915842056274,
|
|
"adv/ratio_final_to_reasoning": 1.3211641867288382,
|
|
"adv/ratio_step_to_reasoning": 1.5103006883111414,
|
|
"adv/std_final_conf": 0.8539295792579651,
|
|
"adv/std_reasoning": 0.7394165992736816,
|
|
"adv/std_step_conf": 0.9340283274650574,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.6916584564860426,
|
|
"calib/avg_num_step_conf": 6.4453125,
|
|
"calib/ece": 0.35928,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.656,
|
|
"calib/gap": 0.317983579638752,
|
|
"calib/mean_conf": 0.67776,
|
|
"calib/mu_c": 0.8621904761904762,
|
|
"calib/mu_w": 0.5442068965517242,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.30852,
|
|
"calib/std_conf": 0.45016061844634964,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4147289156626506,
|
|
"calib/step_q_c_n": 664.0,
|
|
"calib/step_q_gap": 0.12084149172755931,
|
|
"calib/step_q_w": 0.29388742393509126,
|
|
"calib/step_q_w_n": 986.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2887.0,
|
|
"completions/max_terminated_length": 2887.0,
|
|
"completions/mean_length": 553.91015625,
|
|
"completions/mean_terminated_length": 558.2716674804688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 147.0,
|
|
"epoch": 0.18666666666666668,
|
|
"grad_norm": 0.06960975378751755,
|
|
"kl": 0.06383132934570312,
|
|
"learning_rate": 6.944444444444446e-07,
|
|
"loss": -0.0632,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.03212447091937065,
|
|
"mask/share_reasoning": 0.834095299243927,
|
|
"mask/share_step_conf": 0.12596774101257324,
|
|
"num_tokens": 42040980.0,
|
|
"reward": 0.8779298067092896,
|
|
"reward_std": 0.2234022468328476,
|
|
"rewards/accuracy_reward_step": 0.4140625,
|
|
"rewards/asymmetric_l2_reward": 0.8505501747131348,
|
|
"rewards/final_brier_reward_step": 0.6271843910217285,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 175
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5671244859695435,
|
|
"adv/mean_abs_reasoning": 0.45925626158714294,
|
|
"adv/mean_abs_step_conf": 0.7286878228187561,
|
|
"adv/ratio_final_to_reasoning": 1.2348758926217334,
|
|
"adv/ratio_step_to_reasoning": 1.5866693255318611,
|
|
"adv/std_final_conf": 0.7785314321517944,
|
|
"adv/std_reasoning": 0.7206246852874756,
|
|
"adv/std_step_conf": 0.9327684640884399,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.7994992295839753,
|
|
"calib/avg_num_step_conf": 6.03125,
|
|
"calib/ece": 0.23900000000000005,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.68,
|
|
"calib/gap": 0.47578839239856185,
|
|
"calib/mean_conf": 0.70164,
|
|
"calib/mu_c": 0.9262121212121212,
|
|
"calib/mu_w": 0.4504237288135593,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.20632000000000003,
|
|
"calib/std_conf": 0.4420272733667008,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4255120101137801,
|
|
"calib/step_q_c_n": 791.0,
|
|
"calib/step_q_gap": 0.15050537000753839,
|
|
"calib/step_q_w": 0.2750066401062417,
|
|
"calib/step_q_w_n": 753.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2814.0,
|
|
"completions/max_terminated_length": 2814.0,
|
|
"completions/mean_length": 526.4375,
|
|
"completions/mean_terminated_length": 526.4375,
|
|
"completions/min_length": 124.0,
|
|
"completions/min_terminated_length": 124.0,
|
|
"epoch": 0.18773333333333334,
|
|
"grad_norm": 0.07707049697637558,
|
|
"kl": 0.0714874267578125,
|
|
"learning_rate": 6.666666666666667e-07,
|
|
"loss": -0.0188,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.036508336663246155,
|
|
"mask/share_reasoning": 0.8322881460189819,
|
|
"mask/share_step_conf": 0.13120350241661072,
|
|
"num_tokens": 42279812.0,
|
|
"reward": 0.9646437764167786,
|
|
"reward_std": 0.1994141936302185,
|
|
"rewards/accuracy_reward_step": 0.515625,
|
|
"rewards/asymmetric_l2_reward": 0.886325478553772,
|
|
"rewards/final_brier_reward_step": 0.7445245981216431,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 176
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6146119832992554,
|
|
"adv/mean_abs_reasoning": 0.497945100069046,
|
|
"adv/mean_abs_step_conf": 0.7429401278495789,
|
|
"adv/ratio_final_to_reasoning": 1.234296678919086,
|
|
"adv/ratio_step_to_reasoning": 1.4920121269323894,
|
|
"adv/std_final_conf": 0.8337008953094482,
|
|
"adv/std_reasoning": 0.7752436995506287,
|
|
"adv/std_step_conf": 0.9334618449211121,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.7455472379969024,
|
|
"calib/avg_num_step_conf": 6.1015625,
|
|
"calib/ece": 0.26134387351778665,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.6324110671936759,
|
|
"calib/gap": 0.40504710893133716,
|
|
"calib/mean_conf": 0.6538339920948617,
|
|
"calib/mu_c": 0.8203355704697987,
|
|
"calib/mu_w": 0.4152884615384615,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.16312252964426888,
|
|
"calib/std_conf": 0.45910542430164514,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4397441860465117,
|
|
"calib/step_q_c_n": 860.0,
|
|
"calib/step_q_gap": 0.13854048234280797,
|
|
"calib/step_q_w": 0.30120370370370375,
|
|
"calib/step_q_w_n": 702.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1611.0,
|
|
"completions/max_terminated_length": 1611.0,
|
|
"completions/mean_length": 505.9140625,
|
|
"completions/mean_terminated_length": 507.8980712890625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 196.0,
|
|
"epoch": 0.1888,
|
|
"grad_norm": 0.03708864748477936,
|
|
"kl": 0.08526611328125,
|
|
"learning_rate": 6.388888888888889e-07,
|
|
"loss": -0.016,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.033189140260219574,
|
|
"mask/share_reasoning": 0.8366128206253052,
|
|
"mask/share_step_conf": 0.12629178166389465,
|
|
"num_tokens": 42513158.0,
|
|
"reward": 0.9594067335128784,
|
|
"reward_std": 0.21088215708732605,
|
|
"rewards/accuracy_reward_step": 0.58203125,
|
|
"rewards/asymmetric_l2_reward": 0.8830938935279846,
|
|
"rewards/final_brier_reward_step": 0.7232195138931274,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 177
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6202833652496338,
|
|
"adv/mean_abs_reasoning": 0.4997982382774353,
|
|
"adv/mean_abs_step_conf": 0.7502584457397461,
|
|
"adv/ratio_final_to_reasoning": 1.2410675303447505,
|
|
"adv/ratio_step_to_reasoning": 1.501122629654572,
|
|
"adv/std_final_conf": 0.8151530623435974,
|
|
"adv/std_reasoning": 0.7575740814208984,
|
|
"adv/std_step_conf": 0.9331055879592896,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.803671928620453,
|
|
"calib/avg_num_step_conf": 5.38671875,
|
|
"calib/ece": 0.20469879518072293,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.7228915662650602,
|
|
"calib/gap": 0.4577343857240904,
|
|
"calib/mean_conf": 0.7482329317269076,
|
|
"calib/mu_c": 0.921032258064516,
|
|
"calib/mu_w": 0.46329787234042563,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.1652208835341366,
|
|
"calib/std_conf": 0.41368664047303105,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.46093316519546024,
|
|
"calib/step_q_c_n": 793.0,
|
|
"calib/step_q_gap": 0.19946558840365136,
|
|
"calib/step_q_w": 0.2614675767918089,
|
|
"calib/step_q_w_n": 586.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2674.0,
|
|
"completions/max_terminated_length": 2674.0,
|
|
"completions/mean_length": 474.35546875,
|
|
"completions/mean_terminated_length": 478.0905456542969,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 153.0,
|
|
"epoch": 0.18986666666666666,
|
|
"grad_norm": 0.04096180945634842,
|
|
"kl": 0.08055877685546875,
|
|
"learning_rate": 6.111111111111112e-07,
|
|
"loss": 0.0305,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.035738177597522736,
|
|
"mask/share_reasoning": 0.8296140432357788,
|
|
"mask/share_step_conf": 0.12683530151844025,
|
|
"num_tokens": 42740665.0,
|
|
"reward": 0.9872586727142334,
|
|
"reward_std": 0.21035568416118622,
|
|
"rewards/accuracy_reward_step": 0.60546875,
|
|
"rewards/asymmetric_l2_reward": 0.8873934745788574,
|
|
"rewards/final_brier_reward_step": 0.7714987993240356,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 178
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5899964570999146,
|
|
"adv/mean_abs_reasoning": 0.5133465528488159,
|
|
"adv/mean_abs_step_conf": 0.7466897964477539,
|
|
"adv/ratio_final_to_reasoning": 1.1493141501111288,
|
|
"adv/ratio_step_to_reasoning": 1.454553054469734,
|
|
"adv/std_final_conf": 0.7958014011383057,
|
|
"adv/std_reasoning": 0.7394075989723206,
|
|
"adv/std_step_conf": 0.9330047965049744,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.7870469798657718,
|
|
"calib/avg_num_step_conf": 5.6953125,
|
|
"calib/ece": 0.2223293172690764,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.6947791164658634,
|
|
"calib/gap": 0.44986040268456373,
|
|
"calib/mean_conf": 0.7183935742971888,
|
|
"calib/mu_c": 0.8990604026845638,
|
|
"calib/mu_w": 0.44920000000000004,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.17116465863453825,
|
|
"calib/std_conf": 0.42960127880849125,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.42761737089201884,
|
|
"calib/step_q_c_n": 852.0,
|
|
"calib/step_q_gap": 0.14840945009993967,
|
|
"calib/step_q_w": 0.2792079207920792,
|
|
"calib/step_q_w_n": 606.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2685.0,
|
|
"completions/max_terminated_length": 2685.0,
|
|
"completions/mean_length": 505.24609375,
|
|
"completions/mean_terminated_length": 509.2243957519531,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 199.0,
|
|
"epoch": 0.19093333333333334,
|
|
"grad_norm": 0.0683615505695343,
|
|
"kl": 0.069488525390625,
|
|
"learning_rate": 5.833333333333334e-07,
|
|
"loss": -0.0626,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.034047625958919525,
|
|
"mask/share_reasoning": 0.83702552318573,
|
|
"mask/share_step_conf": 0.1211143285036087,
|
|
"num_tokens": 42976272.0,
|
|
"reward": 0.9702746272087097,
|
|
"reward_std": 0.20280179381370544,
|
|
"rewards/accuracy_reward_step": 0.58203125,
|
|
"rewards/asymmetric_l2_reward": 0.878600001335144,
|
|
"rewards/final_brier_reward_step": 0.7517929673194885,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"step": 179
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5583059787750244,
|
|
"adv/mean_abs_reasoning": 0.3551320731639862,
|
|
"adv/mean_abs_step_conf": 0.7670993804931641,
|
|
"adv/ratio_final_to_reasoning": 1.5721080154796956,
|
|
"adv/ratio_step_to_reasoning": 2.160039710462725,
|
|
"adv/std_final_conf": 0.7800789475440979,
|
|
"adv/std_reasoning": 0.6403230428695679,
|
|
"adv/std_step_conf": 0.9319993853569031,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.752177759629781,
|
|
"calib/avg_num_step_conf": 6.31640625,
|
|
"calib/ece": 0.2102788844621513,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.7051792828685259,
|
|
"calib/gap": 0.463054988430652,
|
|
"calib/mean_conf": 0.7283665338645419,
|
|
"calib/mu_c": 0.8999367088607595,
|
|
"calib/mu_w": 0.4368817204301075,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.1545816733067728,
|
|
"calib/std_conf": 0.42847747819257054,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4166522210184182,
|
|
"calib/step_q_c_n": 923.0,
|
|
"calib/step_q_gap": 0.1318827685688505,
|
|
"calib/step_q_w": 0.2847694524495677,
|
|
"calib/step_q_w_n": 694.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2492.0,
|
|
"completions/max_terminated_length": 2492.0,
|
|
"completions/mean_length": 572.82421875,
|
|
"completions/mean_terminated_length": 577.3346557617188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 130.0,
|
|
"epoch": 0.192,
|
|
"grad_norm": 0.04807959124445915,
|
|
"kl": 0.065093994140625,
|
|
"learning_rate": 5.555555555555555e-07,
|
|
"loss": 0.001,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.0307810977101326,
|
|
"mask/share_reasoning": 0.8431800603866577,
|
|
"mask/share_step_conf": 0.1182263046503067,
|
|
"num_tokens": 43226771.0,
|
|
"reward": 0.9837304353713989,
|
|
"reward_std": 0.17399966716766357,
|
|
"rewards/accuracy_reward_step": 0.6171875,
|
|
"rewards/asymmetric_l2_reward": 0.8786393404006958,
|
|
"rewards/final_brier_reward_step": 0.7700715065002441,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 180
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5820561647415161,
|
|
"adv/mean_abs_reasoning": 0.41340193152427673,
|
|
"adv/mean_abs_step_conf": 0.7540974617004395,
|
|
"adv/ratio_final_to_reasoning": 1.4079667276719903,
|
|
"adv/ratio_step_to_reasoning": 1.824126604633814,
|
|
"adv/std_final_conf": 0.8107714653015137,
|
|
"adv/std_reasoning": 0.6816644072532654,
|
|
"adv/std_step_conf": 0.933193027973175,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.7967366557045283,
|
|
"calib/avg_num_step_conf": 5.7421875,
|
|
"calib/ece": 0.27083333333333326,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.5952380952380952,
|
|
"calib/gap": 0.4364672400708322,
|
|
"calib/mean_conf": 0.6251984126984127,
|
|
"calib/mu_c": 0.8572881355932203,
|
|
"calib/mu_w": 0.42082089552238805,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.2138888888888888,
|
|
"calib/std_conf": 0.46455683814846205,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.42562974203338394,
|
|
"calib/step_q_c_n": 659.0,
|
|
"calib/step_q_gap": 0.15130175189774892,
|
|
"calib/step_q_w": 0.274327990135635,
|
|
"calib/step_q_w_n": 811.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2933.0,
|
|
"completions/max_terminated_length": 2933.0,
|
|
"completions/mean_length": 470.2734375,
|
|
"completions/mean_terminated_length": 473.97637939453125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 161.0,
|
|
"epoch": 0.19306666666666666,
|
|
"grad_norm": 0.045435406267642975,
|
|
"kl": 0.08133697509765625,
|
|
"learning_rate": 5.277777777777779e-07,
|
|
"loss": -0.0131,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.03530710190534592,
|
|
"mask/share_reasoning": 0.8284176588058472,
|
|
"mask/share_step_conf": 0.12846270203590393,
|
|
"num_tokens": 43453425.0,
|
|
"reward": 0.9462225437164307,
|
|
"reward_std": 0.16522014141082764,
|
|
"rewards/accuracy_reward_step": 0.4609375,
|
|
"rewards/asymmetric_l2_reward": 0.8868392705917358,
|
|
"rewards/final_brier_reward_step": 0.7165433168411255,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 181
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5667697191238403,
|
|
"adv/mean_abs_reasoning": 0.4072721302509308,
|
|
"adv/mean_abs_step_conf": 0.7169884443283081,
|
|
"adv/ratio_final_to_reasoning": 1.3916241181900637,
|
|
"adv/ratio_step_to_reasoning": 1.7604652787966444,
|
|
"adv/std_final_conf": 0.7674015164375305,
|
|
"adv/std_reasoning": 0.6816434860229492,
|
|
"adv/std_step_conf": 0.9322676658630371,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.7537513208876364,
|
|
"calib/avg_num_step_conf": 6.359375,
|
|
"calib/ece": 0.2442857142857143,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.7420634920634921,
|
|
"calib/gap": 0.35981683691440625,
|
|
"calib/mean_conf": 0.764920634920635,
|
|
"calib/mu_c": 0.8862874251497005,
|
|
"calib/mu_w": 0.5264705882352942,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.17325396825396824,
|
|
"calib/std_conf": 0.4069012711885218,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4113034623217922,
|
|
"calib/step_q_c_n": 982.0,
|
|
"calib/step_q_gap": 0.12497219297194695,
|
|
"calib/step_q_w": 0.28633126934984526,
|
|
"calib/step_q_w_n": 646.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2670.0,
|
|
"completions/max_terminated_length": 2670.0,
|
|
"completions/mean_length": 526.54296875,
|
|
"completions/mean_terminated_length": 530.68896484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 215.0,
|
|
"epoch": 0.19413333333333332,
|
|
"grad_norm": 0.031100839376449585,
|
|
"kl": 0.06735992431640625,
|
|
"learning_rate": 5.000000000000001e-07,
|
|
"loss": -0.0536,
|
|
"mask/has_final_conf_rate": 0.984375,
|
|
"mask/share_final_conf": 0.031918980181217194,
|
|
"mask/share_reasoning": 0.8300646543502808,
|
|
"mask/share_step_conf": 0.13020385801792145,
|
|
"num_tokens": 43694380.0,
|
|
"reward": 0.9876556396484375,
|
|
"reward_std": 0.1890118420124054,
|
|
"rewards/accuracy_reward_step": 0.65234375,
|
|
"rewards/asymmetric_l2_reward": 0.8993324041366577,
|
|
"rewards/final_brier_reward_step": 0.7494163513183594,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 182
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6406201720237732,
|
|
"adv/mean_abs_reasoning": 0.4643709659576416,
|
|
"adv/mean_abs_step_conf": 0.7473157644271851,
|
|
"adv/ratio_final_to_reasoning": 1.37954398312277,
|
|
"adv/ratio_step_to_reasoning": 1.6093076854752215,
|
|
"adv/std_final_conf": 0.844761073589325,
|
|
"adv/std_reasoning": 0.7206538319587708,
|
|
"adv/std_step_conf": 0.9331881999969482,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.707851110416015,
|
|
"calib/avg_num_step_conf": 5.57421875,
|
|
"calib/ece": 0.3112598425196852,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.6181102362204725,
|
|
"calib/gap": 0.3128683140444166,
|
|
"calib/mean_conf": 0.6425196850393701,
|
|
"calib/mu_c": 0.7841726618705036,
|
|
"calib/mu_w": 0.471304347826087,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.20326771653543318,
|
|
"calib/std_conf": 0.46144068898940727,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.40705179282868525,
|
|
"calib/step_q_c_n": 753.0,
|
|
"calib/step_q_gap": 0.12211114000969414,
|
|
"calib/step_q_w": 0.2849406528189911,
|
|
"calib/step_q_w_n": 674.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2040.0,
|
|
"completions/max_terminated_length": 2040.0,
|
|
"completions/mean_length": 522.02734375,
|
|
"completions/mean_terminated_length": 522.02734375,
|
|
"completions/min_length": 202.0,
|
|
"completions/min_terminated_length": 202.0,
|
|
"epoch": 0.1952,
|
|
"grad_norm": 0.035230036824941635,
|
|
"kl": 0.067352294921875,
|
|
"learning_rate": 4.7222222222222226e-07,
|
|
"loss": -0.1027,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.03272725269198418,
|
|
"mask/share_reasoning": 0.8501238226890564,
|
|
"mask/share_step_conf": 0.11714892089366913,
|
|
"num_tokens": 43934699.0,
|
|
"reward": 0.9388773441314697,
|
|
"reward_std": 0.19883155822753906,
|
|
"rewards/accuracy_reward_step": 0.54296875,
|
|
"rewards/asymmetric_l2_reward": 0.8908123970031738,
|
|
"rewards/final_brier_reward_step": 0.6799108982086182,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 183
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.599378764629364,
|
|
"adv/mean_abs_reasoning": 0.5096433162689209,
|
|
"adv/mean_abs_step_conf": 0.7571796178817749,
|
|
"adv/ratio_final_to_reasoning": 1.1760750028419735,
|
|
"adv/ratio_step_to_reasoning": 1.4857049895700738,
|
|
"adv/std_final_conf": 0.8223716020584106,
|
|
"adv/std_reasoning": 0.757595956325531,
|
|
"adv/std_step_conf": 0.9336126446723938,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.7833777481678881,
|
|
"calib/avg_num_step_conf": 6.16796875,
|
|
"calib/ece": 0.18841897233201577,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.691699604743083,
|
|
"calib/gap": 0.5367495003331113,
|
|
"calib/mean_conf": 0.7044664031620554,
|
|
"calib/mu_c": 0.9060126582278482,
|
|
"calib/mu_w": 0.36926315789473685,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.13418972332015805,
|
|
"calib/std_conf": 0.44528026864656384,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4251701427003294,
|
|
"calib/step_q_c_n": 911.0,
|
|
"calib/step_q_gap": 0.13512523252068864,
|
|
"calib/step_q_w": 0.29004491017964074,
|
|
"calib/step_q_w_n": 668.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2798.0,
|
|
"completions/max_terminated_length": 2798.0,
|
|
"completions/mean_length": 522.33203125,
|
|
"completions/mean_terminated_length": 522.33203125,
|
|
"completions/min_length": 157.0,
|
|
"completions/min_terminated_length": 157.0,
|
|
"epoch": 0.19626666666666667,
|
|
"grad_norm": 0.03629198670387268,
|
|
"kl": 0.06891632080078125,
|
|
"learning_rate": 4.444444444444445e-07,
|
|
"loss": 0.0815,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.032990530133247375,
|
|
"mask/share_reasoning": 0.843195378780365,
|
|
"mask/share_step_conf": 0.12381406873464584,
|
|
"num_tokens": 44173696.0,
|
|
"reward": 1.0000405311584473,
|
|
"reward_std": 0.20342886447906494,
|
|
"rewards/accuracy_reward_step": 0.6171875,
|
|
"rewards/asymmetric_l2_reward": 0.8806300759315491,
|
|
"rewards/final_brier_reward_step": 0.7991386651992798,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 184
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5196036696434021,
|
|
"adv/mean_abs_reasoning": 0.3922334909439087,
|
|
"adv/mean_abs_step_conf": 0.7511488199234009,
|
|
"adv/ratio_final_to_reasoning": 1.3247305027242255,
|
|
"adv/ratio_step_to_reasoning": 1.9150552853499674,
|
|
"adv/std_final_conf": 0.7578298449516296,
|
|
"adv/std_reasoning": 0.6816517114639282,
|
|
"adv/std_step_conf": 0.9322458505630493,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.790268456375839,
|
|
"calib/avg_num_step_conf": 6.34765625,
|
|
"calib/ece": 0.18509960159362543,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.6573705179282868,
|
|
"calib/gap": 0.5555230951440977,
|
|
"calib/mean_conf": 0.6780079681274901,
|
|
"calib/mu_c": 0.9037583892617448,
|
|
"calib/mu_w": 0.3482352941176471,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.1347410358565736,
|
|
"calib/std_conf": 0.45274111572155207,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4383154121863799,
|
|
"calib/step_q_c_n": 837.0,
|
|
"calib/step_q_gap": 0.1932646507650601,
|
|
"calib/step_q_w": 0.24505076142131982,
|
|
"calib/step_q_w_n": 788.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2399.0,
|
|
"completions/max_terminated_length": 2399.0,
|
|
"completions/mean_length": 518.94921875,
|
|
"completions/mean_terminated_length": 520.984375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 145.0,
|
|
"epoch": 0.19733333333333333,
|
|
"grad_norm": 0.04340367391705513,
|
|
"kl": 0.06716156005859375,
|
|
"learning_rate": 4.1666666666666667e-07,
|
|
"loss": -0.041,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03309101611375809,
|
|
"mask/share_reasoning": 0.8382681012153625,
|
|
"mask/share_step_conf": 0.12473461031913757,
|
|
"num_tokens": 44413467.0,
|
|
"reward": 0.9960746765136719,
|
|
"reward_std": 0.16638922691345215,
|
|
"rewards/accuracy_reward_step": 0.58203125,
|
|
"rewards/asymmetric_l2_reward": 0.8855493068695068,
|
|
"rewards/final_brier_reward_step": 0.7948812246322632,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 185
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5315274000167847,
|
|
"adv/mean_abs_reasoning": 0.4281071424484253,
|
|
"adv/mean_abs_step_conf": 0.7454137206077576,
|
|
"adv/ratio_final_to_reasoning": 1.2415756415015162,
|
|
"adv/ratio_step_to_reasoning": 1.7411849667926498,
|
|
"adv/std_final_conf": 0.7777411937713623,
|
|
"adv/std_reasoning": 0.7204946279525757,
|
|
"adv/std_step_conf": 0.9312522411346436,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.765589455372675,
|
|
"calib/avg_num_step_conf": 6.1953125,
|
|
"calib/ece": 0.22940711462450591,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.6245059288537549,
|
|
"calib/gap": 0.47976916900843036,
|
|
"calib/mean_conf": 0.6343873517786561,
|
|
"calib/mu_c": 0.8126415094339623,
|
|
"calib/mu_w": 0.33287234042553193,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.11766798418972331,
|
|
"calib/std_conf": 0.46896248296948856,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.41842163355408385,
|
|
"calib/step_q_c_n": 906.0,
|
|
"calib/step_q_gap": 0.15374516296584861,
|
|
"calib/step_q_w": 0.26467647058823524,
|
|
"calib/step_q_w_n": 680.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2520.0,
|
|
"completions/max_terminated_length": 2520.0,
|
|
"completions/mean_length": 516.51171875,
|
|
"completions/mean_terminated_length": 520.5787353515625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 189.0,
|
|
"epoch": 0.1984,
|
|
"grad_norm": 0.031362757086753845,
|
|
"kl": 0.07161712646484375,
|
|
"learning_rate": 3.8888888888888895e-07,
|
|
"loss": 0.0144,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03271438926458359,
|
|
"mask/share_reasoning": 0.8355770707130432,
|
|
"mask/share_step_conf": 0.12389606237411499,
|
|
"num_tokens": 44650734.0,
|
|
"reward": 0.9942600131034851,
|
|
"reward_std": 0.15620407462120056,
|
|
"rewards/accuracy_reward_step": 0.62109375,
|
|
"rewards/asymmetric_l2_reward": 0.9050840139389038,
|
|
"rewards/final_brier_reward_step": 0.7615609169006348,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 186
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.7074639201164246,
|
|
"adv/mean_abs_reasoning": 0.5609918832778931,
|
|
"adv/mean_abs_step_conf": 0.7377896308898926,
|
|
"adv/ratio_final_to_reasoning": 1.2610947523566487,
|
|
"adv/ratio_step_to_reasoning": 1.3151520599174533,
|
|
"adv/std_final_conf": 0.878374457359314,
|
|
"adv/std_reasoning": 0.7928860783576965,
|
|
"adv/std_step_conf": 0.9338283538818359,
|
|
"calib/answer_extract_rate": 0.9609375,
|
|
"calib/auroc": 0.6919585826665771,
|
|
"calib/avg_num_step_conf": 6.58203125,
|
|
"calib/ece": 0.326178861788618,
|
|
"calib/final_conf_rate": 0.9609375,
|
|
"calib/format_rate": 0.953125,
|
|
"calib/frac_conf_gt_0.9": 0.6260162601626016,
|
|
"calib/gap": 0.28398171182680026,
|
|
"calib/mean_conf": 0.655040650406504,
|
|
"calib/mu_c": 0.7785611510791367,
|
|
"calib/mu_w": 0.49457943925233644,
|
|
"calib/nonempty_final_conf_rate": 0.9609375,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
|
"calib/pce": 0.20808943089430904,
|
|
"calib/std_conf": 0.4556517247662998,
|
|
"calib/step_conf_rate": 0.98046875,
|
|
"calib/step_q_c": 0.3989940119760479,
|
|
"calib/step_q_c_n": 835.0,
|
|
"calib/step_q_gap": 0.13965283550545965,
|
|
"calib/step_q_w": 0.25934117647058824,
|
|
"calib/step_q_w_n": 850.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2988.0,
|
|
"completions/max_terminated_length": 2988.0,
|
|
"completions/mean_length": 573.05078125,
|
|
"completions/mean_terminated_length": 573.05078125,
|
|
"completions/min_length": 167.0,
|
|
"completions/min_terminated_length": 167.0,
|
|
"epoch": 0.19946666666666665,
|
|
"grad_norm": 0.047245342284440994,
|
|
"kl": 0.06989288330078125,
|
|
"learning_rate": 3.611111111111111e-07,
|
|
"loss": 0.0154,
|
|
"mask/has_final_conf_rate": 0.9609375,
|
|
"mask/share_final_conf": 0.03235046565532684,
|
|
"mask/share_reasoning": 0.8461363315582275,
|
|
"mask/share_step_conf": 0.1215132549405098,
|
|
"num_tokens": 44898979.0,
|
|
"reward": 0.8949373960494995,
|
|
"reward_std": 0.23922453820705414,
|
|
"rewards/accuracy_reward_step": 0.54296875,
|
|
"rewards/asymmetric_l2_reward": 0.8468563556671143,
|
|
"rewards/final_brier_reward_step": 0.6437996029853821,
|
|
"rewards/format_reward_step": 0.953125,
|
|
"step": 187
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5643256902694702,
|
|
"adv/mean_abs_reasoning": 0.47549188137054443,
|
|
"adv/mean_abs_step_conf": 0.7558174729347229,
|
|
"adv/ratio_final_to_reasoning": 1.186825080257677,
|
|
"adv/ratio_step_to_reasoning": 1.5895486390980553,
|
|
"adv/std_final_conf": 0.7799937129020691,
|
|
"adv/std_reasoning": 0.7574599981307983,
|
|
"adv/std_step_conf": 0.9330994486808777,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.6968869290509412,
|
|
"calib/avg_num_step_conf": 5.8203125,
|
|
"calib/ece": 0.2914859437751004,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.6987951807228916,
|
|
"calib/gap": 0.32033960773989734,
|
|
"calib/mean_conf": 0.7150200803212851,
|
|
"calib/mu_c": 0.8526760563380282,
|
|
"calib/mu_w": 0.5323364485981309,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.21811244979919678,
|
|
"calib/std_conf": 0.4348300914519625,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.43332065906210393,
|
|
"calib/step_q_c_n": 789.0,
|
|
"calib/step_q_gap": 0.10563164337023517,
|
|
"calib/step_q_w": 0.32768901569186876,
|
|
"calib/step_q_w_n": 701.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2533.0,
|
|
"completions/max_terminated_length": 2533.0,
|
|
"completions/mean_length": 534.078125,
|
|
"completions/mean_terminated_length": 536.172607421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 119.0,
|
|
"epoch": 0.20053333333333334,
|
|
"grad_norm": 0.038735099136829376,
|
|
"kl": 0.07011795043945312,
|
|
"learning_rate": 3.3333333333333335e-07,
|
|
"loss": 0.0616,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.033644575625658035,
|
|
"mask/share_reasoning": 0.8366734981536865,
|
|
"mask/share_step_conf": 0.12577570974826813,
|
|
"num_tokens": 45139775.0,
|
|
"reward": 0.9236536026000977,
|
|
"reward_std": 0.19174334406852722,
|
|
"rewards/accuracy_reward_step": 0.5546875,
|
|
"rewards/asymmetric_l2_reward": 0.8591135144233704,
|
|
"rewards/final_brier_reward_step": 0.6827249526977539,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 188
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6418702602386475,
|
|
"adv/mean_abs_reasoning": 0.47118186950683594,
|
|
"adv/mean_abs_step_conf": 0.7607347965240479,
|
|
"adv/ratio_final_to_reasoning": 1.3622558544335823,
|
|
"adv/ratio_step_to_reasoning": 1.6145247636974518,
|
|
"adv/std_final_conf": 0.8555505275726318,
|
|
"adv/std_reasoning": 0.7392024993896484,
|
|
"adv/std_step_conf": 0.9324488639831543,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.7302891744933266,
|
|
"calib/avg_num_step_conf": 5.65234375,
|
|
"calib/ece": 0.2803529411764706,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.5647058823529412,
|
|
"calib/gap": 0.35672268907563026,
|
|
"calib/mean_conf": 0.6170588235294119,
|
|
"calib/mu_c": 0.7835294117647059,
|
|
"calib/mu_w": 0.42680672268907566,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.18203921568627449,
|
|
"calib/std_conf": 0.46179655191796504,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.38990264255910995,
|
|
"calib/step_q_c_n": 719.0,
|
|
"calib/step_q_gap": 0.1266883568448242,
|
|
"calib/step_q_w": 0.26321428571428573,
|
|
"calib/step_q_w_n": 728.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1315.0,
|
|
"completions/max_terminated_length": 1315.0,
|
|
"completions/mean_length": 488.4453125,
|
|
"completions/mean_terminated_length": 490.3608093261719,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 138.0,
|
|
"epoch": 0.2016,
|
|
"grad_norm": 0.04739164561033249,
|
|
"kl": 0.07581329345703125,
|
|
"learning_rate": 3.055555555555556e-07,
|
|
"loss": -0.0497,
|
|
"mask/has_final_conf_rate": 0.9921875,
|
|
"mask/share_final_conf": 0.0347851887345314,
|
|
"mask/share_reasoning": 0.8378597497940063,
|
|
"mask/share_step_conf": 0.12344881892204285,
|
|
"num_tokens": 45372585.0,
|
|
"reward": 0.9458571672439575,
|
|
"reward_std": 0.20587018132209778,
|
|
"rewards/accuracy_reward_step": 0.53125,
|
|
"rewards/asymmetric_l2_reward": 0.8852865099906921,
|
|
"rewards/final_brier_reward_step": 0.7017402648925781,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 189
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6198446750640869,
|
|
"adv/mean_abs_reasoning": 0.4780905246734619,
|
|
"adv/mean_abs_step_conf": 0.7499421834945679,
|
|
"adv/ratio_final_to_reasoning": 1.2965006480466095,
|
|
"adv/ratio_step_to_reasoning": 1.568619633293887,
|
|
"adv/std_final_conf": 0.8346047401428223,
|
|
"adv/std_reasoning": 0.7392293810844421,
|
|
"adv/std_step_conf": 0.9328544735908508,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.7546182266009852,
|
|
"calib/avg_num_step_conf": 5.98046875,
|
|
"calib/ece": 0.25703124999999993,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.6328125,
|
|
"calib/gap": 0.4335714285714287,
|
|
"calib/mean_conf": 0.6471093749999999,
|
|
"calib/mu_c": 0.8435714285714287,
|
|
"calib/mu_w": 0.41000000000000003,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.17863281249999996,
|
|
"calib/std_conf": 0.4635092912090429,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.39644417475728155,
|
|
"calib/step_q_c_n": 824.0,
|
|
"calib/step_q_gap": 0.11338901209815849,
|
|
"calib/step_q_w": 0.28305516265912306,
|
|
"calib/step_q_w_n": 707.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1462.0,
|
|
"completions/max_terminated_length": 1462.0,
|
|
"completions/mean_length": 529.01953125,
|
|
"completions/mean_terminated_length": 531.0941772460938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 174.0,
|
|
"epoch": 0.20266666666666666,
|
|
"grad_norm": 0.028230194002389908,
|
|
"kl": 0.06874847412109375,
|
|
"learning_rate": 2.7777777777777776e-07,
|
|
"loss": -0.0593,
|
|
"mask/has_final_conf_rate": 0.99609375,
|
|
"mask/share_final_conf": 0.0311330147087574,
|
|
"mask/share_reasoning": 0.8457903265953064,
|
|
"mask/share_step_conf": 0.1191704124212265,
|
|
"num_tokens": 45613622.0,
|
|
"reward": 0.9775964021682739,
|
|
"reward_std": 0.17072449624538422,
|
|
"rewards/accuracy_reward_step": 0.546875,
|
|
"rewards/asymmetric_l2_reward": 0.9036279320716858,
|
|
"rewards/final_brier_reward_step": 0.742189884185791,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 190
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.614532470703125,
|
|
"adv/mean_abs_reasoning": 0.43729501962661743,
|
|
"adv/mean_abs_step_conf": 0.7394310235977173,
|
|
"adv/ratio_final_to_reasoning": 1.405304069613784,
|
|
"adv/ratio_step_to_reasoning": 1.6909202950197728,
|
|
"adv/std_final_conf": 0.8193501830101013,
|
|
"adv/std_reasoning": 0.7205649018287659,
|
|
"adv/std_step_conf": 0.9335800409317017,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.6994230528032108,
|
|
"calib/avg_num_step_conf": 6.80078125,
|
|
"calib/ece": 0.3281818181818182,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.6284584980237155,
|
|
"calib/gap": 0.30981500062711653,
|
|
"calib/mean_conf": 0.6575889328063241,
|
|
"calib/mu_c": 0.8216806722689076,
|
|
"calib/mu_w": 0.5118656716417911,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.25770750988142294,
|
|
"calib/std_conf": 0.45239821753549814,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.42917261055634803,
|
|
"calib/step_q_c_n": 701.0,
|
|
"calib/step_q_gap": 0.12468222594096345,
|
|
"calib/step_q_w": 0.3044903846153846,
|
|
"calib/step_q_w_n": 1040.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2682.0,
|
|
"completions/max_terminated_length": 2682.0,
|
|
"completions/mean_length": 520.515625,
|
|
"completions/mean_terminated_length": 520.515625,
|
|
"completions/min_length": 140.0,
|
|
"completions/min_terminated_length": 140.0,
|
|
"epoch": 0.20373333333333332,
|
|
"grad_norm": 0.04467106983065605,
|
|
"kl": 0.07276153564453125,
|
|
"learning_rate": 2.5000000000000004e-07,
|
|
"loss": 0.0413,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03575979173183441,
|
|
"mask/share_reasoning": 0.82267165184021,
|
|
"mask/share_step_conf": 0.14156854152679443,
|
|
"num_tokens": 45851042.0,
|
|
"reward": 0.9081317186355591,
|
|
"reward_std": 0.185234934091568,
|
|
"rewards/accuracy_reward_step": 0.46484375,
|
|
"rewards/asymmetric_l2_reward": 0.8679160475730896,
|
|
"rewards/final_brier_reward_step": 0.6577222943305969,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 191
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6072635650634766,
|
|
"adv/mean_abs_reasoning": 0.47368013858795166,
|
|
"adv/mean_abs_step_conf": 0.7371071577072144,
|
|
"adv/ratio_final_to_reasoning": 1.2820118801555398,
|
|
"adv/ratio_step_to_reasoning": 1.5561284876848394,
|
|
"adv/std_final_conf": 0.7935887575149536,
|
|
"adv/std_reasoning": 0.7393070459365845,
|
|
"adv/std_step_conf": 0.9304554462432861,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.797986798679868,
|
|
"calib/avg_num_step_conf": 5.640625,
|
|
"calib/ece": 0.19027888446215144,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.5976095617529881,
|
|
"calib/gap": 0.5394666666666668,
|
|
"calib/mean_conf": 0.6223904382470119,
|
|
"calib/mu_c": 0.8394666666666667,
|
|
"calib/mu_w": 0.3,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.10752988047808769,
|
|
"calib/std_conf": 0.4627484397319022,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.40863046044864226,
|
|
"calib/step_q_c_n": 847.0,
|
|
"calib/step_q_gap": 0.12956915391597895,
|
|
"calib/step_q_w": 0.2790613065326633,
|
|
"calib/step_q_w_n": 597.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 3016.0,
|
|
"completions/max_terminated_length": 3016.0,
|
|
"completions/mean_length": 535.84765625,
|
|
"completions/mean_terminated_length": 537.9490356445312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 144.0,
|
|
"epoch": 0.2048,
|
|
"grad_norm": 0.047735992819070816,
|
|
"kl": 0.07106781005859375,
|
|
"learning_rate": 2.2222222222222224e-07,
|
|
"loss": 0.0159,
|
|
"mask/has_final_conf_rate": 0.98046875,
|
|
"mask/share_final_conf": 0.03571078181266785,
|
|
"mask/share_reasoning": 0.8375634551048279,
|
|
"mask/share_step_conf": 0.12281954288482666,
|
|
"num_tokens": 46093195.0,
|
|
"reward": 0.9984359741210938,
|
|
"reward_std": 0.19008705019950867,
|
|
"rewards/accuracy_reward_step": 0.5859375,
|
|
"rewards/asymmetric_l2_reward": 0.8958485126495361,
|
|
"rewards/final_brier_reward_step": 0.7885234355926514,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 192
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.663608729839325,
|
|
"adv/mean_abs_reasoning": 0.5208760499954224,
|
|
"adv/mean_abs_step_conf": 0.7138371467590332,
|
|
"adv/ratio_final_to_reasoning": 1.274024270928097,
|
|
"adv/ratio_step_to_reasoning": 1.3704549225584601,
|
|
"adv/std_final_conf": 0.8608205318450928,
|
|
"adv/std_reasoning": 0.7394456267356873,
|
|
"adv/std_step_conf": 0.9336157441139221,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.7956674862117085,
|
|
"calib/avg_num_step_conf": 5.92578125,
|
|
"calib/ece": 0.22696000000000016,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.592,
|
|
"calib/gap": 0.4576955279420558,
|
|
"calib/mean_conf": 0.6274400000000001,
|
|
"calib/mu_c": 0.8123489932885906,
|
|
"calib/mu_w": 0.35465346534653475,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
|
"calib/pce": 0.12920000000000015,
|
|
"calib/std_conf": 0.45731547797991706,
|
|
"calib/step_conf_rate": 0.9765625,
|
|
"calib/step_q_c": 0.4241108247422681,
|
|
"calib/step_q_c_n": 776.0,
|
|
"calib/step_q_gap": 0.1588341715708781,
|
|
"calib/step_q_w": 0.26527665317139,
|
|
"calib/step_q_w_n": 741.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 3030.0,
|
|
"completions/max_terminated_length": 3030.0,
|
|
"completions/mean_length": 552.3515625,
|
|
"completions/mean_terminated_length": 552.3515625,
|
|
"completions/min_length": 215.0,
|
|
"completions/min_terminated_length": 215.0,
|
|
"epoch": 0.20586666666666667,
|
|
"grad_norm": 0.04746817424893379,
|
|
"kl": 0.068115234375,
|
|
"learning_rate": 1.9444444444444447e-07,
|
|
"loss": -0.004,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.031759485602378845,
|
|
"mask/share_reasoning": 0.846390426158905,
|
|
"mask/share_step_conf": 0.12185005843639374,
|
|
"num_tokens": 46340309.0,
|
|
"reward": 0.9673618674278259,
|
|
"reward_std": 0.2169458270072937,
|
|
"rewards/accuracy_reward_step": 0.58203125,
|
|
"rewards/asymmetric_l2_reward": 0.8808885812759399,
|
|
"rewards/final_brier_reward_step": 0.7436789274215698,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"step": 193
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5674813985824585,
|
|
"adv/mean_abs_reasoning": 0.3744353652000427,
|
|
"adv/mean_abs_step_conf": 0.7580137252807617,
|
|
"adv/ratio_final_to_reasoning": 1.5155657059243872,
|
|
"adv/ratio_step_to_reasoning": 2.0244180858178065,
|
|
"adv/std_final_conf": 0.8004591464996338,
|
|
"adv/std_reasoning": 0.6403605937957764,
|
|
"adv/std_step_conf": 0.933774471282959,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.8198921359588611,
|
|
"calib/avg_num_step_conf": 5.44921875,
|
|
"calib/ece": 0.21711462450592878,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.6205533596837944,
|
|
"calib/gap": 0.5331562774363476,
|
|
"calib/mean_conf": 0.6388537549407114,
|
|
"calib/mu_c": 0.8896268656716418,
|
|
"calib/mu_w": 0.35647058823529415,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.1631620553359683,
|
|
"calib/std_conf": 0.4687978385135658,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"calib/step_q_c": 0.44545193687230994,
|
|
"calib/step_q_c_n": 697.0,
|
|
"calib/step_q_gap": 0.17214248128491738,
|
|
"calib/step_q_w": 0.27330945558739256,
|
|
"calib/step_q_w_n": 698.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2128.0,
|
|
"completions/max_terminated_length": 2128.0,
|
|
"completions/mean_length": 478.26171875,
|
|
"completions/mean_terminated_length": 478.26171875,
|
|
"completions/min_length": 146.0,
|
|
"completions/min_terminated_length": 146.0,
|
|
"epoch": 0.20693333333333333,
|
|
"grad_norm": 0.03423633426427841,
|
|
"kl": 0.06972503662109375,
|
|
"learning_rate": 1.6666666666666668e-07,
|
|
"loss": -0.0187,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.03547438606619835,
|
|
"mask/share_reasoning": 0.8433297276496887,
|
|
"mask/share_step_conf": 0.12119589745998383,
|
|
"num_tokens": 46568688.0,
|
|
"reward": 0.9822894334793091,
|
|
"reward_std": 0.18843895196914673,
|
|
"rewards/accuracy_reward_step": 0.5234375,
|
|
"rewards/asymmetric_l2_reward": 0.8912980556488037,
|
|
"rewards/final_brier_reward_step": 0.7717183232307434,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 194
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5949089527130127,
|
|
"adv/mean_abs_reasoning": 0.3306322693824768,
|
|
"adv/mean_abs_step_conf": 0.7501680850982666,
|
|
"adv/ratio_final_to_reasoning": 1.799306987863364,
|
|
"adv/ratio_step_to_reasoning": 2.268889502223599,
|
|
"adv/std_final_conf": 0.8255235552787781,
|
|
"adv/std_reasoning": 0.6185620427131653,
|
|
"adv/std_step_conf": 0.9332430362701416,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.7805431330611187,
|
|
"calib/avg_num_step_conf": 5.890625,
|
|
"calib/ece": 0.20566800000000002,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.592,
|
|
"calib/gap": 0.5285356795644565,
|
|
"calib/mean_conf": 0.623028,
|
|
"calib/mu_c": 0.8576978417266186,
|
|
"calib/mu_w": 0.32916216216216215,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.13634800000000002,
|
|
"calib/std_conf": 0.4616225159326611,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4317737789203085,
|
|
"calib/step_q_c_n": 778.0,
|
|
"calib/step_q_gap": 0.16059569672852764,
|
|
"calib/step_q_w": 0.27117808219178086,
|
|
"calib/step_q_w_n": 730.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2909.0,
|
|
"completions/max_terminated_length": 2909.0,
|
|
"completions/mean_length": 512.59765625,
|
|
"completions/mean_terminated_length": 516.6338500976562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 167.0,
|
|
"epoch": 0.208,
|
|
"grad_norm": 0.04133572801947594,
|
|
"kl": 0.06982421875,
|
|
"learning_rate": 1.3888888888888888e-07,
|
|
"loss": -0.0151,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.03313131630420685,
|
|
"mask/share_reasoning": 0.8336950540542603,
|
|
"mask/share_step_conf": 0.1253610998392105,
|
|
"num_tokens": 46805897.0,
|
|
"reward": 0.985278844833374,
|
|
"reward_std": 0.18139265477657318,
|
|
"rewards/accuracy_reward_step": 0.54296875,
|
|
"rewards/asymmetric_l2_reward": 0.888818621635437,
|
|
"rewards/final_brier_reward_step": 0.7778328061103821,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 195
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5143544673919678,
|
|
"adv/mean_abs_reasoning": 0.3996366560459137,
|
|
"adv/mean_abs_step_conf": 0.7586710453033447,
|
|
"adv/ratio_final_to_reasoning": 1.2870552778643867,
|
|
"adv/ratio_step_to_reasoning": 1.8984020455225261,
|
|
"adv/std_final_conf": 0.7629029750823975,
|
|
"adv/std_reasoning": 0.6816370487213135,
|
|
"adv/std_step_conf": 0.9331806898117065,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.7817218627077782,
|
|
"calib/avg_num_step_conf": 5.32421875,
|
|
"calib/ece": 0.2220553359683795,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.6956521739130435,
|
|
"calib/gap": 0.4824356046187033,
|
|
"calib/mean_conf": 0.7189723320158102,
|
|
"calib/mu_c": 0.9306338028169014,
|
|
"calib/mu_w": 0.44819819819819817,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.18988142292490126,
|
|
"calib/std_conf": 0.43153981494492955,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.4720873124147339,
|
|
"calib/step_q_c_n": 733.0,
|
|
"calib/step_q_gap": 0.14921429654171803,
|
|
"calib/step_q_w": 0.32287301587301587,
|
|
"calib/step_q_w_n": 630.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2488.0,
|
|
"completions/max_terminated_length": 2488.0,
|
|
"completions/mean_length": 425.453125,
|
|
"completions/mean_terminated_length": 428.80316162109375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 172.0,
|
|
"epoch": 0.20906666666666668,
|
|
"grad_norm": 0.03610050305724144,
|
|
"kl": 0.08242034912109375,
|
|
"learning_rate": 1.1111111111111112e-07,
|
|
"loss": -0.0574,
|
|
"mask/has_final_conf_rate": 0.98828125,
|
|
"mask/share_final_conf": 0.037843845784664154,
|
|
"mask/share_reasoning": 0.8252567052841187,
|
|
"mask/share_step_conf": 0.12908688187599182,
|
|
"num_tokens": 47017357.0,
|
|
"reward": 0.9776846170425415,
|
|
"reward_std": 0.17640987038612366,
|
|
"rewards/accuracy_reward_step": 0.5546875,
|
|
"rewards/asymmetric_l2_reward": 0.8756676912307739,
|
|
"rewards/final_brier_reward_step": 0.7711077928543091,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 196
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5709211826324463,
|
|
"adv/mean_abs_reasoning": 0.5281293988227844,
|
|
"adv/mean_abs_step_conf": 0.7545597553253174,
|
|
"adv/ratio_final_to_reasoning": 1.081025187965385,
|
|
"adv/ratio_step_to_reasoning": 1.4287402992661509,
|
|
"adv/std_final_conf": 0.7653646469116211,
|
|
"adv/std_reasoning": 0.7754925489425659,
|
|
"adv/std_step_conf": 0.932712972164154,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.8252395752395751,
|
|
"calib/avg_num_step_conf": 6.26953125,
|
|
"calib/ece": 0.2161441767068273,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.6144578313253012,
|
|
"calib/gap": 0.5121394522144523,
|
|
"calib/mean_conf": 0.6553417670682731,
|
|
"calib/mu_c": 0.8959856060606062,
|
|
"calib/mu_w": 0.38384615384615384,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.1706827309236948,
|
|
"calib/std_conf": 0.4506214670081801,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.43755376344086017,
|
|
"calib/step_q_c_n": 744.0,
|
|
"calib/step_q_gap": 0.16742600502041882,
|
|
"calib/step_q_w": 0.27012775842044134,
|
|
"calib/step_q_w_n": 861.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2589.0,
|
|
"completions/max_terminated_length": 2589.0,
|
|
"completions/mean_length": 550.2890625,
|
|
"completions/mean_terminated_length": 550.2890625,
|
|
"completions/min_length": 182.0,
|
|
"completions/min_terminated_length": 182.0,
|
|
"epoch": 0.21013333333333334,
|
|
"grad_norm": 0.04556097462773323,
|
|
"kl": 0.06296539306640625,
|
|
"learning_rate": 8.333333333333334e-08,
|
|
"loss": 0.0593,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.03290029242634773,
|
|
"mask/share_reasoning": 0.8371220231056213,
|
|
"mask/share_step_conf": 0.12997770309448242,
|
|
"num_tokens": 47263287.0,
|
|
"reward": 0.9729477167129517,
|
|
"reward_std": 0.1934598684310913,
|
|
"rewards/accuracy_reward_step": 0.515625,
|
|
"rewards/asymmetric_l2_reward": 0.8871469497680664,
|
|
"rewards/final_brier_reward_step": 0.7618734240531921,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"step": 197
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6123339533805847,
|
|
"adv/mean_abs_reasoning": 0.5126949548721313,
|
|
"adv/mean_abs_step_conf": 0.7387211918830872,
|
|
"adv/ratio_final_to_reasoning": 1.1943436297969887,
|
|
"adv/ratio_step_to_reasoning": 1.440859101231702,
|
|
"adv/std_final_conf": 0.8177485466003418,
|
|
"adv/std_reasoning": 0.7576181292533875,
|
|
"adv/std_step_conf": 0.931955099105835,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.8303197064989518,
|
|
"calib/avg_num_step_conf": 5.9921875,
|
|
"calib/ece": 0.16856000000000004,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.524,
|
|
"calib/gap": 0.6000628930817611,
|
|
"calib/mean_conf": 0.5522400000000001,
|
|
"calib/mu_c": 0.8066666666666668,
|
|
"calib/mu_w": 0.20660377358490564,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.07240000000000002,
|
|
"calib/std_conf": 0.47586529858774107,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.41482986111111114,
|
|
"calib/step_q_c_n": 864.0,
|
|
"calib/step_q_gap": 0.10320299543946931,
|
|
"calib/step_q_w": 0.31162686567164183,
|
|
"calib/step_q_w_n": 670.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2589.0,
|
|
"completions/max_terminated_length": 2589.0,
|
|
"completions/mean_length": 475.43359375,
|
|
"completions/mean_terminated_length": 481.0711669921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 134.0,
|
|
"epoch": 0.2112,
|
|
"grad_norm": 0.03035557083785534,
|
|
"kl": 0.07807159423828125,
|
|
"learning_rate": 5.555555555555556e-08,
|
|
"loss": 0.0208,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.03742978721857071,
|
|
"mask/share_reasoning": 0.8121263384819031,
|
|
"mask/share_step_conf": 0.13872510194778442,
|
|
"num_tokens": 47490382.0,
|
|
"reward": 1.0016413927078247,
|
|
"reward_std": 0.15997019410133362,
|
|
"rewards/accuracy_reward_step": 0.5625,
|
|
"rewards/asymmetric_l2_reward": 0.8928694128990173,
|
|
"rewards/final_brier_reward_step": 0.8026007413864136,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 198
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.6816428899765015,
|
|
"adv/mean_abs_reasoning": 0.537695050239563,
|
|
"adv/mean_abs_step_conf": 0.7122054100036621,
|
|
"adv/ratio_final_to_reasoning": 1.267712785663183,
|
|
"adv/ratio_step_to_reasoning": 1.3245526617482313,
|
|
"adv/std_final_conf": 0.8624335527420044,
|
|
"adv/std_reasoning": 0.7929316163063049,
|
|
"adv/std_step_conf": 0.9336603283882141,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.7438086548488009,
|
|
"calib/avg_num_step_conf": 5.76171875,
|
|
"calib/ece": 0.27726907630522085,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.642570281124498,
|
|
"calib/gap": 0.3785968456725758,
|
|
"calib/mean_conf": 0.6783935742971888,
|
|
"calib/mu_c": 0.8486861313868614,
|
|
"calib/mu_w": 0.47008928571428565,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.2027309236947791,
|
|
"calib/std_conf": 0.44654118043262725,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.425260347129506,
|
|
"calib/step_q_c_n": 749.0,
|
|
"calib/step_q_gap": 0.12844216531132419,
|
|
"calib/step_q_w": 0.2968181818181818,
|
|
"calib/step_q_w_n": 726.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2184.0,
|
|
"completions/max_terminated_length": 2184.0,
|
|
"completions/mean_length": 547.49609375,
|
|
"completions/mean_terminated_length": 551.8070678710938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 115.0,
|
|
"epoch": 0.21226666666666666,
|
|
"grad_norm": 0.29570773243904114,
|
|
"kl": 1.4525260925292969,
|
|
"learning_rate": 2.777777777777778e-08,
|
|
"loss": -0.0941,
|
|
"mask/has_final_conf_rate": 0.97265625,
|
|
"mask/share_final_conf": 0.0359857976436615,
|
|
"mask/share_reasoning": 0.8317693471908569,
|
|
"mask/share_step_conf": 0.12443234026432037,
|
|
"num_tokens": 47734741.0,
|
|
"reward": 0.940308690071106,
|
|
"reward_std": 0.24737019836902618,
|
|
"rewards/accuracy_reward_step": 0.53515625,
|
|
"rewards/asymmetric_l2_reward": 0.877549409866333,
|
|
"rewards/final_brier_reward_step": 0.7022866606712341,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"step": 199
|
|
},
|
|
{
|
|
"adv/mean_abs_final_conf": 0.5579714775085449,
|
|
"adv/mean_abs_reasoning": 0.4412845969200134,
|
|
"adv/mean_abs_step_conf": 0.7684429883956909,
|
|
"adv/ratio_final_to_reasoning": 1.2644254555970418,
|
|
"adv/ratio_step_to_reasoning": 1.7413773192155577,
|
|
"adv/std_final_conf": 0.7886030077934265,
|
|
"adv/std_reasoning": 0.7204948663711548,
|
|
"adv/std_step_conf": 0.9336580634117126,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.7859723058398554,
|
|
"calib/avg_num_step_conf": 5.30859375,
|
|
"calib/ece": 0.2114399999999999,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.64,
|
|
"calib/gap": 0.52675630476955,
|
|
"calib/mean_conf": 0.6500800000000001,
|
|
"calib/mu_c": 0.8586754966887419,
|
|
"calib/mu_w": 0.3319191919191919,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
|
"calib/pce": 0.12875999999999993,
|
|
"calib/std_conf": 0.46648128965693786,
|
|
"calib/step_conf_rate": 0.98046875,
|
|
"calib/step_q_c": 0.4678467635402906,
|
|
"calib/step_q_c_n": 757.0,
|
|
"calib/step_q_gap": 0.20974045124793178,
|
|
"calib/step_q_w": 0.2581063122923588,
|
|
"calib/step_q_w_n": 602.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 1912.0,
|
|
"completions/max_terminated_length": 1912.0,
|
|
"completions/mean_length": 496.10546875,
|
|
"completions/mean_terminated_length": 500.0118103027344,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 143.0,
|
|
"epoch": 0.21333333333333335,
|
|
"grad_norm": 0.029257260262966156,
|
|
"kl": 0.0724029541015625,
|
|
"learning_rate": 0.0,
|
|
"loss": -0.0295,
|
|
"mask/has_final_conf_rate": 0.9765625,
|
|
"mask/share_final_conf": 0.03522798418998718,
|
|
"mask/share_reasoning": 0.8422371745109558,
|
|
"mask/share_step_conf": 0.1147223487496376,
|
|
"num_tokens": 47969792.0,
|
|
"reward": 0.9783412218093872,
|
|
"reward_std": 0.192615807056427,
|
|
"rewards/accuracy_reward_step": 0.58984375,
|
|
"rewards/asymmetric_l2_reward": 0.8736051321029663,
|
|
"rewards/final_brier_reward_step": 0.7705773711204529,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.21333333333333335,
|
|
"step": 200,
|
|
"total_flos": 0.0,
|
|
"train_loss": -0.003022296619601548,
|
|
"train_runtime": 14229.7566,
|
|
"train_samples_per_second": 3.598,
|
|
"train_steps_per_second": 0.014
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 200,
|
|
"num_input_tokens_seen": 47969792,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 25,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|