8527 lines
329 KiB
JSON
8527 lines
329 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.21333333333333335,
|
|
"eval_steps": 500,
|
|
"global_step": 200,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calib/answer_extract_rate": 0.015625,
|
|
"calib/avg_num_step_conf": 0.00390625,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.01953125,
|
|
"calib/nonempty_step_conf_rate": 0.00390625,
|
|
"calib/step_conf_rate": 0.00390625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.125,
|
|
"completions/max_length": 3022.0,
|
|
"completions/max_terminated_length": 3022.0,
|
|
"completions/mean_length": 584.16015625,
|
|
"completions/mean_terminated_length": 667.6116333007812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0010666666666666667,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.5000000000000004e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 232809.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 1
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0234375,
|
|
"calib/avg_num_step_conf": 0.00390625,
|
|
"calib/ece": 0.9,
|
|
"calib/final_conf_rate": 0.00390625,
|
|
"calib/format_rate": 0.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/mean_conf": 0.9,
|
|
"calib/mu_c": NaN,
|
|
"calib/mu_w": 0.9,
|
|
"calib/nonempty_final_conf_rate": 0.00390625,
|
|
"calib/nonempty_reasoning_rate": 0.02734375,
|
|
"calib/nonempty_step_conf_rate": 0.00390625,
|
|
"calib/pce": 0.9,
|
|
"calib/std_conf": 0.0,
|
|
"calib/step_conf_rate": 0.00390625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.11328125,
|
|
"completions/max_length": 3054.0,
|
|
"completions/max_terminated_length": 3054.0,
|
|
"completions/mean_length": 643.5234375,
|
|
"completions/mean_terminated_length": 725.7356567382812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0021333333333333334,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 5.000000000000001e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 476519.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 2
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.015625,
|
|
"calib/avg_num_step_conf": 0.01953125,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0234375,
|
|
"calib/nonempty_step_conf_rate": 0.0078125,
|
|
"calib/step_conf_rate": 0.0078125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.09375,
|
|
"completions/max_length": 3072.0,
|
|
"completions/max_terminated_length": 3072.0,
|
|
"completions/mean_length": 736.22265625,
|
|
"completions/mean_terminated_length": 812.3836059570312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.0032,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 7.5e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 745928.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 3
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0078125,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0078125,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.10546875,
|
|
"completions/max_length": 3022.0,
|
|
"completions/max_terminated_length": 3022.0,
|
|
"completions/mean_length": 764.01953125,
|
|
"completions/mean_terminated_length": 854.1004638671875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.004266666666666667,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.0000000000000002e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1023365.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 4
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0234375,
|
|
"calib/avg_num_step_conf": 0.046875,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.03125,
|
|
"calib/nonempty_step_conf_rate": 0.01171875,
|
|
"calib/step_conf_rate": 0.01171875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.15625,
|
|
"completions/max_length": 2980.0,
|
|
"completions/max_terminated_length": 2980.0,
|
|
"completions/mean_length": 677.6640625,
|
|
"completions/mean_terminated_length": 803.1574096679688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.005333333333333333,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.25e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1279215.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.03515625,
|
|
"calib/auroc": 0.0,
|
|
"calib/avg_num_step_conf": 0.015625,
|
|
"calib/ece": 0.595,
|
|
"calib/final_conf_rate": 0.0078125,
|
|
"calib/format_rate": 0.0078125,
|
|
"calib/frac_conf_gt_0.9": 0.5,
|
|
"calib/gap": -0.18999999999999995,
|
|
"calib/mean_conf": 0.895,
|
|
"calib/mu_c": 0.8,
|
|
"calib/mu_w": 0.99,
|
|
"calib/nonempty_final_conf_rate": 0.0078125,
|
|
"calib/nonempty_reasoning_rate": 0.03515625,
|
|
"calib/nonempty_step_conf_rate": 0.0078125,
|
|
"calib/pce": 0.495,
|
|
"calib/std_conf": 0.09499999999999997,
|
|
"calib/step_conf_rate": 0.0078125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.12109375,
|
|
"completions/max_length": 2959.0,
|
|
"completions/max_terminated_length": 2959.0,
|
|
"completions/mean_length": 629.1484375,
|
|
"completions/mean_terminated_length": 715.8311157226562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0064,
|
|
"grad_norm": 0.0022278563119471073,
|
|
"learning_rate": 1.5e-06,
|
|
"loss": -0.002,
|
|
"num_tokens": 1521909.0,
|
|
"reward": 0.008195600472390652,
|
|
"reward_std": 0.023180657997727394,
|
|
"rewards/accuracy_reward_step": 0.00390625,
|
|
"rewards/final_brier_reward_step": 0.0038277343846857548,
|
|
"rewards/format_reward_step": 0.0078125,
|
|
"rewards/stepwise_brier_reward": 0.006376933306455612,
|
|
"step": 6
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.03125,
|
|
"calib/avg_num_step_conf": 0.06640625,
|
|
"calib/ece": 0.075,
|
|
"calib/final_conf_rate": 0.00390625,
|
|
"calib/format_rate": 0.00390625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/mean_conf": 0.075,
|
|
"calib/mu_c": NaN,
|
|
"calib/mu_w": 0.075,
|
|
"calib/nonempty_final_conf_rate": 0.00390625,
|
|
"calib/nonempty_reasoning_rate": 0.04296875,
|
|
"calib/nonempty_step_conf_rate": 0.015625,
|
|
"calib/pce": 0.075,
|
|
"calib/std_conf": 0.0,
|
|
"calib/step_conf_rate": 0.015625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.10546875,
|
|
"completions/max_length": 3048.0,
|
|
"completions/max_terminated_length": 3048.0,
|
|
"completions/mean_length": 728.0703125,
|
|
"completions/mean_terminated_length": 813.9126586914062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.007466666666666667,
|
|
"grad_norm": 0.0008369016577489674,
|
|
"learning_rate": 1.75e-06,
|
|
"loss": -0.0008,
|
|
"num_tokens": 1791399.0,
|
|
"reward": 0.003267470980063081,
|
|
"reward_std": 0.009241803549230099,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.003884277306497097,
|
|
"rewards/format_reward_step": 0.00390625,
|
|
"rewards/stepwise_brier_reward": 0.0037388289347290993,
|
|
"step": 7
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.03125,
|
|
"calib/avg_num_step_conf": 0.00390625,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.03515625,
|
|
"calib/nonempty_step_conf_rate": 0.00390625,
|
|
"calib/step_conf_rate": 0.00390625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1015625,
|
|
"completions/max_length": 2809.0,
|
|
"completions/max_terminated_length": 2809.0,
|
|
"completions/mean_length": 630.51171875,
|
|
"completions/mean_terminated_length": 701.7869262695312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.008533333333333334,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.0000000000000003e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 2035002.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 8
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0078125,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0078125,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.09765625,
|
|
"completions/max_length": 2871.0,
|
|
"completions/max_terminated_length": 2871.0,
|
|
"completions/mean_length": 705.03125,
|
|
"completions/mean_terminated_length": 781.3333129882812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 10.0,
|
|
"epoch": 0.0096,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.25e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 2298706.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 9
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.03125,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/ece": 0.9299999999999999,
|
|
"calib/final_conf_rate": 0.01171875,
|
|
"calib/format_rate": 0.0,
|
|
"calib/frac_conf_gt_0.9": 0.6666666666666666,
|
|
"calib/mean_conf": 0.93,
|
|
"calib/mu_c": NaN,
|
|
"calib/mu_w": 0.93,
|
|
"calib/nonempty_final_conf_rate": 0.01171875,
|
|
"calib/nonempty_reasoning_rate": 0.03125,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/pce": 0.9299999999999999,
|
|
"calib/std_conf": 0.058878405775518984,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1171875,
|
|
"completions/max_length": 2916.0,
|
|
"completions/max_terminated_length": 2916.0,
|
|
"completions/mean_length": 643.71875,
|
|
"completions/mean_terminated_length": 729.1681518554688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.010666666666666666,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 2545978.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0390625,
|
|
"calib/auroc": 1.0,
|
|
"calib/avg_num_step_conf": 0.03125,
|
|
"calib/ece": 0.5333333333333333,
|
|
"calib/final_conf_rate": 0.01171875,
|
|
"calib/format_rate": 0.00390625,
|
|
"calib/frac_conf_gt_0.9": 0.3333333333333333,
|
|
"calib/gap": 0.17500000000000004,
|
|
"calib/mean_conf": 0.8333333333333334,
|
|
"calib/mu_c": 0.95,
|
|
"calib/mu_w": 0.7749999999999999,
|
|
"calib/nonempty_final_conf_rate": 0.01171875,
|
|
"calib/nonempty_reasoning_rate": 0.04296875,
|
|
"calib/nonempty_step_conf_rate": 0.01171875,
|
|
"calib/pce": 0.5166666666666666,
|
|
"calib/std_conf": 0.10274023338281628,
|
|
"calib/step_conf_rate": 0.01171875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.07421875,
|
|
"completions/max_length": 3034.0,
|
|
"completions/max_terminated_length": 3034.0,
|
|
"completions/mean_length": 648.09375,
|
|
"completions/mean_terminated_length": 700.0505981445312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.011733333333333333,
|
|
"grad_norm": 0.0008397590136155486,
|
|
"learning_rate": 2.7500000000000004e-06,
|
|
"loss": 0.0073,
|
|
"num_tokens": 2792050.0,
|
|
"reward": 0.0070624202489852905,
|
|
"reward_std": 0.019975541159510612,
|
|
"rewards/accuracy_reward_step": 0.00390625,
|
|
"rewards/final_brier_reward_step": 0.0038964843843132257,
|
|
"rewards/format_reward_step": 0.00390625,
|
|
"rewards/stepwise_brier_reward": 0.0032692127861082554,
|
|
"step": 11
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0234375,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0234375,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0859375,
|
|
"completions/max_length": 2813.0,
|
|
"completions/max_terminated_length": 2813.0,
|
|
"completions/mean_length": 721.28125,
|
|
"completions/mean_terminated_length": 789.0940551757812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.0128,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 3056554.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 12
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0390625,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/ece": 0.375,
|
|
"calib/final_conf_rate": 0.0078125,
|
|
"calib/format_rate": 0.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/mean_conf": 0.375,
|
|
"calib/mu_c": NaN,
|
|
"calib/mu_w": 0.375,
|
|
"calib/nonempty_final_conf_rate": 0.0078125,
|
|
"calib/nonempty_reasoning_rate": 0.0390625,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/pce": 0.375,
|
|
"calib/std_conf": 0.375,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1015625,
|
|
"completions/max_length": 2915.0,
|
|
"completions/max_terminated_length": 2915.0,
|
|
"completions/mean_length": 746.140625,
|
|
"completions/mean_terminated_length": 830.4869384765625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.013866666666666666,
|
|
"grad_norm": 0.0011903855483978987,
|
|
"learning_rate": 3.2500000000000002e-06,
|
|
"loss": -0.0006,
|
|
"num_tokens": 3327838.0,
|
|
"reward": 0.00390625,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward_step": 0.00390625,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 13
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.02734375,
|
|
"calib/avg_num_step_conf": 0.00390625,
|
|
"calib/ece": 0.95,
|
|
"calib/final_conf_rate": 0.00390625,
|
|
"calib/format_rate": 0.0,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/mean_conf": 0.95,
|
|
"calib/mu_c": NaN,
|
|
"calib/mu_w": 0.95,
|
|
"calib/nonempty_final_conf_rate": 0.00390625,
|
|
"calib/nonempty_reasoning_rate": 0.03125,
|
|
"calib/nonempty_step_conf_rate": 0.00390625,
|
|
"calib/pce": 0.95,
|
|
"calib/std_conf": 0.0,
|
|
"calib/step_conf_rate": 0.00390625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0859375,
|
|
"completions/max_length": 3027.0,
|
|
"completions/max_terminated_length": 3027.0,
|
|
"completions/mean_length": 729.484375,
|
|
"completions/mean_terminated_length": 798.0684204101562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.014933333333333333,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.5e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 3595666.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 14
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.01953125,
|
|
"calib/avg_num_step_conf": 0.01953125,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.02734375,
|
|
"calib/nonempty_step_conf_rate": 0.0078125,
|
|
"calib/step_conf_rate": 0.0078125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.09375,
|
|
"completions/max_length": 3028.0,
|
|
"completions/max_terminated_length": 3028.0,
|
|
"completions/mean_length": 673.76171875,
|
|
"completions/mean_terminated_length": 743.461181640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.016,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.7500000000000005e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 3851709.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.01953125,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/ece": 0.0,
|
|
"calib/final_conf_rate": 0.00390625,
|
|
"calib/format_rate": 0.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/mean_conf": 0.0,
|
|
"calib/mu_c": NaN,
|
|
"calib/mu_w": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.00390625,
|
|
"calib/nonempty_reasoning_rate": 0.01953125,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1171875,
|
|
"completions/max_length": 3068.0,
|
|
"completions/max_terminated_length": 3068.0,
|
|
"completions/mean_length": 705.6015625,
|
|
"completions/mean_terminated_length": 799.2655029296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.017066666666666667,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.000000000000001e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 4116871.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 16
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.046875,
|
|
"calib/avg_num_step_conf": 0.0390625,
|
|
"calib/ece": 0.8,
|
|
"calib/final_conf_rate": 0.00390625,
|
|
"calib/format_rate": 0.00390625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/mean_conf": 0.8,
|
|
"calib/mu_c": NaN,
|
|
"calib/mu_w": 0.8,
|
|
"calib/nonempty_final_conf_rate": 0.00390625,
|
|
"calib/nonempty_reasoning_rate": 0.06640625,
|
|
"calib/nonempty_step_conf_rate": 0.0234375,
|
|
"calib/pce": 0.8,
|
|
"calib/std_conf": 0.0,
|
|
"calib/step_conf_rate": 0.0234375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.09375,
|
|
"completions/max_length": 2961.0,
|
|
"completions/max_terminated_length": 2961.0,
|
|
"completions/mean_length": 621.75390625,
|
|
"completions/mean_terminated_length": 686.0733032226562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.018133333333333335,
|
|
"grad_norm": 0.0009224429959431291,
|
|
"learning_rate": 4.25e-06,
|
|
"loss": 0.0027,
|
|
"num_tokens": 4355248.0,
|
|
"reward": 0.0020583579316735268,
|
|
"reward_std": 0.005821915343403816,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0014062500558793545,
|
|
"rewards/format_reward_step": 0.00390625,
|
|
"rewards/stepwise_brier_reward": 0.0038584317080676556,
|
|
"step": 17
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0234375,
|
|
"calib/avg_num_step_conf": 0.0078125,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.03125,
|
|
"calib/nonempty_step_conf_rate": 0.0078125,
|
|
"calib/step_conf_rate": 0.0078125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.11328125,
|
|
"completions/max_length": 2984.0,
|
|
"completions/max_terminated_length": 2984.0,
|
|
"completions/mean_length": 705.09375,
|
|
"completions/mean_terminated_length": 795.1717529296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.0192,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.5e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 4622152.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 18
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.05078125,
|
|
"calib/avg_num_step_conf": 0.015625,
|
|
"calib/ece": 0.6,
|
|
"calib/final_conf_rate": 0.00390625,
|
|
"calib/format_rate": 0.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/mean_conf": 0.6,
|
|
"calib/mu_c": NaN,
|
|
"calib/mu_w": 0.6,
|
|
"calib/nonempty_final_conf_rate": 0.00390625,
|
|
"calib/nonempty_reasoning_rate": 0.05859375,
|
|
"calib/nonempty_step_conf_rate": 0.01171875,
|
|
"calib/pce": 0.6,
|
|
"calib/std_conf": 0.0,
|
|
"calib/step_conf_rate": 0.01171875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.08984375,
|
|
"completions/max_length": 2738.0,
|
|
"completions/max_terminated_length": 2738.0,
|
|
"completions/mean_length": 593.578125,
|
|
"completions/mean_terminated_length": 652.1716918945312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.020266666666666665,
|
|
"grad_norm": 0.0009278705110773444,
|
|
"learning_rate": 4.75e-06,
|
|
"loss": -0.0026,
|
|
"num_tokens": 4854548.0,
|
|
"reward": 0.00390625,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward_step": 0.00390625,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 19
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.04296875,
|
|
"calib/avg_num_step_conf": 0.11328125,
|
|
"calib/ece": 0.9,
|
|
"calib/final_conf_rate": 0.00390625,
|
|
"calib/format_rate": 0.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/mean_conf": 0.9,
|
|
"calib/mu_c": NaN,
|
|
"calib/mu_w": 0.9,
|
|
"calib/nonempty_final_conf_rate": 0.00390625,
|
|
"calib/nonempty_reasoning_rate": 0.0625,
|
|
"calib/nonempty_step_conf_rate": 0.01953125,
|
|
"calib/pce": 0.9,
|
|
"calib/std_conf": 0.0,
|
|
"calib/step_conf_rate": 0.01953125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0703125,
|
|
"completions/max_length": 3040.0,
|
|
"completions/max_terminated_length": 3040.0,
|
|
"completions/mean_length": 712.390625,
|
|
"completions/mean_terminated_length": 766.2689208984375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.021333333333333333,
|
|
"grad_norm": 0.0014255295973271132,
|
|
"learning_rate": 5e-06,
|
|
"loss": -0.0035,
|
|
"num_tokens": 5117472.0,
|
|
"reward": 0.00390625,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward_step": 0.00390625,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.04296875,
|
|
"calib/auroc": 0.0,
|
|
"calib/avg_num_step_conf": 0.0546875,
|
|
"calib/ece": 0.42500000000000004,
|
|
"calib/final_conf_rate": 0.0078125,
|
|
"calib/format_rate": 0.0078125,
|
|
"calib/frac_conf_gt_0.9": 0.5,
|
|
"calib/gap": -0.04999999999999993,
|
|
"calib/mean_conf": 0.925,
|
|
"calib/mu_c": 0.9,
|
|
"calib/mu_w": 0.95,
|
|
"calib/nonempty_final_conf_rate": 0.0078125,
|
|
"calib/nonempty_reasoning_rate": 0.0546875,
|
|
"calib/nonempty_step_conf_rate": 0.02734375,
|
|
"calib/pce": 0.42500000000000004,
|
|
"calib/std_conf": 0.024999999999999967,
|
|
"calib/step_conf_rate": 0.02734375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.09375,
|
|
"completions/max_length": 2982.0,
|
|
"completions/max_terminated_length": 2982.0,
|
|
"completions/mean_length": 627.2890625,
|
|
"completions/mean_terminated_length": 692.1810302734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 10.0,
|
|
"epoch": 0.0224,
|
|
"grad_norm": 0.0013866168446838856,
|
|
"learning_rate": 4.9722222222222224e-06,
|
|
"loss": 0.0078,
|
|
"num_tokens": 5356698.0,
|
|
"reward": 0.008453850634396076,
|
|
"reward_std": 0.02391109988093376,
|
|
"rewards/accuracy_reward_step": 0.00390625,
|
|
"rewards/final_brier_reward_step": 0.0042480467818677425,
|
|
"rewards/format_reward_step": 0.0078125,
|
|
"rewards/stepwise_brier_reward": 0.006569306366145611,
|
|
"step": 21
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.03515625,
|
|
"calib/avg_num_step_conf": 0.0078125,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.04296875,
|
|
"calib/nonempty_step_conf_rate": 0.0078125,
|
|
"calib/step_conf_rate": 0.0078125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.09765625,
|
|
"completions/max_length": 2966.0,
|
|
"completions/max_terminated_length": 2966.0,
|
|
"completions/mean_length": 725.8203125,
|
|
"completions/mean_terminated_length": 804.372314453125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.023466666666666667,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.944444444444445e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 5620004.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 22
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0546875,
|
|
"calib/avg_num_step_conf": 0.015625,
|
|
"calib/ece": 0.9450000000000001,
|
|
"calib/final_conf_rate": 0.0078125,
|
|
"calib/format_rate": 0.00390625,
|
|
"calib/frac_conf_gt_0.9": 0.5,
|
|
"calib/mean_conf": 0.9450000000000001,
|
|
"calib/mu_c": NaN,
|
|
"calib/mu_w": 0.9450000000000001,
|
|
"calib/nonempty_final_conf_rate": 0.0078125,
|
|
"calib/nonempty_reasoning_rate": 0.0546875,
|
|
"calib/nonempty_step_conf_rate": 0.00390625,
|
|
"calib/pce": 0.9450000000000001,
|
|
"calib/std_conf": 0.05499999999999999,
|
|
"calib/step_conf_rate": 0.00390625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0859375,
|
|
"completions/max_length": 3000.0,
|
|
"completions/max_terminated_length": 3000.0,
|
|
"completions/mean_length": 691.91796875,
|
|
"completions/mean_terminated_length": 756.9700927734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.024533333333333334,
|
|
"grad_norm": 0.001854900037869811,
|
|
"learning_rate": 4.9166666666666665e-06,
|
|
"loss": -0.0057,
|
|
"num_tokens": 5876751.0,
|
|
"reward": 0.0011566466419026256,
|
|
"reward_std": 0.003271490801125765,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.00390625,
|
|
"rewards/stepwise_brier_reward": 0.0030640866607427597,
|
|
"step": 23
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.05078125,
|
|
"calib/avg_num_step_conf": 0.05859375,
|
|
"calib/ece": 0.8,
|
|
"calib/final_conf_rate": 0.00390625,
|
|
"calib/format_rate": 0.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/mean_conf": 0.8,
|
|
"calib/mu_c": NaN,
|
|
"calib/mu_w": 0.8,
|
|
"calib/nonempty_final_conf_rate": 0.00390625,
|
|
"calib/nonempty_reasoning_rate": 0.0546875,
|
|
"calib/nonempty_step_conf_rate": 0.0078125,
|
|
"calib/pce": 0.8,
|
|
"calib/std_conf": 0.0,
|
|
"calib/step_conf_rate": 0.0078125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.09765625,
|
|
"completions/max_length": 3054.0,
|
|
"completions/max_terminated_length": 3054.0,
|
|
"completions/mean_length": 648.19140625,
|
|
"completions/mean_terminated_length": 718.3419799804688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 7.0,
|
|
"epoch": 0.0256,
|
|
"grad_norm": 0.0009243428939953446,
|
|
"learning_rate": 4.888888888888889e-06,
|
|
"loss": -0.0031,
|
|
"num_tokens": 6122880.0,
|
|
"reward": 0.00390625,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward_step": 0.00390625,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 24
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.05078125,
|
|
"calib/auroc": 1.0,
|
|
"calib/avg_num_step_conf": 0.0234375,
|
|
"calib/ece": 0.44999999999999996,
|
|
"calib/final_conf_rate": 0.0078125,
|
|
"calib/format_rate": 0.0,
|
|
"calib/frac_conf_gt_0.9": 0.5,
|
|
"calib/gap": 0.09999999999999998,
|
|
"calib/mean_conf": 0.95,
|
|
"calib/mu_c": 1.0,
|
|
"calib/mu_w": 0.9,
|
|
"calib/nonempty_final_conf_rate": 0.0078125,
|
|
"calib/nonempty_reasoning_rate": 0.0625,
|
|
"calib/nonempty_step_conf_rate": 0.015625,
|
|
"calib/pce": 0.44999999999999996,
|
|
"calib/std_conf": 0.04999999999999999,
|
|
"calib/step_conf_rate": 0.015625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.08203125,
|
|
"completions/max_length": 3028.0,
|
|
"completions/max_terminated_length": 3028.0,
|
|
"completions/mean_length": 649.58203125,
|
|
"completions/mean_terminated_length": 707.6297607421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.02666666666666667,
|
|
"grad_norm": 0.0013725002063438296,
|
|
"learning_rate": 4.861111111111111e-06,
|
|
"loss": -0.0057,
|
|
"num_tokens": 6368077.0,
|
|
"reward": 0.00390625,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward_step": 0.00390625,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.05859375,
|
|
"calib/avg_num_step_conf": 0.02734375,
|
|
"calib/ece": 1.0,
|
|
"calib/final_conf_rate": 0.00390625,
|
|
"calib/format_rate": 0.00390625,
|
|
"calib/frac_conf_gt_0.9": 1.0,
|
|
"calib/mean_conf": 1.0,
|
|
"calib/mu_c": NaN,
|
|
"calib/mu_w": 1.0,
|
|
"calib/nonempty_final_conf_rate": 0.00390625,
|
|
"calib/nonempty_reasoning_rate": 0.0703125,
|
|
"calib/nonempty_step_conf_rate": 0.015625,
|
|
"calib/pce": 1.0,
|
|
"calib/std_conf": 0.0,
|
|
"calib/step_conf_rate": 0.015625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.046875,
|
|
"completions/max_length": 2976.0,
|
|
"completions/max_terminated_length": 2976.0,
|
|
"completions/mean_length": 663.62890625,
|
|
"completions/mean_terminated_length": 696.266357421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 6.0,
|
|
"epoch": 0.027733333333333332,
|
|
"grad_norm": 0.0011524234432727098,
|
|
"learning_rate": 4.833333333333333e-06,
|
|
"loss": -0.0056,
|
|
"num_tokens": 6618886.0,
|
|
"reward": 0.0004935335600748658,
|
|
"reward_std": 0.0013959237840026617,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.00390625,
|
|
"rewards/stepwise_brier_reward": 0.0004116342170163989,
|
|
"step": 26
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0546875,
|
|
"calib/avg_num_step_conf": 0.01953125,
|
|
"calib/ece": 0.7075,
|
|
"calib/final_conf_rate": 0.015625,
|
|
"calib/format_rate": 0.0,
|
|
"calib/frac_conf_gt_0.9": 0.25,
|
|
"calib/mean_conf": 0.7075,
|
|
"calib/mu_c": NaN,
|
|
"calib/mu_w": 0.7075,
|
|
"calib/nonempty_final_conf_rate": 0.015625,
|
|
"calib/nonempty_reasoning_rate": 0.0625,
|
|
"calib/nonempty_step_conf_rate": 0.01171875,
|
|
"calib/pce": 0.7075,
|
|
"calib/std_conf": 0.17795715776557006,
|
|
"calib/step_conf_rate": 0.01171875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.05078125,
|
|
"completions/max_length": 2986.0,
|
|
"completions/max_terminated_length": 2986.0,
|
|
"completions/mean_length": 640.40234375,
|
|
"completions/mean_terminated_length": 674.6625366210938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.0288,
|
|
"grad_norm": 0.0006247347919270396,
|
|
"learning_rate": 4.805555555555556e-06,
|
|
"loss": 0.0037,
|
|
"num_tokens": 6863725.0,
|
|
"reward": 0.00390625,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward_step": 0.00390625,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 27
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.06640625,
|
|
"calib/auroc": 0.5952380952380952,
|
|
"calib/avg_num_step_conf": 0.046875,
|
|
"calib/ece": 0.512,
|
|
"calib/final_conf_rate": 0.0390625,
|
|
"calib/format_rate": 0.0078125,
|
|
"calib/frac_conf_gt_0.9": 0.3,
|
|
"calib/gap": 0.16380952380952385,
|
|
"calib/mean_conf": 0.752,
|
|
"calib/mu_c": 0.8666666666666667,
|
|
"calib/mu_w": 0.7028571428571428,
|
|
"calib/nonempty_final_conf_rate": 0.0390625,
|
|
"calib/nonempty_reasoning_rate": 0.07421875,
|
|
"calib/nonempty_step_conf_rate": 0.015625,
|
|
"calib/pce": 0.482,
|
|
"calib/std_conf": 0.29188353841900716,
|
|
"calib/step_conf_rate": 0.015625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.078125,
|
|
"completions/max_length": 3033.0,
|
|
"completions/max_terminated_length": 3033.0,
|
|
"completions/mean_length": 668.0703125,
|
|
"completions/mean_terminated_length": 724.6864624023438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.029866666666666666,
|
|
"grad_norm": 0.0018035010434687138,
|
|
"learning_rate": 4.777777777777778e-06,
|
|
"loss": 0.0091,
|
|
"num_tokens": 7117375.0,
|
|
"reward": 0.015450541861355305,
|
|
"reward_std": 0.03422542288899422,
|
|
"rewards/accuracy_reward_step": 0.01171875,
|
|
"rewards/final_brier_reward_step": 0.003554687602445483,
|
|
"rewards/format_reward_step": 0.0078125,
|
|
"rewards/stepwise_brier_reward": 0.004692792426794767,
|
|
"step": 28
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0625,
|
|
"calib/auroc": 0.875,
|
|
"calib/avg_num_step_conf": 0.07421875,
|
|
"calib/ece": 0.67,
|
|
"calib/final_conf_rate": 0.01953125,
|
|
"calib/format_rate": 0.0078125,
|
|
"calib/frac_conf_gt_0.9": 0.6,
|
|
"calib/gap": 0.16249999999999998,
|
|
"calib/mean_conf": 0.8700000000000001,
|
|
"calib/mu_c": 1.0,
|
|
"calib/mu_w": 0.8375,
|
|
"calib/nonempty_final_conf_rate": 0.01953125,
|
|
"calib/nonempty_reasoning_rate": 0.0703125,
|
|
"calib/nonempty_step_conf_rate": 0.0234375,
|
|
"calib/pce": 0.67,
|
|
"calib/std_conf": 0.18867962264113206,
|
|
"calib/step_conf_rate": 0.0234375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.05078125,
|
|
"completions/max_length": 3045.0,
|
|
"completions/max_terminated_length": 3045.0,
|
|
"completions/mean_length": 674.35546875,
|
|
"completions/mean_terminated_length": 710.4320678710938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.030933333333333334,
|
|
"grad_norm": 0.001415241975337267,
|
|
"learning_rate": 4.75e-06,
|
|
"loss": 0.0066,
|
|
"num_tokens": 7372818.0,
|
|
"reward": 0.007501841522753239,
|
|
"reward_std": 0.018346427008509636,
|
|
"rewards/accuracy_reward_step": 0.00390625,
|
|
"rewards/final_brier_reward_step": 0.0029296875,
|
|
"rewards/format_reward_step": 0.0078125,
|
|
"rewards/stepwise_brier_reward": 0.00539799127727747,
|
|
"step": 29
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.078125,
|
|
"calib/auroc": 0.0,
|
|
"calib/avg_num_step_conf": 0.15234375,
|
|
"calib/ece": 0.8557575757575757,
|
|
"calib/final_conf_rate": 0.04296875,
|
|
"calib/format_rate": 0.015625,
|
|
"calib/frac_conf_gt_0.9": 0.2727272727272727,
|
|
"calib/gap": -0.8159259259259259,
|
|
"calib/mean_conf": 0.6775757575757576,
|
|
"calib/mu_c": 0.01,
|
|
"calib/mu_w": 0.825925925925926,
|
|
"calib/nonempty_final_conf_rate": 0.04296875,
|
|
"calib/nonempty_reasoning_rate": 0.0859375,
|
|
"calib/nonempty_step_conf_rate": 0.02734375,
|
|
"calib/pce": 0.6757575757575758,
|
|
"calib/std_conf": 0.3651043600886454,
|
|
"calib/step_conf_rate": 0.02734375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.08984375,
|
|
"completions/max_length": 3036.0,
|
|
"completions/max_terminated_length": 3036.0,
|
|
"completions/mean_length": 718.16015625,
|
|
"completions/mean_terminated_length": 789.051513671875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.032,
|
|
"grad_norm": 0.002373687457293272,
|
|
"learning_rate": 4.722222222222222e-06,
|
|
"loss": 0.0464,
|
|
"num_tokens": 7639331.0,
|
|
"reward": 0.02229999378323555,
|
|
"reward_std": 0.05606692656874657,
|
|
"rewards/accuracy_reward_step": 0.015625,
|
|
"rewards/final_brier_reward_step": 0.0053374567069113255,
|
|
"rewards/format_reward_step": 0.015625,
|
|
"rewards/stepwise_brier_reward": 0.009775063954293728,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0703125,
|
|
"calib/auroc": 0.18181818181818182,
|
|
"calib/avg_num_step_conf": 0.13671875,
|
|
"calib/ece": 0.8166666666666667,
|
|
"calib/final_conf_rate": 0.046875,
|
|
"calib/format_rate": 0.01171875,
|
|
"calib/frac_conf_gt_0.9": 0.5833333333333334,
|
|
"calib/gap": -0.10909090909090902,
|
|
"calib/mean_conf": 0.9,
|
|
"calib/mu_c": 0.8,
|
|
"calib/mu_w": 0.9090909090909091,
|
|
"calib/nonempty_final_conf_rate": 0.046875,
|
|
"calib/nonempty_reasoning_rate": 0.09375,
|
|
"calib/nonempty_step_conf_rate": 0.04296875,
|
|
"calib/pce": 0.8166666666666667,
|
|
"calib/std_conf": 0.0889756521002609,
|
|
"calib/step_conf_rate": 0.04296875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0625,
|
|
"completions/max_length": 3025.0,
|
|
"completions/max_terminated_length": 3025.0,
|
|
"completions/mean_length": 673.96875,
|
|
"completions/mean_terminated_length": 718.9000244140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.03306666666666667,
|
|
"grad_norm": 0.001537429285235703,
|
|
"learning_rate": 4.694444444444445e-06,
|
|
"loss": 0.02,
|
|
"num_tokens": 7893459.0,
|
|
"reward": 0.007447539828717709,
|
|
"reward_std": 0.021064823493361473,
|
|
"rewards/accuracy_reward_step": 0.00390625,
|
|
"rewards/final_brier_reward_step": 0.0020898436196148396,
|
|
"rewards/format_reward_step": 0.01171875,
|
|
"rewards/stepwise_brier_reward": 0.005297971423715353,
|
|
"step": 31
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.10546875,
|
|
"calib/auroc": 0.7222222222222222,
|
|
"calib/avg_num_step_conf": 0.11328125,
|
|
"calib/ece": 0.5984848484848484,
|
|
"calib/final_conf_rate": 0.04296875,
|
|
"calib/format_rate": 0.02734375,
|
|
"calib/frac_conf_gt_0.9": 0.45454545454545453,
|
|
"calib/gap": 0.20740740740740748,
|
|
"calib/mean_conf": 0.7803030303030302,
|
|
"calib/mu_c": 0.95,
|
|
"calib/mu_w": 0.7425925925925925,
|
|
"calib/nonempty_final_conf_rate": 0.04296875,
|
|
"calib/nonempty_reasoning_rate": 0.1171875,
|
|
"calib/nonempty_step_conf_rate": 0.05078125,
|
|
"calib/pce": 0.5984848484848484,
|
|
"calib/std_conf": 0.2091128666555893,
|
|
"calib/step_conf_rate": 0.05078125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.09765625,
|
|
"completions/max_length": 2920.0,
|
|
"completions/max_terminated_length": 2920.0,
|
|
"completions/mean_length": 667.45703125,
|
|
"completions/mean_terminated_length": 739.692626953125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.034133333333333335,
|
|
"grad_norm": 0.002994696842506528,
|
|
"learning_rate": 4.666666666666667e-06,
|
|
"loss": -0.0007,
|
|
"num_tokens": 8146712.0,
|
|
"reward": 0.03033481538295746,
|
|
"reward_std": 0.07062290608882904,
|
|
"rewards/accuracy_reward_step": 0.015625,
|
|
"rewards/final_brier_reward_step": 0.016577690839767456,
|
|
"rewards/format_reward_step": 0.02734375,
|
|
"rewards/stepwise_brier_reward": 0.014746379107236862,
|
|
"step": 32
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.09375,
|
|
"calib/auroc": 0.7321428571428571,
|
|
"calib/avg_num_step_conf": 0.13671875,
|
|
"calib/ece": 0.559375,
|
|
"calib/final_conf_rate": 0.0625,
|
|
"calib/format_rate": 0.03125,
|
|
"calib/frac_conf_gt_0.9": 0.25,
|
|
"calib/gap": 0.24642857142857144,
|
|
"calib/mean_conf": 0.684375,
|
|
"calib/mu_c": 0.9,
|
|
"calib/mu_w": 0.6535714285714286,
|
|
"calib/nonempty_final_conf_rate": 0.0625,
|
|
"calib/nonempty_reasoning_rate": 0.109375,
|
|
"calib/nonempty_step_conf_rate": 0.0546875,
|
|
"calib/pce": 0.559375,
|
|
"calib/std_conf": 0.29406565487149294,
|
|
"calib/step_conf_rate": 0.0546875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0625,
|
|
"completions/max_length": 2553.0,
|
|
"completions/max_terminated_length": 2553.0,
|
|
"completions/mean_length": 602.01953125,
|
|
"completions/mean_terminated_length": 642.1541748046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0352,
|
|
"grad_norm": 0.0028193776961416006,
|
|
"learning_rate": 4.638888888888889e-06,
|
|
"loss": 0.0234,
|
|
"num_tokens": 8383381.0,
|
|
"reward": 0.024664167314767838,
|
|
"reward_std": 0.06388188898563385,
|
|
"rewards/accuracy_reward_step": 0.0078125,
|
|
"rewards/final_brier_reward_step": 0.017470702528953552,
|
|
"rewards/format_reward_step": 0.03125,
|
|
"rewards/stepwise_brier_reward": 0.019965259358286858,
|
|
"step": 33
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.19921875,
|
|
"calib/auroc": 0.42800000000000005,
|
|
"calib/avg_num_step_conf": 0.37109375,
|
|
"calib/ece": 0.6201333333333334,
|
|
"calib/final_conf_rate": 0.1171875,
|
|
"calib/format_rate": 0.046875,
|
|
"calib/frac_conf_gt_0.9": 0.26666666666666666,
|
|
"calib/gap": 0.04143999999999992,
|
|
"calib/mean_conf": 0.7534666666666666,
|
|
"calib/mu_c": 0.788,
|
|
"calib/mu_w": 0.7465600000000001,
|
|
"calib/nonempty_final_conf_rate": 0.1171875,
|
|
"calib/nonempty_reasoning_rate": 0.21484375,
|
|
"calib/nonempty_step_conf_rate": 0.09765625,
|
|
"calib/pce": 0.6034666666666667,
|
|
"calib/std_conf": 0.28588164606276417,
|
|
"calib/step_conf_rate": 0.09765625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.04296875,
|
|
"completions/max_length": 3019.0,
|
|
"completions/max_terminated_length": 3019.0,
|
|
"completions/mean_length": 567.203125,
|
|
"completions/mean_terminated_length": 592.6693725585938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.03626666666666667,
|
|
"grad_norm": 0.003705602139234543,
|
|
"learning_rate": 4.611111111111112e-06,
|
|
"loss": 0.0529,
|
|
"num_tokens": 8609377.0,
|
|
"reward": 0.055107709020376205,
|
|
"reward_std": 0.11008341610431671,
|
|
"rewards/accuracy_reward_step": 0.02734375,
|
|
"rewards/final_brier_reward_step": 0.027791012078523636,
|
|
"rewards/format_reward_step": 0.046875,
|
|
"rewards/stepwise_brier_reward": 0.03672381490468979,
|
|
"step": 34
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.1640625,
|
|
"calib/auroc": 0.43333333333333335,
|
|
"calib/avg_num_step_conf": 0.3046875,
|
|
"calib/ece": 0.6269482758620689,
|
|
"calib/final_conf_rate": 0.11328125,
|
|
"calib/format_rate": 0.05859375,
|
|
"calib/frac_conf_gt_0.9": 0.41379310344827586,
|
|
"calib/gap": 0.0699375000000001,
|
|
"calib/mean_conf": 0.7821206896551722,
|
|
"calib/mu_c": 0.8400000000000001,
|
|
"calib/mu_w": 0.7700625,
|
|
"calib/nonempty_final_conf_rate": 0.11328125,
|
|
"calib/nonempty_reasoning_rate": 0.18359375,
|
|
"calib/nonempty_step_conf_rate": 0.09765625,
|
|
"calib/pce": 0.6183275862068965,
|
|
"calib/std_conf": 0.2577841136049904,
|
|
"calib/step_conf_rate": 0.09765625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.046875,
|
|
"completions/max_length": 2975.0,
|
|
"completions/max_terminated_length": 2975.0,
|
|
"completions/mean_length": 693.68359375,
|
|
"completions/mean_terminated_length": 727.7991333007812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.037333333333333336,
|
|
"grad_norm": 0.004338345490396023,
|
|
"learning_rate": 4.583333333333333e-06,
|
|
"loss": 0.0167,
|
|
"num_tokens": 8871896.0,
|
|
"reward": 0.05112885683774948,
|
|
"reward_std": 0.11319087445735931,
|
|
"rewards/accuracy_reward_step": 0.0234375,
|
|
"rewards/final_brier_reward_step": 0.027192480862140656,
|
|
"rewards/format_reward_step": 0.05859375,
|
|
"rewards/stepwise_brier_reward": 0.03294296935200691,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.3046875,
|
|
"calib/auroc": 0.6007751937984497,
|
|
"calib/avg_num_step_conf": 0.4296875,
|
|
"calib/ece": 0.5844320787878787,
|
|
"calib/final_conf_rate": 0.21484375,
|
|
"calib/format_rate": 0.09765625,
|
|
"calib/frac_conf_gt_0.9": 0.34545454545454546,
|
|
"calib/gap": 0.0745636201550387,
|
|
"calib/mean_conf": 0.7517048060606061,
|
|
"calib/mu_c": 0.81,
|
|
"calib/mu_w": 0.7354363798449614,
|
|
"calib/nonempty_final_conf_rate": 0.21484375,
|
|
"calib/nonempty_reasoning_rate": 0.34375,
|
|
"calib/nonempty_step_conf_rate": 0.1796875,
|
|
"calib/pce": 0.5589775333333333,
|
|
"calib/std_conf": 0.287776394163258,
|
|
"calib/step_conf_rate": 0.1796875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0390625,
|
|
"completions/max_length": 2921.0,
|
|
"completions/max_terminated_length": 2921.0,
|
|
"completions/mean_length": 601.33203125,
|
|
"completions/mean_terminated_length": 625.7764282226562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.0384,
|
|
"grad_norm": 0.004933076910674572,
|
|
"learning_rate": 4.555555555555556e-06,
|
|
"loss": 0.0768,
|
|
"num_tokens": 9104229.0,
|
|
"reward": 0.12337145209312439,
|
|
"reward_std": 0.2592686712741852,
|
|
"rewards/accuracy_reward_step": 0.0703125,
|
|
"rewards/final_brier_reward_step": 0.052323076874017715,
|
|
"rewards/format_reward_step": 0.09765625,
|
|
"rewards/stepwise_brier_reward": 0.06852716207504272,
|
|
"step": 36
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.30078125,
|
|
"calib/auroc": 0.5104761904761905,
|
|
"calib/avg_num_step_conf": 0.54296875,
|
|
"calib/ece": 0.5085999999999999,
|
|
"calib/final_conf_rate": 0.1953125,
|
|
"calib/format_rate": 0.109375,
|
|
"calib/frac_conf_gt_0.9": 0.48,
|
|
"calib/gap": 0.10104761904761894,
|
|
"calib/mean_conf": 0.8005999999999999,
|
|
"calib/mu_c": 0.8713333333333332,
|
|
"calib/mu_w": 0.7702857142857142,
|
|
"calib/nonempty_final_conf_rate": 0.1953125,
|
|
"calib/nonempty_reasoning_rate": 0.359375,
|
|
"calib/nonempty_step_conf_rate": 0.21484375,
|
|
"calib/pce": 0.5045999999999999,
|
|
"calib/std_conf": 0.24287165334801836,
|
|
"calib/step_conf_rate": 0.21484375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0390625,
|
|
"completions/max_length": 2856.0,
|
|
"completions/max_terminated_length": 2856.0,
|
|
"completions/mean_length": 581.85546875,
|
|
"completions/mean_terminated_length": 605.5081176757812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.039466666666666664,
|
|
"grad_norm": 0.005056615453213453,
|
|
"learning_rate": 4.527777777777778e-06,
|
|
"loss": 0.0466,
|
|
"num_tokens": 9335960.0,
|
|
"reward": 0.12689536809921265,
|
|
"reward_std": 0.22808219492435455,
|
|
"rewards/accuracy_reward_step": 0.06640625,
|
|
"rewards/final_brier_reward_step": 0.0597599595785141,
|
|
"rewards/format_reward_step": 0.109375,
|
|
"rewards/stepwise_brier_reward": 0.07868652790784836,
|
|
"step": 37
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.39453125,
|
|
"calib/auroc": 0.5423728813559322,
|
|
"calib/avg_num_step_conf": 0.68359375,
|
|
"calib/ece": 0.5359868421052632,
|
|
"calib/final_conf_rate": 0.296875,
|
|
"calib/format_rate": 0.16796875,
|
|
"calib/frac_conf_gt_0.9": 0.23684210526315788,
|
|
"calib/gap": 0.04203888334995021,
|
|
"calib/mean_conf": 0.7367763157894737,
|
|
"calib/mu_c": 0.7694117647058824,
|
|
"calib/mu_w": 0.7273728813559321,
|
|
"calib/nonempty_final_conf_rate": 0.296875,
|
|
"calib/nonempty_reasoning_rate": 0.453125,
|
|
"calib/nonempty_step_conf_rate": 0.265625,
|
|
"calib/pce": 0.5245394736842105,
|
|
"calib/std_conf": 0.26233353612510213,
|
|
"calib/step_conf_rate": 0.265625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.05078125,
|
|
"completions/max_length": 2913.0,
|
|
"completions/max_terminated_length": 2913.0,
|
|
"completions/mean_length": 582.05078125,
|
|
"completions/mean_terminated_length": 613.1892700195312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 5.0,
|
|
"epoch": 0.04053333333333333,
|
|
"grad_norm": 0.005831545684486628,
|
|
"learning_rate": 4.5e-06,
|
|
"loss": 0.0665,
|
|
"num_tokens": 9567533.0,
|
|
"reward": 0.15777722001075745,
|
|
"reward_std": 0.30419182777404785,
|
|
"rewards/accuracy_reward_step": 0.0703125,
|
|
"rewards/final_brier_reward_step": 0.08795741945505142,
|
|
"rewards/format_reward_step": 0.16796875,
|
|
"rewards/stepwise_brier_reward": 0.10675650835037231,
|
|
"step": 38
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.48828125,
|
|
"calib/auroc": 0.5432098765432098,
|
|
"calib/avg_num_step_conf": 0.94140625,
|
|
"calib/ece": 0.590637037037037,
|
|
"calib/final_conf_rate": 0.3515625,
|
|
"calib/format_rate": 0.21875,
|
|
"calib/frac_conf_gt_0.9": 0.28888888888888886,
|
|
"calib/gap": 0.006287037037036924,
|
|
"calib/mean_conf": 0.7699703703703704,
|
|
"calib/mu_c": 0.7749999999999999,
|
|
"calib/mu_w": 0.768712962962963,
|
|
"calib/nonempty_final_conf_rate": 0.3515625,
|
|
"calib/nonempty_reasoning_rate": 0.546875,
|
|
"calib/nonempty_step_conf_rate": 0.33984375,
|
|
"calib/pce": 0.5803037037037037,
|
|
"calib/std_conf": 0.2472235537764113,
|
|
"calib/step_conf_rate": 0.33984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 3038.0,
|
|
"completions/max_terminated_length": 3038.0,
|
|
"completions/mean_length": 551.421875,
|
|
"completions/mean_terminated_length": 555.7637939453125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.0416,
|
|
"grad_norm": 0.006903126370161772,
|
|
"learning_rate": 4.472222222222223e-06,
|
|
"loss": 0.051,
|
|
"num_tokens": 9790465.0,
|
|
"reward": 0.2162027508020401,
|
|
"reward_std": 0.3284416198730469,
|
|
"rewards/accuracy_reward_step": 0.10546875,
|
|
"rewards/final_brier_reward_step": 0.10541588068008423,
|
|
"rewards/format_reward_step": 0.21875,
|
|
"rewards/stepwise_brier_reward": 0.14460425078868866,
|
|
"step": 39
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.52734375,
|
|
"calib/auroc": 0.4849439775910364,
|
|
"calib/avg_num_step_conf": 0.91015625,
|
|
"calib/ece": 0.5725723293386139,
|
|
"calib/final_conf_rate": 0.39453125,
|
|
"calib/format_rate": 0.25,
|
|
"calib/frac_conf_gt_0.9": 0.2871287128712871,
|
|
"calib/gap": 0.0035876590982260037,
|
|
"calib/mean_conf": 0.7242475768633662,
|
|
"calib/mu_c": 0.7272313725490196,
|
|
"calib/mu_w": 0.7236437134507936,
|
|
"calib/nonempty_final_conf_rate": 0.39453125,
|
|
"calib/nonempty_reasoning_rate": 0.5703125,
|
|
"calib/nonempty_step_conf_rate": 0.3515625,
|
|
"calib/pce": 0.5642515372594059,
|
|
"calib/std_conf": 0.2690641315047504,
|
|
"calib/step_conf_rate": 0.3515625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01953125,
|
|
"completions/max_length": 2740.0,
|
|
"completions/max_terminated_length": 2740.0,
|
|
"completions/mean_length": 581.7578125,
|
|
"completions/mean_terminated_length": 593.3466186523438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.042666666666666665,
|
|
"grad_norm": 0.005969388876110315,
|
|
"learning_rate": 4.444444444444444e-06,
|
|
"loss": 0.0536,
|
|
"num_tokens": 10021835.0,
|
|
"reward": 0.1922258585691452,
|
|
"reward_std": 0.31835484504699707,
|
|
"rewards/accuracy_reward_step": 0.0703125,
|
|
"rewards/final_brier_reward_step": 0.11862140148878098,
|
|
"rewards/format_reward_step": 0.25,
|
|
"rewards/stepwise_brier_reward": 0.15041063725948334,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.6328125,
|
|
"calib/auroc": 0.6348139255702281,
|
|
"calib/avg_num_step_conf": 1.3515625,
|
|
"calib/ece": 0.4594850746268657,
|
|
"calib/final_conf_rate": 0.5234375,
|
|
"calib/format_rate": 0.33984375,
|
|
"calib/frac_conf_gt_0.9": 0.3582089552238806,
|
|
"calib/gap": 0.10644921968787524,
|
|
"calib/mean_conf": 0.810231343283582,
|
|
"calib/mu_c": 0.8777551020408163,
|
|
"calib/mu_w": 0.7713058823529411,
|
|
"calib/nonempty_final_conf_rate": 0.5234375,
|
|
"calib/nonempty_reasoning_rate": 0.6953125,
|
|
"calib/nonempty_step_conf_rate": 0.46484375,
|
|
"calib/pce": 0.4520223880597016,
|
|
"calib/std_conf": 0.22017079239437895,
|
|
"calib/step_conf_rate": 0.46484375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2761.0,
|
|
"completions/max_terminated_length": 2761.0,
|
|
"completions/mean_length": 467.984375,
|
|
"completions/mean_terminated_length": 469.81964111328125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 1.0,
|
|
"epoch": 0.04373333333333333,
|
|
"grad_norm": 0.006464121863245964,
|
|
"learning_rate": 4.416666666666667e-06,
|
|
"loss": 0.0262,
|
|
"num_tokens": 10224567.0,
|
|
"reward": 0.42656055092811584,
|
|
"reward_std": 0.527800977230072,
|
|
"rewards/accuracy_reward_step": 0.234375,
|
|
"rewards/final_brier_reward_step": 0.20245546102523804,
|
|
"rewards/format_reward_step": 0.33984375,
|
|
"rewards/stepwise_brier_reward": 0.22789371013641357,
|
|
"step": 41
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.75,
|
|
"calib/auroc": 0.5936829952168894,
|
|
"calib/avg_num_step_conf": 1.4609375,
|
|
"calib/ece": 0.5021393939393939,
|
|
"calib/final_conf_rate": 0.6875,
|
|
"calib/format_rate": 0.4140625,
|
|
"calib/frac_conf_gt_0.9": 0.23295454545454544,
|
|
"calib/gap": 0.08260315575347699,
|
|
"calib/mean_conf": 0.7600939393939395,
|
|
"calib/mu_c": 0.8206382978723401,
|
|
"calib/mu_w": 0.7380351421188631,
|
|
"calib/nonempty_final_conf_rate": 0.6875,
|
|
"calib/nonempty_reasoning_rate": 0.83203125,
|
|
"calib/nonempty_step_conf_rate": 0.53515625,
|
|
"calib/pce": 0.4975939393939393,
|
|
"calib/std_conf": 0.22961455958490537,
|
|
"calib/step_conf_rate": 0.53515625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 3022.0,
|
|
"completions/max_terminated_length": 3022.0,
|
|
"completions/mean_length": 426.38671875,
|
|
"completions/mean_terminated_length": 426.38671875,
|
|
"completions/min_length": 3.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.0448,
|
|
"grad_norm": 0.007076622918248177,
|
|
"learning_rate": 4.388888888888889e-06,
|
|
"loss": -0.0324,
|
|
"num_tokens": 10413770.0,
|
|
"reward": 0.4534400701522827,
|
|
"reward_std": 0.5130658149719238,
|
|
"rewards/accuracy_reward_step": 0.2109375,
|
|
"rewards/final_brier_reward_step": 0.24723786115646362,
|
|
"rewards/format_reward_step": 0.4140625,
|
|
"rewards/stepwise_brier_reward": 0.309909462928772,
|
|
"step": 42
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.75,
|
|
"calib/auroc": 0.5842462180490351,
|
|
"calib/avg_num_step_conf": 1.87109375,
|
|
"calib/ece": 0.5750532544378698,
|
|
"calib/final_conf_rate": 0.66015625,
|
|
"calib/format_rate": 0.4296875,
|
|
"calib/frac_conf_gt_0.9": 0.22485207100591717,
|
|
"calib/gap": 0.05968622848200311,
|
|
"calib/mean_conf": 0.7206272189349112,
|
|
"calib/mu_c": 0.7707777777777777,
|
|
"calib/mu_w": 0.7110915492957746,
|
|
"calib/nonempty_final_conf_rate": 0.66015625,
|
|
"calib/nonempty_reasoning_rate": 0.8125,
|
|
"calib/nonempty_step_conf_rate": 0.5625,
|
|
"calib/pce": 0.5679585798816568,
|
|
"calib/std_conf": 0.24888318015815863,
|
|
"calib/step_conf_rate": 0.5625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 3049.0,
|
|
"completions/max_terminated_length": 3049.0,
|
|
"completions/mean_length": 439.2578125,
|
|
"completions/mean_terminated_length": 440.98040771484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 13.0,
|
|
"epoch": 0.04586666666666667,
|
|
"grad_norm": 0.007090613711625338,
|
|
"learning_rate": 4.361111111111112e-06,
|
|
"loss": 0.0476,
|
|
"num_tokens": 10607124.0,
|
|
"reward": 0.3386271595954895,
|
|
"reward_std": 0.39735865592956543,
|
|
"rewards/accuracy_reward_step": 0.12109375,
|
|
"rewards/final_brier_reward_step": 0.2085159420967102,
|
|
"rewards/format_reward_step": 0.4296875,
|
|
"rewards/stepwise_brier_reward": 0.28122684359550476,
|
|
"step": 43
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.76953125,
|
|
"calib/auroc": 0.5903628117913833,
|
|
"calib/avg_num_step_conf": 1.6953125,
|
|
"calib/ece": 0.5652112994350283,
|
|
"calib/final_conf_rate": 0.69140625,
|
|
"calib/format_rate": 0.48828125,
|
|
"calib/frac_conf_gt_0.9": 0.1864406779661017,
|
|
"calib/gap": 0.06537142857142841,
|
|
"calib/mean_conf": 0.7133751412429379,
|
|
"calib/mu_c": 0.7676666666666666,
|
|
"calib/mu_w": 0.7022952380952382,
|
|
"calib/nonempty_final_conf_rate": 0.69140625,
|
|
"calib/nonempty_reasoning_rate": 0.82421875,
|
|
"calib/nonempty_step_conf_rate": 0.58203125,
|
|
"calib/pce": 0.5545474576271188,
|
|
"calib/std_conf": 0.26426165075547436,
|
|
"calib/step_conf_rate": 0.58203125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2293.0,
|
|
"completions/max_terminated_length": 2293.0,
|
|
"completions/mean_length": 424.12890625,
|
|
"completions/mean_terminated_length": 427.468505859375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.046933333333333334,
|
|
"grad_norm": 0.007667779456824064,
|
|
"learning_rate": 4.333333333333334e-06,
|
|
"loss": 0.0207,
|
|
"num_tokens": 10797701.0,
|
|
"reward": 0.3810100555419922,
|
|
"reward_std": 0.44098833203315735,
|
|
"rewards/accuracy_reward_step": 0.12890625,
|
|
"rewards/final_brier_reward_step": 0.2581155300140381,
|
|
"rewards/format_reward_step": 0.48828125,
|
|
"rewards/stepwise_brier_reward": 0.2968715727329254,
|
|
"step": 44
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.875,
|
|
"calib/auroc": 0.5488539523949456,
|
|
"calib/avg_num_step_conf": 2.5390625,
|
|
"calib/ece": 0.5292746010832967,
|
|
"calib/final_conf_rate": 0.80859375,
|
|
"calib/format_rate": 0.625,
|
|
"calib/frac_conf_gt_0.9": 0.20772946859903382,
|
|
"calib/gap": 0.06627188665972095,
|
|
"calib/mean_conf": 0.7273422339335384,
|
|
"calib/mu_c": 0.7804878048780488,
|
|
"calib/mu_w": 0.7142159182183279,
|
|
"calib/nonempty_final_conf_rate": 0.80859375,
|
|
"calib/nonempty_reasoning_rate": 0.9140625,
|
|
"calib/nonempty_step_conf_rate": 0.70703125,
|
|
"calib/pce": 0.5292746010832967,
|
|
"calib/std_conf": 0.2359906160423243,
|
|
"calib/step_conf_rate": 0.70703125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1338.0,
|
|
"completions/max_terminated_length": 1338.0,
|
|
"completions/mean_length": 369.1953125,
|
|
"completions/mean_terminated_length": 370.6431579589844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 7.0,
|
|
"epoch": 0.048,
|
|
"grad_norm": 0.007053131703287363,
|
|
"learning_rate": 4.305555555555556e-06,
|
|
"loss": 0.0124,
|
|
"num_tokens": 10972943.0,
|
|
"reward": 0.5057880878448486,
|
|
"reward_std": 0.5060071349143982,
|
|
"rewards/accuracy_reward_step": 0.171875,
|
|
"rewards/final_brier_reward_step": 0.3368590772151947,
|
|
"rewards/format_reward_step": 0.625,
|
|
"rewards/stepwise_brier_reward": 0.41193413734436035,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.88671875,
|
|
"calib/auroc": 0.4821515594541911,
|
|
"calib/avg_num_step_conf": 2.92578125,
|
|
"calib/ece": 0.5230832354859751,
|
|
"calib/final_conf_rate": 0.85546875,
|
|
"calib/format_rate": 0.6875,
|
|
"calib/frac_conf_gt_0.9": 0.2054794520547945,
|
|
"calib/gap": -0.027187009189640787,
|
|
"calib/mean_conf": 0.7066448793215917,
|
|
"calib/mu_c": 0.6854166666666667,
|
|
"calib/mu_w": 0.7126036758563075,
|
|
"calib/nonempty_final_conf_rate": 0.85546875,
|
|
"calib/nonempty_reasoning_rate": 0.95703125,
|
|
"calib/nonempty_step_conf_rate": 0.81640625,
|
|
"calib/pce": 0.505275016307893,
|
|
"calib/std_conf": 0.2716084230018331,
|
|
"calib/step_conf_rate": 0.81640625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2120.0,
|
|
"completions/max_terminated_length": 2120.0,
|
|
"completions/mean_length": 327.48046875,
|
|
"completions/mean_terminated_length": 328.7647399902344,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.04906666666666667,
|
|
"grad_norm": 0.00798877328634262,
|
|
"learning_rate": 4.277777777777778e-06,
|
|
"loss": 0.0421,
|
|
"num_tokens": 11137226.0,
|
|
"reward": 0.5734426975250244,
|
|
"reward_std": 0.4912108778953552,
|
|
"rewards/accuracy_reward_step": 0.20703125,
|
|
"rewards/final_brier_reward_step": 0.3600817918777466,
|
|
"rewards/format_reward_step": 0.6875,
|
|
"rewards/stepwise_brier_reward": 0.4704821705818176,
|
|
"step": 46
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.93359375,
|
|
"calib/auroc": 0.5108861726508785,
|
|
"calib/avg_num_step_conf": 2.671875,
|
|
"calib/ece": 0.52622634643377,
|
|
"calib/final_conf_rate": 0.89453125,
|
|
"calib/format_rate": 0.7734375,
|
|
"calib/frac_conf_gt_0.9": 0.17903930131004367,
|
|
"calib/gap": -0.001643111790170737,
|
|
"calib/mean_conf": 0.6920560407569141,
|
|
"calib/mu_c": 0.6907142857142856,
|
|
"calib/mu_w": 0.6923573975044564,
|
|
"calib/nonempty_final_conf_rate": 0.89453125,
|
|
"calib/nonempty_reasoning_rate": 0.96484375,
|
|
"calib/nonempty_step_conf_rate": 0.8515625,
|
|
"calib/pce": 0.5174381368267831,
|
|
"calib/std_conf": 0.2640733101582573,
|
|
"calib/step_conf_rate": 0.8515625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1529.0,
|
|
"completions/max_terminated_length": 1529.0,
|
|
"completions/mean_length": 281.4453125,
|
|
"completions/mean_terminated_length": 282.5490417480469,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.050133333333333335,
|
|
"grad_norm": 0.008706745691597462,
|
|
"learning_rate": 4.25e-06,
|
|
"loss": -0.0012,
|
|
"num_tokens": 11290932.0,
|
|
"reward": 0.593336820602417,
|
|
"reward_std": 0.4346812069416046,
|
|
"rewards/accuracy_reward_step": 0.171875,
|
|
"rewards/final_brier_reward_step": 0.41285210847854614,
|
|
"rewards/format_reward_step": 0.7734375,
|
|
"rewards/stepwise_brier_reward": 0.5507680773735046,
|
|
"step": 47
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.94921875,
|
|
"calib/auroc": 0.4797202797202797,
|
|
"calib/avg_num_step_conf": 2.421875,
|
|
"calib/ece": 0.4998348675034866,
|
|
"calib/final_conf_rate": 0.93359375,
|
|
"calib/format_rate": 0.80078125,
|
|
"calib/frac_conf_gt_0.9": 0.100418410041841,
|
|
"calib/gap": -0.021629386169386144,
|
|
"calib/mean_conf": 0.6506019525801954,
|
|
"calib/mu_c": 0.6329545454545454,
|
|
"calib/mu_w": 0.6545839316239316,
|
|
"calib/nonempty_final_conf_rate": 0.93359375,
|
|
"calib/nonempty_reasoning_rate": 0.97265625,
|
|
"calib/nonempty_step_conf_rate": 0.8671875,
|
|
"calib/pce": 0.48316820083682,
|
|
"calib/std_conf": 0.26267684598556074,
|
|
"calib/step_conf_rate": 0.8671875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2729.0,
|
|
"completions/max_terminated_length": 2729.0,
|
|
"completions/mean_length": 273.65234375,
|
|
"completions/mean_terminated_length": 273.65234375,
|
|
"completions/min_length": 24.0,
|
|
"completions/min_terminated_length": 24.0,
|
|
"epoch": 0.0512,
|
|
"grad_norm": 0.00950379017740488,
|
|
"learning_rate": 4.222222222222223e-06,
|
|
"loss": 0.0538,
|
|
"num_tokens": 11440355.0,
|
|
"reward": 0.6238257884979248,
|
|
"reward_std": 0.43043237924575806,
|
|
"rewards/accuracy_reward_step": 0.17578125,
|
|
"rewards/final_brier_reward_step": 0.4479580819606781,
|
|
"rewards/format_reward_step": 0.80078125,
|
|
"rewards/stepwise_brier_reward": 0.5759493708610535,
|
|
"step": 48
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.472853125675968,
|
|
"calib/avg_num_step_conf": 2.40625,
|
|
"calib/ece": 0.44603238866396755,
|
|
"calib/final_conf_rate": 0.96484375,
|
|
"calib/format_rate": 0.88671875,
|
|
"calib/frac_conf_gt_0.9": 0.05668016194331984,
|
|
"calib/gap": -0.022916937053860997,
|
|
"calib/mean_conf": 0.6088663967611335,
|
|
"calib/mu_c": 0.5902173913043479,
|
|
"calib/mu_w": 0.6131343283582089,
|
|
"calib/nonempty_final_conf_rate": 0.96484375,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.921875,
|
|
"calib/pce": 0.43433198380566795,
|
|
"calib/std_conf": 0.2598775995840334,
|
|
"calib/step_conf_rate": 0.921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 831.0,
|
|
"completions/max_terminated_length": 831.0,
|
|
"completions/mean_length": 223.96875,
|
|
"completions/mean_terminated_length": 224.84707641601562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.05226666666666667,
|
|
"grad_norm": 0.010475818999111652,
|
|
"learning_rate": 4.194444444444445e-06,
|
|
"loss": -0.0446,
|
|
"num_tokens": 11577907.0,
|
|
"reward": 0.6920686960220337,
|
|
"reward_std": 0.41203224658966064,
|
|
"rewards/accuracy_reward_step": 0.1796875,
|
|
"rewards/final_brier_reward_step": 0.5262347459793091,
|
|
"rewards/format_reward_step": 0.88671875,
|
|
"rewards/stepwise_brier_reward": 0.6423674821853638,
|
|
"step": 49
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.5308615967979776,
|
|
"calib/avg_num_step_conf": 2.47265625,
|
|
"calib/ece": 0.44089667761956924,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.890625,
|
|
"calib/frac_conf_gt_0.9": 0.07228915662650602,
|
|
"calib/gap": 0.019779458796943428,
|
|
"calib/mean_conf": 0.59671997079226,
|
|
"calib/mu_c": 0.6127659574468084,
|
|
"calib/mu_w": 0.592986498649865,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9453125,
|
|
"calib/pce": 0.424430814165754,
|
|
"calib/std_conf": 0.2535598394856577,
|
|
"calib/step_conf_rate": 0.9453125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 934.0,
|
|
"completions/max_terminated_length": 934.0,
|
|
"completions/mean_length": 254.2890625,
|
|
"completions/mean_terminated_length": 255.28628540039062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.05333333333333334,
|
|
"grad_norm": 0.00964977964758873,
|
|
"learning_rate": 4.166666666666667e-06,
|
|
"loss": -0.0643,
|
|
"num_tokens": 11724045.0,
|
|
"reward": 0.7111487984657288,
|
|
"reward_std": 0.3974384367465973,
|
|
"rewards/accuracy_reward_step": 0.18359375,
|
|
"rewards/final_brier_reward_step": 0.5488967895507812,
|
|
"rewards/format_reward_step": 0.890625,
|
|
"rewards/stepwise_brier_reward": 0.6561766266822815,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5422007941009643,
|
|
"calib/avg_num_step_conf": 2.56640625,
|
|
"calib/ece": 0.3676209677419355,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.9140625,
|
|
"calib/frac_conf_gt_0.9": 0.05241935483870968,
|
|
"calib/gap": 0.03715825297787856,
|
|
"calib/mean_conf": 0.5325403225806451,
|
|
"calib/mu_c": 0.5632558139534883,
|
|
"calib/mu_w": 0.5260975609756098,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9609375,
|
|
"calib/pce": 0.3633870967741936,
|
|
"calib/std_conf": 0.26420528733914556,
|
|
"calib/step_conf_rate": 0.9609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1129.0,
|
|
"completions/max_terminated_length": 1129.0,
|
|
"completions/mean_length": 263.0625,
|
|
"completions/mean_terminated_length": 264.0941467285156,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.0544,
|
|
"grad_norm": 0.010027616284787655,
|
|
"learning_rate": 4.138888888888889e-06,
|
|
"loss": -0.027,
|
|
"num_tokens": 11876365.0,
|
|
"reward": 0.7352821826934814,
|
|
"reward_std": 0.36998486518859863,
|
|
"rewards/accuracy_reward_step": 0.171875,
|
|
"rewards/final_brier_reward_step": 0.610413670539856,
|
|
"rewards/format_reward_step": 0.9140625,
|
|
"rewards/stepwise_brier_reward": 0.6671764254570007,
|
|
"step": 51
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.5019496632399858,
|
|
"calib/avg_num_step_conf": 2.34765625,
|
|
"calib/ece": 0.30011612021857925,
|
|
"calib/final_conf_rate": 0.953125,
|
|
"calib/format_rate": 0.921875,
|
|
"calib/frac_conf_gt_0.9": 0.01639344262295082,
|
|
"calib/gap": 0.001085017133404087,
|
|
"calib/mean_conf": 0.46773907103825135,
|
|
"calib/mu_c": 0.46854838709677415,
|
|
"calib/mu_w": 0.46746336996337007,
|
|
"calib/nonempty_final_conf_rate": 0.953125,
|
|
"calib/nonempty_reasoning_rate": 0.9765625,
|
|
"calib/nonempty_step_conf_rate": 0.94140625,
|
|
"calib/pce": 0.2568784153005465,
|
|
"calib/std_conf": 0.2673715065884634,
|
|
"calib/step_conf_rate": 0.94140625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1073.0,
|
|
"completions/max_terminated_length": 1073.0,
|
|
"completions/mean_length": 285.71484375,
|
|
"completions/mean_terminated_length": 286.8352966308594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.055466666666666664,
|
|
"grad_norm": 0.009401632472872734,
|
|
"learning_rate": 4.111111111111111e-06,
|
|
"loss": -0.0295,
|
|
"num_tokens": 12033140.0,
|
|
"reward": 0.8306794762611389,
|
|
"reward_std": 0.42442840337753296,
|
|
"rewards/accuracy_reward_step": 0.2421875,
|
|
"rewards/final_brier_reward_step": 0.6383389234542847,
|
|
"rewards/format_reward_step": 0.921875,
|
|
"rewards/stepwise_brier_reward": 0.7085399031639099,
|
|
"step": 52
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5132941848136368,
|
|
"calib/avg_num_step_conf": 2.484375,
|
|
"calib/ece": 0.23466786786786786,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9375,
|
|
"calib/frac_conf_gt_0.9": 0.024,
|
|
"calib/gap": 0.0066150221667715114,
|
|
"calib/mean_conf": 0.4082921321321321,
|
|
"calib/mu_c": 0.41313432835820896,
|
|
"calib/mu_w": 0.40651930619143745,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.97265625,
|
|
"calib/pce": 0.18747999999999998,
|
|
"calib/std_conf": 0.2618325792640843,
|
|
"calib/step_conf_rate": 0.97265625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 920.0,
|
|
"completions/max_terminated_length": 920.0,
|
|
"completions/mean_length": 304.16015625,
|
|
"completions/mean_terminated_length": 305.35296630859375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.05653333333333333,
|
|
"grad_norm": 0.008001954294741154,
|
|
"learning_rate": 4.083333333333334e-06,
|
|
"loss": -0.0696,
|
|
"num_tokens": 12192509.0,
|
|
"reward": 0.8803744316101074,
|
|
"reward_std": 0.404864639043808,
|
|
"rewards/accuracy_reward_step": 0.26171875,
|
|
"rewards/final_brier_reward_step": 0.6734973192214966,
|
|
"rewards/format_reward_step": 0.9375,
|
|
"rewards/stepwise_brier_reward": 0.7526279091835022,
|
|
"step": 53
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.4499376385809313,
|
|
"calib/avg_num_step_conf": 2.88671875,
|
|
"calib/ece": 0.23645489417989418,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.953125,
|
|
"calib/frac_conf_gt_0.9": 0.011904761904761904,
|
|
"calib/gap": -0.03331431079083513,
|
|
"calib/mean_conf": 0.37891521164021164,
|
|
"calib/mu_c": 0.3572344696969697,
|
|
"calib/mu_w": 0.39054878048780484,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
|
"calib/pce": 0.13308187830687832,
|
|
"calib/std_conf": 0.23750741110241325,
|
|
"calib/step_conf_rate": 0.98046875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1160.0,
|
|
"completions/max_terminated_length": 1160.0,
|
|
"completions/mean_length": 316.30859375,
|
|
"completions/mean_terminated_length": 317.5490417480469,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 42.0,
|
|
"epoch": 0.0576,
|
|
"grad_norm": 0.007840687409043312,
|
|
"learning_rate": 4.055555555555556e-06,
|
|
"loss": -0.014,
|
|
"num_tokens": 12355396.0,
|
|
"reward": 0.9629466533660889,
|
|
"reward_std": 0.404100239276886,
|
|
"rewards/accuracy_reward_step": 0.34375,
|
|
"rewards/final_brier_reward_step": 0.6632779836654663,
|
|
"rewards/format_reward_step": 0.953125,
|
|
"rewards/stepwise_brier_reward": 0.7689805626869202,
|
|
"step": 54
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5006683375104428,
|
|
"calib/avg_num_step_conf": 3.12890625,
|
|
"calib/ece": 0.18966403162055342,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.9453125,
|
|
"calib/frac_conf_gt_0.9": 0.003952569169960474,
|
|
"calib/gap": -0.005073934837092731,
|
|
"calib/mean_conf": 0.3447628458498023,
|
|
"calib/mu_c": 0.34095238095238095,
|
|
"calib/mu_w": 0.3460263157894737,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.96484375,
|
|
"calib/pce": 0.14270750988142294,
|
|
"calib/std_conf": 0.2376830334506864,
|
|
"calib/step_conf_rate": 0.96484375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1283.0,
|
|
"completions/max_terminated_length": 1283.0,
|
|
"completions/mean_length": 327.33203125,
|
|
"completions/mean_terminated_length": 328.6156921386719,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 53.0,
|
|
"epoch": 0.058666666666666666,
|
|
"grad_norm": 0.007699980866163969,
|
|
"learning_rate": 4.027777777777779e-06,
|
|
"loss": -0.0497,
|
|
"num_tokens": 12522697.0,
|
|
"reward": 0.8905273079872131,
|
|
"reward_std": 0.3477417826652527,
|
|
"rewards/accuracy_reward_step": 0.24609375,
|
|
"rewards/final_brier_reward_step": 0.7088057398796082,
|
|
"rewards/format_reward_step": 0.9453125,
|
|
"rewards/stepwise_brier_reward": 0.7819976806640625,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5157967032967032,
|
|
"calib/avg_num_step_conf": 2.82421875,
|
|
"calib/ece": 0.1903705179282869,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.953125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.002797435897435807,
|
|
"calib/mean_conf": 0.28532669322709164,
|
|
"calib/mu_c": 0.2875,
|
|
"calib/mu_w": 0.28470256410256417,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
|
"calib/pce": 0.12629482071713147,
|
|
"calib/std_conf": 0.21593719770159248,
|
|
"calib/step_conf_rate": 0.9765625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1160.0,
|
|
"completions/max_terminated_length": 1160.0,
|
|
"completions/mean_length": 373.734375,
|
|
"completions/mean_terminated_length": 375.20001220703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.05973333333333333,
|
|
"grad_norm": 0.006527104414999485,
|
|
"learning_rate": 4.000000000000001e-06,
|
|
"loss": -0.0028,
|
|
"num_tokens": 12700893.0,
|
|
"reward": 0.881033182144165,
|
|
"reward_std": 0.3198215663433075,
|
|
"rewards/accuracy_reward_step": 0.21875,
|
|
"rewards/final_brier_reward_step": 0.7373512387275696,
|
|
"rewards/format_reward_step": 0.953125,
|
|
"rewards/stepwise_brier_reward": 0.7931802272796631,
|
|
"step": 56
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5634366361123235,
|
|
"calib/avg_num_step_conf": 3.69140625,
|
|
"calib/ece": 0.15581027667984193,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.03008802560744947,
|
|
"calib/mean_conf": 0.23588932806324114,
|
|
"calib/mu_c": 0.25658227848101267,
|
|
"calib/mu_w": 0.2264942528735632,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.03972332015810279,
|
|
"calib/std_conf": 0.18538630643438173,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2483.0,
|
|
"completions/max_terminated_length": 2483.0,
|
|
"completions/mean_length": 389.6875,
|
|
"completions/mean_terminated_length": 389.6875,
|
|
"completions/min_length": 51.0,
|
|
"completions/min_terminated_length": 51.0,
|
|
"epoch": 0.0608,
|
|
"grad_norm": 0.006315163802355528,
|
|
"learning_rate": 3.972222222222223e-06,
|
|
"loss": 0.0188,
|
|
"num_tokens": 12883125.0,
|
|
"reward": 0.9760843515396118,
|
|
"reward_std": 0.308395653963089,
|
|
"rewards/accuracy_reward_step": 0.3125,
|
|
"rewards/final_brier_reward_step": 0.7303789854049683,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"rewards/stepwise_brier_reward": 0.8076419234275818,
|
|
"step": 57
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.540296052631579,
|
|
"calib/avg_num_step_conf": 3.41015625,
|
|
"calib/ece": 0.11228346456692911,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.003937007874015748,
|
|
"calib/gap": 0.028472039473684208,
|
|
"calib/mean_conf": 0.20385826771653542,
|
|
"calib/mu_c": 0.22515625000000003,
|
|
"calib/mu_w": 0.19668421052631582,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.03208661417322833,
|
|
"calib/std_conf": 0.16255851638884533,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1585.0,
|
|
"completions/max_terminated_length": 1585.0,
|
|
"completions/mean_length": 416.50390625,
|
|
"completions/mean_terminated_length": 418.13726806640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 77.0,
|
|
"epoch": 0.06186666666666667,
|
|
"grad_norm": 0.006129554007202387,
|
|
"learning_rate": 3.944444444444445e-06,
|
|
"loss": 0.0073,
|
|
"num_tokens": 13071750.0,
|
|
"reward": 0.934508740901947,
|
|
"reward_std": 0.2841190993785858,
|
|
"rewards/accuracy_reward_step": 0.25,
|
|
"rewards/final_brier_reward_step": 0.7646179795265198,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"rewards/stepwise_brier_reward": 0.8228615522384644,
|
|
"step": 58
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.5461218497675556,
|
|
"calib/avg_num_step_conf": 2.7578125,
|
|
"calib/ece": 0.14782000000000003,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.014022918195905676,
|
|
"calib/mean_conf": 0.18078000000000002,
|
|
"calib/mu_c": 0.191044776119403,
|
|
"calib/mu_w": 0.1770218579234973,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
|
"calib/pce": 0.0303,
|
|
"calib/std_conf": 0.1345402973090219,
|
|
"calib/step_conf_rate": 0.9765625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1427.0,
|
|
"completions/max_terminated_length": 1427.0,
|
|
"completions/mean_length": 393.22265625,
|
|
"completions/mean_terminated_length": 394.7647399902344,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 59.0,
|
|
"epoch": 0.06293333333333333,
|
|
"grad_norm": 0.006685024127364159,
|
|
"learning_rate": 3.916666666666667e-06,
|
|
"loss": -0.0082,
|
|
"num_tokens": 13254343.0,
|
|
"reward": 0.9424407482147217,
|
|
"reward_std": 0.25202226638793945,
|
|
"rewards/accuracy_reward_step": 0.26171875,
|
|
"rewards/final_brier_reward_step": 0.7529077529907227,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"rewards/stepwise_brier_reward": 0.832697331905365,
|
|
"step": 59
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.48704967349035133,
|
|
"calib/avg_num_step_conf": 2.875,
|
|
"calib/ece": 0.19244094488188979,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.010248734316530977,
|
|
"calib/mean_conf": 0.1674015748031496,
|
|
"calib/mu_c": 0.16025974025974024,
|
|
"calib/mu_w": 0.17050847457627122,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.028346456692913382,
|
|
"calib/std_conf": 0.14151159546504766,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1183.0,
|
|
"completions/max_terminated_length": 1183.0,
|
|
"completions/mean_length": 381.546875,
|
|
"completions/mean_terminated_length": 383.04315185546875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 86.0,
|
|
"epoch": 0.064,
|
|
"grad_norm": 0.006986516993492842,
|
|
"learning_rate": 3.88888888888889e-06,
|
|
"loss": -0.0189,
|
|
"num_tokens": 13436555.0,
|
|
"reward": 0.9669762849807739,
|
|
"reward_std": 0.2988584637641907,
|
|
"rewards/accuracy_reward_step": 0.3046875,
|
|
"rewards/final_brier_reward_step": 0.7100223302841187,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"rewards/stepwise_brier_reward": 0.8447355628013611,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5678276189185527,
|
|
"calib/avg_num_step_conf": 3.328125,
|
|
"calib/ece": 0.23713147410358568,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.027661607263856913,
|
|
"calib/mean_conf": 0.1513147410358566,
|
|
"calib/mu_c": 0.16861702127659575,
|
|
"calib/mu_w": 0.14095541401273884,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.006972111553784861,
|
|
"calib/std_conf": 0.12346372494108282,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1312.0,
|
|
"completions/max_terminated_length": 1312.0,
|
|
"completions/mean_length": 377.54296875,
|
|
"completions/mean_terminated_length": 379.0235595703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 63.0,
|
|
"epoch": 0.06506666666666666,
|
|
"grad_norm": 0.005949671845883131,
|
|
"learning_rate": 3.861111111111112e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 13612950.0,
|
|
"reward": 1.017664909362793,
|
|
"reward_std": 0.28337812423706055,
|
|
"rewards/accuracy_reward_step": 0.37109375,
|
|
"rewards/final_brier_reward_step": 0.687488317489624,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"rewards/stepwise_brier_reward": 0.8253706097602844,
|
|
"step": 61
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5463499851323224,
|
|
"calib/avg_num_step_conf": 3.02734375,
|
|
"calib/ece": 0.18968379446640318,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.013424769550996113,
|
|
"calib/mean_conf": 0.13205533596837946,
|
|
"calib/mu_c": 0.14144736842105263,
|
|
"calib/mu_w": 0.12802259887005651,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.010671936758893281,
|
|
"calib/std_conf": 0.09509993132453148,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2034.0,
|
|
"completions/max_terminated_length": 2034.0,
|
|
"completions/mean_length": 429.671875,
|
|
"completions/mean_terminated_length": 431.3569030761719,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 123.0,
|
|
"epoch": 0.06613333333333334,
|
|
"grad_norm": 0.005477833095937967,
|
|
"learning_rate": 3.833333333333334e-06,
|
|
"loss": 0.0293,
|
|
"num_tokens": 13805706.0,
|
|
"reward": 0.9830002784729004,
|
|
"reward_std": 0.23470573127269745,
|
|
"rewards/accuracy_reward_step": 0.296875,
|
|
"rewards/final_brier_reward_step": 0.7375777363777161,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/stepwise_brier_reward": 0.878720760345459,
|
|
"step": 62
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.5444078947368423,
|
|
"calib/avg_num_step_conf": 2.859375,
|
|
"calib/ece": 0.2141792828685259,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.009222953216374272,
|
|
"calib/mean_conf": 0.11809163346613548,
|
|
"calib/mu_c": 0.12437500000000001,
|
|
"calib/mu_w": 0.11515204678362574,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.97265625,
|
|
"calib/pce": 0.006772908366533861,
|
|
"calib/std_conf": 0.07993141712709298,
|
|
"calib/step_conf_rate": 0.97265625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2531.0,
|
|
"completions/max_terminated_length": 2531.0,
|
|
"completions/mean_length": 447.6015625,
|
|
"completions/mean_terminated_length": 447.6015625,
|
|
"completions/min_length": 83.0,
|
|
"completions/min_terminated_length": 83.0,
|
|
"epoch": 0.0672,
|
|
"grad_norm": 0.00584860285744071,
|
|
"learning_rate": 3.8055555555555556e-06,
|
|
"loss": 0.0147,
|
|
"num_tokens": 14004612.0,
|
|
"reward": 0.9794288873672485,
|
|
"reward_std": 0.29020506143569946,
|
|
"rewards/accuracy_reward_step": 0.3125,
|
|
"rewards/final_brier_reward_step": 0.7084218263626099,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"rewards/stepwise_brier_reward": 0.8664965629577637,
|
|
"step": 63
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5040711307972903,
|
|
"calib/avg_num_step_conf": 3.3671875,
|
|
"calib/ece": 0.2938339920948616,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0027872589890568372,
|
|
"calib/mean_conf": 0.11565217391304349,
|
|
"calib/mu_c": 0.11732673267326736,
|
|
"calib/mu_w": 0.11453947368421052,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.005138339920948615,
|
|
"calib/std_conf": 0.09020874762154729,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2021.0,
|
|
"completions/max_terminated_length": 2021.0,
|
|
"completions/mean_length": 430.953125,
|
|
"completions/mean_terminated_length": 432.6431579589844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 69.0,
|
|
"epoch": 0.06826666666666667,
|
|
"grad_norm": 0.005867753177881241,
|
|
"learning_rate": 3.777777777777778e-06,
|
|
"loss": 0.0771,
|
|
"num_tokens": 14194392.0,
|
|
"reward": 1.0440330505371094,
|
|
"reward_std": 0.25283029675483704,
|
|
"rewards/accuracy_reward_step": 0.39453125,
|
|
"rewards/final_brier_reward_step": 0.6612000465393066,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.8818568587303162,
|
|
"step": 64
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5328330206378987,
|
|
"calib/avg_num_step_conf": 3.39453125,
|
|
"calib/ece": 0.2721960784313725,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.008724872688287325,
|
|
"calib/mean_conf": 0.0987843137254902,
|
|
"calib/mu_c": 0.1043956043956044,
|
|
"calib/mu_w": 0.09567073170731707,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.007058823529411765,
|
|
"calib/std_conf": 0.09795403358268236,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1859.0,
|
|
"completions/max_terminated_length": 1859.0,
|
|
"completions/mean_length": 393.78515625,
|
|
"completions/mean_terminated_length": 395.3294372558594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 109.0,
|
|
"epoch": 0.06933333333333333,
|
|
"grad_norm": 0.005394121166318655,
|
|
"learning_rate": 3.7500000000000005e-06,
|
|
"loss": 0.0359,
|
|
"num_tokens": 14375905.0,
|
|
"reward": 1.0301740169525146,
|
|
"reward_std": 0.20167192816734314,
|
|
"rewards/accuracy_reward_step": 0.35546875,
|
|
"rewards/final_brier_reward_step": 0.6878316402435303,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.9278453588485718,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5066545250770524,
|
|
"calib/avg_num_step_conf": 3.8828125,
|
|
"calib/ece": 0.2540476190476191,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.002675819557298967,
|
|
"calib/mean_conf": 0.09753968253968255,
|
|
"calib/mu_c": 0.09930232558139535,
|
|
"calib/mu_w": 0.09662650602409638,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.005158730158730155,
|
|
"calib/std_conf": 0.07080810764196235,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2925.0,
|
|
"completions/max_terminated_length": 2925.0,
|
|
"completions/mean_length": 508.9296875,
|
|
"completions/mean_terminated_length": 510.9255065917969,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 126.0,
|
|
"epoch": 0.0704,
|
|
"grad_norm": 0.004965396132320166,
|
|
"learning_rate": 3.7222222222222225e-06,
|
|
"loss": 0.0431,
|
|
"num_tokens": 14588223.0,
|
|
"reward": 1.0066497325897217,
|
|
"reward_std": 0.22029651701450348,
|
|
"rewards/accuracy_reward_step": 0.3359375,
|
|
"rewards/final_brier_reward_step": 0.693121075630188,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/stepwise_brier_reward": 0.9059818387031555,
|
|
"step": 66
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.551000272182907,
|
|
"calib/avg_num_step_conf": 3.73828125,
|
|
"calib/ece": 0.26639215686274514,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.009852340772999471,
|
|
"calib/mean_conf": 0.08184313725490196,
|
|
"calib/mu_c": 0.08829545454545455,
|
|
"calib/mu_w": 0.07844311377245508,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0015686274509803923,
|
|
"calib/std_conf": 0.06126233290770837,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1602.0,
|
|
"completions/max_terminated_length": 1602.0,
|
|
"completions/mean_length": 459.57421875,
|
|
"completions/mean_terminated_length": 461.3764953613281,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 86.0,
|
|
"epoch": 0.07146666666666666,
|
|
"grad_norm": 0.005392501130700111,
|
|
"learning_rate": 3.694444444444445e-06,
|
|
"loss": 0.0618,
|
|
"num_tokens": 14786562.0,
|
|
"reward": 1.0170819759368896,
|
|
"reward_std": 0.20529669523239136,
|
|
"rewards/accuracy_reward_step": 0.34375,
|
|
"rewards/final_brier_reward_step": 0.6934664249420166,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.9126452207565308,
|
|
"step": 67
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5467941059338908,
|
|
"calib/avg_num_step_conf": 4.19140625,
|
|
"calib/ece": 0.29388235294117654,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0017702110712863178,
|
|
"calib/mean_conf": 0.08650980392156864,
|
|
"calib/mu_c": 0.08763440860215052,
|
|
"calib/mu_w": 0.0858641975308642,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.00784313725490196,
|
|
"calib/std_conf": 0.062220157048168946,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1261.0,
|
|
"completions/max_terminated_length": 1261.0,
|
|
"completions/mean_length": 430.3671875,
|
|
"completions/mean_terminated_length": 432.054931640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 147.0,
|
|
"epoch": 0.07253333333333334,
|
|
"grad_norm": 0.005932637490332127,
|
|
"learning_rate": 3.6666666666666666e-06,
|
|
"loss": 0.0064,
|
|
"num_tokens": 14976504.0,
|
|
"reward": 1.0296599864959717,
|
|
"reward_std": 0.19540664553642273,
|
|
"rewards/accuracy_reward_step": 0.36328125,
|
|
"rewards/final_brier_reward_step": 0.6813062429428101,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.9060271978378296,
|
|
"step": 68
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.488728323699422,
|
|
"calib/avg_num_step_conf": 4.49609375,
|
|
"calib/ece": 0.25498023715415014,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.008100433526011558,
|
|
"calib/mean_conf": 0.07466403162055336,
|
|
"calib/mu_c": 0.069125,
|
|
"calib/mu_w": 0.07722543352601156,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.006719367588932806,
|
|
"calib/std_conf": 0.06855067779127547,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2401.0,
|
|
"completions/max_terminated_length": 2401.0,
|
|
"completions/mean_length": 506.68359375,
|
|
"completions/mean_terminated_length": 506.68359375,
|
|
"completions/min_length": 153.0,
|
|
"completions/min_terminated_length": 153.0,
|
|
"epoch": 0.0736,
|
|
"grad_norm": 0.005567294545471668,
|
|
"learning_rate": 3.638888888888889e-06,
|
|
"loss": 0.0527,
|
|
"num_tokens": 15186391.0,
|
|
"reward": 0.986290693283081,
|
|
"reward_std": 0.20521283149719238,
|
|
"rewards/accuracy_reward_step": 0.3125,
|
|
"rewards/final_brier_reward_step": 0.7012136578559875,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/stepwise_brier_reward": 0.9005477428436279,
|
|
"step": 69
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5348829201101928,
|
|
"calib/avg_num_step_conf": 4.90625,
|
|
"calib/ece": 0.28343043478260865,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.005661515151515167,
|
|
"calib/mean_conf": 0.06755770750988142,
|
|
"calib/mu_c": 0.07125000000000001,
|
|
"calib/mu_w": 0.06558848484848484,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.0015810276679841897,
|
|
"calib/std_conf": 0.04427719536654392,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2458.0,
|
|
"completions/max_terminated_length": 2458.0,
|
|
"completions/mean_length": 532.078125,
|
|
"completions/mean_terminated_length": 532.078125,
|
|
"completions/min_length": 128.0,
|
|
"completions/min_terminated_length": 128.0,
|
|
"epoch": 0.07466666666666667,
|
|
"grad_norm": 0.005367174278944731,
|
|
"learning_rate": 3.6111111111111115e-06,
|
|
"loss": 0.0722,
|
|
"num_tokens": 15405275.0,
|
|
"reward": 1.0083911418914795,
|
|
"reward_std": 0.21531283855438232,
|
|
"rewards/accuracy_reward_step": 0.34375,
|
|
"rewards/final_brier_reward_step": 0.6785225868225098,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/stepwise_brier_reward": 0.9108941555023193,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5870109546165885,
|
|
"calib/avg_num_step_conf": 4.94140625,
|
|
"calib/ece": 0.21571314741035857,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.011325899843505474,
|
|
"calib/mean_conf": 0.07034262948207172,
|
|
"calib/mu_c": 0.07846478873239436,
|
|
"calib/mu_w": 0.06713888888888889,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0015936254980079682,
|
|
"calib/std_conf": 0.044136538145399744,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2511.0,
|
|
"completions/max_terminated_length": 2511.0,
|
|
"completions/mean_length": 521.18359375,
|
|
"completions/mean_terminated_length": 523.2274780273438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 130.0,
|
|
"epoch": 0.07573333333333333,
|
|
"grad_norm": 0.006266950163990259,
|
|
"learning_rate": 3.5833333333333335e-06,
|
|
"loss": 0.0659,
|
|
"num_tokens": 15618786.0,
|
|
"reward": 0.9764514565467834,
|
|
"reward_std": 0.21679821610450745,
|
|
"rewards/accuracy_reward_step": 0.28125,
|
|
"rewards/final_brier_reward_step": 0.739886999130249,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/stepwise_brier_reward": 0.9088441133499146,
|
|
"step": 71
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5564166779477461,
|
|
"calib/avg_num_step_conf": 4.76953125,
|
|
"calib/ece": 0.28569803921568626,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.006989779342087457,
|
|
"calib/mean_conf": 0.06567450980392156,
|
|
"calib/mu_c": 0.0702247191011236,
|
|
"calib/mu_w": 0.06323493975903614,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.001176470588235294,
|
|
"calib/std_conf": 0.03793575499594873,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2863.0,
|
|
"completions/max_terminated_length": 2863.0,
|
|
"completions/mean_length": 488.56640625,
|
|
"completions/mean_terminated_length": 488.56640625,
|
|
"completions/min_length": 151.0,
|
|
"completions/min_terminated_length": 151.0,
|
|
"epoch": 0.0768,
|
|
"grad_norm": 0.005957463290542364,
|
|
"learning_rate": 3.555555555555556e-06,
|
|
"loss": 0.0653,
|
|
"num_tokens": 15823947.0,
|
|
"reward": 1.0219218730926514,
|
|
"reward_std": 0.22082969546318054,
|
|
"rewards/accuracy_reward_step": 0.34765625,
|
|
"rewards/final_brier_reward_step": 0.6837721467018127,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.9342056512832642,
|
|
"step": 72
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.47057364341085267,
|
|
"calib/avg_num_step_conf": 5.25390625,
|
|
"calib/ece": 0.4320236220472441,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.005496186046511628,
|
|
"calib/mean_conf": 0.06403937007874015,
|
|
"calib/mu_c": 0.061248,
|
|
"calib/mu_w": 0.06674418604651162,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.001968503937007874,
|
|
"calib/std_conf": 0.03856488582247894,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2683.0,
|
|
"completions/max_terminated_length": 2683.0,
|
|
"completions/mean_length": 477.5859375,
|
|
"completions/mean_terminated_length": 477.5859375,
|
|
"completions/min_length": 172.0,
|
|
"completions/min_terminated_length": 172.0,
|
|
"epoch": 0.07786666666666667,
|
|
"grad_norm": 0.0061846706084907055,
|
|
"learning_rate": 3.5277777777777784e-06,
|
|
"loss": 0.0666,
|
|
"num_tokens": 16028921.0,
|
|
"reward": 1.0817893743515015,
|
|
"reward_std": 0.20533283054828644,
|
|
"rewards/accuracy_reward_step": 0.48828125,
|
|
"rewards/final_brier_reward_step": 0.5581741333007812,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.8608090877532959,
|
|
"step": 73
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.521512254241853,
|
|
"calib/avg_num_step_conf": 5.86328125,
|
|
"calib/ece": 0.3111904761904762,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0040587126312954455,
|
|
"calib/mean_conf": 0.0634126984126984,
|
|
"calib/mu_c": 0.06595744680851062,
|
|
"calib/mu_w": 0.06189873417721518,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0007936507936507937,
|
|
"calib/std_conf": 0.03853801871359453,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2650.0,
|
|
"completions/max_terminated_length": 2650.0,
|
|
"completions/mean_length": 517.50390625,
|
|
"completions/mean_terminated_length": 519.5333862304688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 134.0,
|
|
"epoch": 0.07893333333333333,
|
|
"grad_norm": 0.005901651456952095,
|
|
"learning_rate": 3.5e-06,
|
|
"loss": 0.0445,
|
|
"num_tokens": 16241010.0,
|
|
"reward": 1.0170509815216064,
|
|
"reward_std": 0.20792332291603088,
|
|
"rewards/accuracy_reward_step": 0.3671875,
|
|
"rewards/final_brier_reward_step": 0.6602047085762024,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.8852944374084473,
|
|
"step": 74
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5081420263238445,
|
|
"calib/avg_num_step_conf": 4.078125,
|
|
"calib/ece": 0.46271484375,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0029008264462809935,
|
|
"calib/mean_conf": 0.06462890625,
|
|
"calib/mu_c": 0.066,
|
|
"calib/mu_w": 0.06309917355371901,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.03832120344115919,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1698.0,
|
|
"completions/max_terminated_length": 1698.0,
|
|
"completions/mean_length": 453.01171875,
|
|
"completions/mean_terminated_length": 454.78826904296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 94.0,
|
|
"epoch": 0.08,
|
|
"grad_norm": 0.006512695923447609,
|
|
"learning_rate": 3.4722222222222224e-06,
|
|
"loss": 0.0785,
|
|
"num_tokens": 16437413.0,
|
|
"reward": 1.1022157669067383,
|
|
"reward_std": 0.24831125140190125,
|
|
"rewards/accuracy_reward_step": 0.52734375,
|
|
"rewards/final_brier_reward_step": 0.5249893665313721,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.8557596206665039,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.47437848807711824,
|
|
"calib/avg_num_step_conf": 5.10546875,
|
|
"calib/ece": 0.36612992125984245,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.004097158802638251,
|
|
"calib/mean_conf": 0.061429133858267725,
|
|
"calib/mu_c": 0.05907407407407407,
|
|
"calib/mu_w": 0.06317123287671232,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0011811023622047244,
|
|
"calib/std_conf": 0.03622568282448506,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2885.0,
|
|
"completions/max_terminated_length": 2885.0,
|
|
"completions/mean_length": 506.078125,
|
|
"completions/mean_terminated_length": 506.078125,
|
|
"completions/min_length": 156.0,
|
|
"completions/min_terminated_length": 156.0,
|
|
"epoch": 0.08106666666666666,
|
|
"grad_norm": 0.005883322563022375,
|
|
"learning_rate": 3.444444444444445e-06,
|
|
"loss": 0.0326,
|
|
"num_tokens": 16645705.0,
|
|
"reward": 1.056463599205017,
|
|
"reward_std": 0.22573524713516235,
|
|
"rewards/accuracy_reward_step": 0.42578125,
|
|
"rewards/final_brier_reward_step": 0.610862135887146,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.9072553515434265,
|
|
"step": 76
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.5324880629758678,
|
|
"calib/avg_num_step_conf": 4.58984375,
|
|
"calib/ece": 0.4254538152610442,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.004864692218350772,
|
|
"calib/mean_conf": 0.06852208835341365,
|
|
"calib/mu_c": 0.07098373983739839,
|
|
"calib/mu_w": 0.06611904761904762,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.03417578750904915,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2795.0,
|
|
"completions/max_terminated_length": 2795.0,
|
|
"completions/mean_length": 488.5703125,
|
|
"completions/mean_terminated_length": 492.4173278808594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 157.0,
|
|
"epoch": 0.08213333333333334,
|
|
"grad_norm": 0.007012397050857544,
|
|
"learning_rate": 3.416666666666667e-06,
|
|
"loss": 0.0819,
|
|
"num_tokens": 16851123.0,
|
|
"reward": 1.083645224571228,
|
|
"reward_std": 0.2435062825679779,
|
|
"rewards/accuracy_reward_step": 0.484375,
|
|
"rewards/final_brier_reward_step": 0.5546954870223999,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/stepwise_brier_reward": 0.8986270427703857,
|
|
"step": 77
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.45486111111111116,
|
|
"calib/avg_num_step_conf": 4.734375,
|
|
"calib/ece": 0.374155859375,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.005849900793650789,
|
|
"calib/mean_conf": 0.06334414062500002,
|
|
"calib/mu_c": 0.06005357142857143,
|
|
"calib/mu_w": 0.06590347222222222,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.034715984431421855,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1480.0,
|
|
"completions/max_terminated_length": 1480.0,
|
|
"completions/mean_length": 514.87109375,
|
|
"completions/mean_terminated_length": 516.8901977539062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 146.0,
|
|
"epoch": 0.0832,
|
|
"grad_norm": 0.007328997366130352,
|
|
"learning_rate": 3.3888888888888893e-06,
|
|
"loss": 0.0514,
|
|
"num_tokens": 17066634.0,
|
|
"reward": 1.0606286525726318,
|
|
"reward_std": 0.24192503094673157,
|
|
"rewards/accuracy_reward_step": 0.4375,
|
|
"rewards/final_brier_reward_step": 0.594253420829773,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.9102579951286316,
|
|
"step": 78
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.4554146156758803,
|
|
"calib/avg_num_step_conf": 3.83203125,
|
|
"calib/ece": 0.38522924901185773,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.006175880348352883,
|
|
"calib/mean_conf": 0.06773517786561264,
|
|
"calib/mu_c": 0.06434210526315791,
|
|
"calib/mu_w": 0.07051798561151079,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0011857707509881422,
|
|
"calib/std_conf": 0.03531614736199371,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2591.0,
|
|
"completions/max_terminated_length": 2591.0,
|
|
"completions/mean_length": 540.53515625,
|
|
"completions/mean_terminated_length": 542.6549072265625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 196.0,
|
|
"epoch": 0.08426666666666667,
|
|
"grad_norm": 0.006232034880667925,
|
|
"learning_rate": 3.3611111111111117e-06,
|
|
"loss": 0.0227,
|
|
"num_tokens": 17287067.0,
|
|
"reward": 1.0605382919311523,
|
|
"reward_std": 0.2099224030971527,
|
|
"rewards/accuracy_reward_step": 0.4453125,
|
|
"rewards/final_brier_reward_step": 0.5937643647193909,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.8796247243881226,
|
|
"step": 79
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5189950980392157,
|
|
"calib/avg_num_step_conf": 4.34765625,
|
|
"calib/ece": 0.39099609375000005,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.004933823529411782,
|
|
"calib/mean_conf": 0.07775390625000002,
|
|
"calib/mu_c": 0.08037500000000003,
|
|
"calib/mu_w": 0.07544117647058825,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.03450166980185468,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1215.0,
|
|
"completions/max_terminated_length": 1215.0,
|
|
"completions/mean_length": 467.828125,
|
|
"completions/mean_terminated_length": 469.66278076171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 134.0,
|
|
"epoch": 0.08533333333333333,
|
|
"grad_norm": 0.007364832330495119,
|
|
"learning_rate": 3.3333333333333333e-06,
|
|
"loss": 0.037,
|
|
"num_tokens": 17484671.0,
|
|
"reward": 1.0838050842285156,
|
|
"reward_std": 0.20884555578231812,
|
|
"rewards/accuracy_reward_step": 0.46875,
|
|
"rewards/final_brier_reward_step": 0.5993655323982239,
|
|
"rewards/format_reward_step": 1.0,
|
|
"rewards/stepwise_brier_reward": 0.8614894151687622,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.46869521662910085,
|
|
"calib/avg_num_step_conf": 4.25390625,
|
|
"calib/ece": 0.3979802371541502,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0033891184573002497,
|
|
"calib/mean_conf": 0.0802806324110672,
|
|
"calib/mu_c": 0.0785123966942149,
|
|
"calib/mu_w": 0.08190151515151516,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.02879126700453696,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2672.0,
|
|
"completions/max_terminated_length": 2672.0,
|
|
"completions/mean_length": 539.29296875,
|
|
"completions/mean_terminated_length": 539.29296875,
|
|
"completions/min_length": 145.0,
|
|
"completions/min_terminated_length": 145.0,
|
|
"epoch": 0.0864,
|
|
"grad_norm": 0.006128122564405203,
|
|
"learning_rate": 3.3055555555555558e-06,
|
|
"loss": 0.0711,
|
|
"num_tokens": 17704658.0,
|
|
"reward": 1.0754420757293701,
|
|
"reward_std": 0.24022594094276428,
|
|
"rewards/accuracy_reward_step": 0.47265625,
|
|
"rewards/final_brier_reward_step": 0.5745105743408203,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/stepwise_brier_reward": 0.871496856212616,
|
|
"step": 81
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5617386489479512,
|
|
"calib/avg_num_step_conf": 4.0859375,
|
|
"calib/ece": 0.4251764705882353,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.005105204872646693,
|
|
"calib/mean_conf": 0.08305882352941177,
|
|
"calib/mu_c": 0.0855813953488372,
|
|
"calib/mu_w": 0.0804761904761905,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.001176470588235294,
|
|
"calib/std_conf": 0.029480077529832522,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1368.0,
|
|
"completions/max_terminated_length": 1368.0,
|
|
"completions/mean_length": 452.6015625,
|
|
"completions/mean_terminated_length": 454.3764953613281,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 168.0,
|
|
"epoch": 0.08746666666666666,
|
|
"grad_norm": 0.007257652468979359,
|
|
"learning_rate": 3.277777777777778e-06,
|
|
"loss": 0.0319,
|
|
"num_tokens": 17901756.0,
|
|
"reward": 1.0876719951629639,
|
|
"reward_std": 0.2194688618183136,
|
|
"rewards/accuracy_reward_step": 0.50390625,
|
|
"rewards/final_brier_reward_step": 0.5622234344482422,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.8168658018112183,
|
|
"step": 82
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5077663870767319,
|
|
"calib/avg_num_step_conf": 4.6015625,
|
|
"calib/ece": 0.35140625000000003,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0005914880397639016,
|
|
"calib/mean_conf": 0.08218750000000002,
|
|
"calib/mu_c": 0.08252252252252251,
|
|
"calib/mu_w": 0.08193103448275861,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.028171836002468854,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1741.0,
|
|
"completions/max_terminated_length": 1741.0,
|
|
"completions/mean_length": 545.0234375,
|
|
"completions/mean_terminated_length": 547.1608276367188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 214.0,
|
|
"epoch": 0.08853333333333334,
|
|
"grad_norm": 0.006978335324674845,
|
|
"learning_rate": 3.2500000000000002e-06,
|
|
"loss": 0.0352,
|
|
"num_tokens": 18124226.0,
|
|
"reward": 1.075504183769226,
|
|
"reward_std": 0.1979038417339325,
|
|
"rewards/accuracy_reward_step": 0.43359375,
|
|
"rewards/final_brier_reward_step": 0.6304203271865845,
|
|
"rewards/format_reward_step": 1.0,
|
|
"rewards/stepwise_brier_reward": 0.9068013429641724,
|
|
"step": 83
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5181970363940728,
|
|
"calib/avg_num_step_conf": 3.91015625,
|
|
"calib/ece": 0.4087795275590552,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.003385826771653541,
|
|
"calib/mean_conf": 0.09122047244094487,
|
|
"calib/mu_c": 0.09291338582677164,
|
|
"calib/mu_w": 0.0895275590551181,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.022105834689869885,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1348.0,
|
|
"completions/max_terminated_length": 1348.0,
|
|
"completions/mean_length": 462.6171875,
|
|
"completions/mean_terminated_length": 464.431396484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 124.0,
|
|
"epoch": 0.0896,
|
|
"grad_norm": 0.009016655385494232,
|
|
"learning_rate": 3.2222222222222227e-06,
|
|
"loss": 0.0442,
|
|
"num_tokens": 18324256.0,
|
|
"reward": 1.1034959554672241,
|
|
"reward_std": 0.2110147476196289,
|
|
"rewards/accuracy_reward_step": 0.5,
|
|
"rewards/final_brier_reward_step": 0.5795402526855469,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.8580283522605896,
|
|
"step": 84
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.5173715029277814,
|
|
"calib/avg_num_step_conf": 4.75390625,
|
|
"calib/ece": 0.33095617529880483,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.000230318802862714,
|
|
"calib/mean_conf": 0.09533864541832669,
|
|
"calib/mu_c": 0.09547169811320753,
|
|
"calib/mu_w": 0.09524137931034482,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.00199203187250996,
|
|
"calib/std_conf": 0.023092246602381246,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2770.0,
|
|
"completions/max_terminated_length": 2770.0,
|
|
"completions/mean_length": 525.9453125,
|
|
"completions/mean_terminated_length": 532.1818237304688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 178.0,
|
|
"epoch": 0.09066666666666667,
|
|
"grad_norm": 0.008049411699175835,
|
|
"learning_rate": 3.1944444444444443e-06,
|
|
"loss": 0.0607,
|
|
"num_tokens": 18542402.0,
|
|
"reward": 1.0478579998016357,
|
|
"reward_std": 0.23492947220802307,
|
|
"rewards/accuracy_reward_step": 0.4140625,
|
|
"rewards/final_brier_reward_step": 0.6321375370025635,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/stepwise_brier_reward": 0.8802822828292847,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5167689161554192,
|
|
"calib/avg_num_step_conf": 4.1796875,
|
|
"calib/ece": 0.2594071146245059,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0008766189502385779,
|
|
"calib/mean_conf": 0.09632411067193675,
|
|
"calib/mu_c": 0.09688888888888887,
|
|
"calib/mu_w": 0.0960122699386503,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.0140975459593047,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2823.0,
|
|
"completions/max_terminated_length": 2823.0,
|
|
"completions/mean_length": 546.21484375,
|
|
"completions/mean_terminated_length": 548.3568725585938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 172.0,
|
|
"epoch": 0.09173333333333333,
|
|
"grad_norm": 0.0073614963330328465,
|
|
"learning_rate": 3.1666666666666667e-06,
|
|
"loss": 0.0669,
|
|
"num_tokens": 18763425.0,
|
|
"reward": 1.02299165725708,
|
|
"reward_std": 0.22230364382266998,
|
|
"rewards/accuracy_reward_step": 0.3515625,
|
|
"rewards/final_brier_reward_step": 0.6954777836799622,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.8994483947753906,
|
|
"step": 86
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5043693009118542,
|
|
"calib/avg_num_step_conf": 4.55078125,
|
|
"calib/ece": 0.46102766798418976,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0011835106382978577,
|
|
"calib/mean_conf": 0.09628458498023715,
|
|
"calib/mu_c": 0.09680851063829786,
|
|
"calib/mu_w": 0.095625,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.014351255592481205,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2204.0,
|
|
"completions/max_terminated_length": 2204.0,
|
|
"completions/mean_length": 476.59765625,
|
|
"completions/mean_terminated_length": 478.4667053222656,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 135.0,
|
|
"epoch": 0.0928,
|
|
"grad_norm": 0.010919037275016308,
|
|
"learning_rate": 3.138888888888889e-06,
|
|
"loss": 0.0504,
|
|
"num_tokens": 18966610.0,
|
|
"reward": 1.1162362098693848,
|
|
"reward_std": 0.21834619343280792,
|
|
"rewards/accuracy_reward_step": 0.55078125,
|
|
"rewards/final_brier_reward_step": 0.5347750186920166,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.7969573736190796,
|
|
"step": 87
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.512905465101603,
|
|
"calib/avg_num_step_conf": 5.38671875,
|
|
"calib/ece": 0.35466403162055327,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.001821910892338796,
|
|
"calib/mean_conf": 0.0959288537549407,
|
|
"calib/mu_c": 0.09692982456140352,
|
|
"calib/mu_w": 0.09510791366906472,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.014677928277792457,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2949.0,
|
|
"completions/max_terminated_length": 2949.0,
|
|
"completions/mean_length": 567.5546875,
|
|
"completions/mean_terminated_length": 572.0236206054688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 204.0,
|
|
"epoch": 0.09386666666666667,
|
|
"grad_norm": 0.007051222026348114,
|
|
"learning_rate": 3.1111111111111116e-06,
|
|
"loss": 0.0465,
|
|
"num_tokens": 19197432.0,
|
|
"reward": 1.058126449584961,
|
|
"reward_std": 0.26280489563941956,
|
|
"rewards/accuracy_reward_step": 0.4453125,
|
|
"rewards/final_brier_reward_step": 0.6045206785202026,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/stepwise_brier_reward": 0.8531519174575806,
|
|
"step": 88
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.503820409194241,
|
|
"calib/avg_num_step_conf": 4.55078125,
|
|
"calib/ece": 0.32231372549019605,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.000636524374842129,
|
|
"calib/mean_conf": 0.09729411764705881,
|
|
"calib/mu_c": 0.09766355140186915,
|
|
"calib/mu_w": 0.09702702702702702,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.01192014955434385,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2276.0,
|
|
"completions/max_terminated_length": 2276.0,
|
|
"completions/mean_length": 562.671875,
|
|
"completions/mean_terminated_length": 562.671875,
|
|
"completions/min_length": 160.0,
|
|
"completions/min_terminated_length": 160.0,
|
|
"epoch": 0.09493333333333333,
|
|
"grad_norm": 0.0061728148721158504,
|
|
"learning_rate": 3.0833333333333336e-06,
|
|
"loss": 0.0435,
|
|
"num_tokens": 19426044.0,
|
|
"reward": 1.0548436641693115,
|
|
"reward_std": 0.14040419459342957,
|
|
"rewards/accuracy_reward_step": 0.41796875,
|
|
"rewards/final_brier_reward_step": 0.6501949429512024,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.8486722707748413,
|
|
"step": 89
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.4921259842519685,
|
|
"calib/avg_num_step_conf": 4.46484375,
|
|
"calib/ece": 0.40314960629921265,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0007874015748031565,
|
|
"calib/mean_conf": 0.0968503937007874,
|
|
"calib/mu_c": 0.09645669291338582,
|
|
"calib/mu_w": 0.09724409448818898,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.012147439858694105,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2791.0,
|
|
"completions/max_terminated_length": 2791.0,
|
|
"completions/mean_length": 550.640625,
|
|
"completions/mean_terminated_length": 550.640625,
|
|
"completions/min_length": 94.0,
|
|
"completions/min_terminated_length": 94.0,
|
|
"epoch": 0.096,
|
|
"grad_norm": 0.007021500263363123,
|
|
"learning_rate": 3.055555555555556e-06,
|
|
"loss": 0.0617,
|
|
"num_tokens": 19646008.0,
|
|
"reward": 1.105597734451294,
|
|
"reward_std": 0.22193342447280884,
|
|
"rewards/accuracy_reward_step": 0.49609375,
|
|
"rewards/final_brier_reward_step": 0.5823438167572021,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.8764536380767822,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5429648554336989,
|
|
"calib/avg_num_step_conf": 4.19140625,
|
|
"calib/ece": 0.437992125984252,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.004717098703888339,
|
|
"calib/mean_conf": 0.09744094488188976,
|
|
"calib/mu_c": 0.09963235294117646,
|
|
"calib/mu_w": 0.09491525423728812,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.01187809042641385,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1506.0,
|
|
"completions/max_terminated_length": 1506.0,
|
|
"completions/mean_length": 530.578125,
|
|
"completions/mean_terminated_length": 532.6588745117188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 217.0,
|
|
"epoch": 0.09706666666666666,
|
|
"grad_norm": 0.007281932048499584,
|
|
"learning_rate": 3.0277777777777776e-06,
|
|
"loss": 0.0262,
|
|
"num_tokens": 19865228.0,
|
|
"reward": 1.1158626079559326,
|
|
"reward_std": 0.20746996998786926,
|
|
"rewards/accuracy_reward_step": 0.53125,
|
|
"rewards/final_brier_reward_step": 0.5572363138198853,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.8271024227142334,
|
|
"step": 91
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5242480620155038,
|
|
"calib/avg_num_step_conf": 3.9140625,
|
|
"calib/ece": 0.4098425196850393,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.002424806201550389,
|
|
"calib/mean_conf": 0.09803149606299212,
|
|
"calib/mu_c": 0.09922480620155039,
|
|
"calib/mu_w": 0.0968,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.009723692153723203,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1636.0,
|
|
"completions/max_terminated_length": 1636.0,
|
|
"completions/mean_length": 503.4375,
|
|
"completions/mean_terminated_length": 505.41180419921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 185.0,
|
|
"epoch": 0.09813333333333334,
|
|
"grad_norm": 0.006811084225773811,
|
|
"learning_rate": 3e-06,
|
|
"loss": 0.0072,
|
|
"num_tokens": 20076508.0,
|
|
"reward": 1.1008079051971436,
|
|
"reward_std": 0.21823862195014954,
|
|
"rewards/accuracy_reward_step": 0.50390625,
|
|
"rewards/final_brier_reward_step": 0.5709179639816284,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.8520207405090332,
|
|
"step": 92
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5140108870468886,
|
|
"calib/avg_num_step_conf": 3.87890625,
|
|
"calib/ece": 0.363921568627451,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0014010887046888576,
|
|
"calib/mean_conf": 0.09882352941176469,
|
|
"calib/mu_c": 0.09957627118644068,
|
|
"calib/mu_w": 0.09817518248175182,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007578881603955957,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1927.0,
|
|
"completions/max_terminated_length": 1927.0,
|
|
"completions/mean_length": 514.77734375,
|
|
"completions/mean_terminated_length": 516.7960815429688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 167.0,
|
|
"epoch": 0.0992,
|
|
"grad_norm": 0.008910668082535267,
|
|
"learning_rate": 2.9722222222222225e-06,
|
|
"loss": -0.0041,
|
|
"num_tokens": 20289747.0,
|
|
"reward": 1.0883748531341553,
|
|
"reward_std": 0.23094485700130463,
|
|
"rewards/accuracy_reward_step": 0.4609375,
|
|
"rewards/final_brier_reward_step": 0.6171679496765137,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.8769761323928833,
|
|
"step": 93
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5107987339415379,
|
|
"calib/avg_num_step_conf": 3.8515625,
|
|
"calib/ece": 0.3862204724409449,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0014522435300689113,
|
|
"calib/mean_conf": 0.09803149606299212,
|
|
"calib/mu_c": 0.09878048780487805,
|
|
"calib/mu_w": 0.09732824427480914,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.010688088157872621,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2796.0,
|
|
"completions/max_terminated_length": 2796.0,
|
|
"completions/mean_length": 520.30859375,
|
|
"completions/mean_terminated_length": 520.30859375,
|
|
"completions/min_length": 173.0,
|
|
"completions/min_terminated_length": 173.0,
|
|
"epoch": 0.10026666666666667,
|
|
"grad_norm": 0.010884858667850494,
|
|
"learning_rate": 2.944444444444445e-06,
|
|
"loss": 0.0354,
|
|
"num_tokens": 20507306.0,
|
|
"reward": 1.0963938236236572,
|
|
"reward_std": 0.18074506521224976,
|
|
"rewards/accuracy_reward_step": 0.48046875,
|
|
"rewards/final_brier_reward_step": 0.59312504529953,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.8821374177932739,
|
|
"step": 94
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.49912761714855436,
|
|
"calib/avg_num_step_conf": 3.84375,
|
|
"calib/ece": 0.4375984251968503,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -8.723828514453635e-05,
|
|
"calib/mean_conf": 0.09783464566929133,
|
|
"calib/mu_c": 0.09779411764705882,
|
|
"calib/mu_w": 0.09788135593220336,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.010177374767488633,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1643.0,
|
|
"completions/max_terminated_length": 1643.0,
|
|
"completions/mean_length": 525.859375,
|
|
"completions/mean_terminated_length": 527.9215698242188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 162.0,
|
|
"epoch": 0.10133333333333333,
|
|
"grad_norm": 0.007196079473942518,
|
|
"learning_rate": 2.916666666666667e-06,
|
|
"loss": -0.0126,
|
|
"num_tokens": 20723734.0,
|
|
"reward": 1.1179805994033813,
|
|
"reward_std": 0.21131283044815063,
|
|
"rewards/accuracy_reward_step": 0.53125,
|
|
"rewards/final_brier_reward_step": 0.5552442073822021,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.8395587205886841,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.529807948786343,
|
|
"calib/avg_num_step_conf": 4.03125,
|
|
"calib/ece": 0.5427450980392157,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.002980794878634313,
|
|
"calib/mean_conf": 0.09647058823529411,
|
|
"calib/mu_c": 0.09754601226993863,
|
|
"calib/mu_w": 0.09456521739130432,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.01280678885710426,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1303.0,
|
|
"completions/max_terminated_length": 1303.0,
|
|
"completions/mean_length": 500.0078125,
|
|
"completions/mean_terminated_length": 501.9686584472656,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 182.0,
|
|
"epoch": 0.1024,
|
|
"grad_norm": 0.010168724693357944,
|
|
"learning_rate": 2.888888888888889e-06,
|
|
"loss": 0.0303,
|
|
"num_tokens": 20933232.0,
|
|
"reward": 1.156736969947815,
|
|
"reward_std": 0.18414181470870972,
|
|
"rewards/accuracy_reward_step": 0.63671875,
|
|
"rewards/final_brier_reward_step": 0.47416016459465027,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.7333148717880249,
|
|
"step": 96
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5306818181818183,
|
|
"calib/avg_num_step_conf": 4.2578125,
|
|
"calib/ece": 0.381547619047619,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.003068181818181845,
|
|
"calib/mean_conf": 0.09464285714285715,
|
|
"calib/mu_c": 0.09625,
|
|
"calib/mu_w": 0.09318181818181816,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.01546473935329355,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2074.0,
|
|
"completions/max_terminated_length": 2074.0,
|
|
"completions/mean_length": 520.75390625,
|
|
"completions/mean_terminated_length": 524.8543090820312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 197.0,
|
|
"epoch": 0.10346666666666667,
|
|
"grad_norm": 0.007721587549895048,
|
|
"learning_rate": 2.861111111111111e-06,
|
|
"loss": -0.0093,
|
|
"num_tokens": 21147297.0,
|
|
"reward": 1.1001715660095215,
|
|
"reward_std": 0.2343776524066925,
|
|
"rewards/accuracy_reward_step": 0.4765625,
|
|
"rewards/final_brier_reward_step": 0.5960644483566284,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/stepwise_brier_reward": 0.9101195931434631,
|
|
"step": 97
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.48524681430162064,
|
|
"calib/avg_num_step_conf": 3.921875,
|
|
"calib/ece": 0.3697647058823529,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0017617221328714433,
|
|
"calib/mean_conf": 0.09298039215686275,
|
|
"calib/mu_c": 0.09203389830508475,
|
|
"calib/mu_w": 0.0937956204379562,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.017771413701342168,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2979.0,
|
|
"completions/max_terminated_length": 2979.0,
|
|
"completions/mean_length": 588.20703125,
|
|
"completions/mean_terminated_length": 588.20703125,
|
|
"completions/min_length": 149.0,
|
|
"completions/min_terminated_length": 149.0,
|
|
"epoch": 0.10453333333333334,
|
|
"grad_norm": 0.006101598031818867,
|
|
"learning_rate": 2.8333333333333335e-06,
|
|
"loss": 0.047,
|
|
"num_tokens": 21379742.0,
|
|
"reward": 1.0822640657424927,
|
|
"reward_std": 0.24437329173088074,
|
|
"rewards/accuracy_reward_step": 0.4609375,
|
|
"rewards/final_brier_reward_step": 0.6110738515853882,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.8647211790084839,
|
|
"step": 98
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.55,
|
|
"calib/avg_num_step_conf": 3.64453125,
|
|
"calib/ece": 0.22842519685039375,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.005057471264367813,
|
|
"calib/mean_conf": 0.08653543307086614,
|
|
"calib/mu_c": 0.09,
|
|
"calib/mu_w": 0.08494252873563218,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.022303488401844006,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2316.0,
|
|
"completions/max_terminated_length": 2316.0,
|
|
"completions/mean_length": 618.5859375,
|
|
"completions/mean_terminated_length": 623.4566650390625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 163.0,
|
|
"epoch": 0.1056,
|
|
"grad_norm": 0.005430999211966991,
|
|
"learning_rate": 2.805555555555556e-06,
|
|
"loss": 0.0359,
|
|
"num_tokens": 21619580.0,
|
|
"reward": 1.0024843215942383,
|
|
"reward_std": 0.18751585483551025,
|
|
"rewards/accuracy_reward_step": 0.3125,
|
|
"rewards/final_brier_reward_step": 0.7241469025611877,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.9163306951522827,
|
|
"step": 99
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5367610062893081,
|
|
"calib/avg_num_step_conf": 3.6796875,
|
|
"calib/ece": 0.33039062500000005,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0038767295597484652,
|
|
"calib/mean_conf": 0.08367187500000002,
|
|
"calib/mu_c": 0.08594339622641513,
|
|
"calib/mu_w": 0.08206666666666666,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.024442889231520385,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1528.0,
|
|
"completions/max_terminated_length": 1528.0,
|
|
"completions/mean_length": 561.5234375,
|
|
"completions/mean_terminated_length": 563.7255249023438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 181.0,
|
|
"epoch": 0.10666666666666667,
|
|
"grad_norm": 0.005294025409966707,
|
|
"learning_rate": 2.7777777777777783e-06,
|
|
"loss": 0.0466,
|
|
"num_tokens": 21846418.0,
|
|
"reward": 1.0728075504302979,
|
|
"reward_std": 0.18506084382534027,
|
|
"rewards/accuracy_reward_step": 0.4140625,
|
|
"rewards/final_brier_reward_step": 0.6495109796524048,
|
|
"rewards/format_reward_step": 1.0,
|
|
"rewards/stepwise_brier_reward": 0.9359585046768188,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5729030856149501,
|
|
"calib/avg_num_step_conf": 3.65625,
|
|
"calib/ece": 0.23243137254901963,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.007772707518470215,
|
|
"calib/mean_conf": 0.07345098039215685,
|
|
"calib/mu_c": 0.07884615384615383,
|
|
"calib/mu_w": 0.07107344632768361,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.02616412281289482,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2482.0,
|
|
"completions/max_terminated_length": 2482.0,
|
|
"completions/mean_length": 620.73828125,
|
|
"completions/mean_terminated_length": 623.172607421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 220.0,
|
|
"epoch": 0.10773333333333333,
|
|
"grad_norm": 0.00502887275069952,
|
|
"learning_rate": 2.7500000000000004e-06,
|
|
"loss": 0.0173,
|
|
"num_tokens": 22087999.0,
|
|
"reward": 0.9983644485473633,
|
|
"reward_std": 0.20291808247566223,
|
|
"rewards/accuracy_reward_step": 0.3046875,
|
|
"rewards/final_brier_reward_step": 0.7256336212158203,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.9281280040740967,
|
|
"step": 101
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6437863096741602,
|
|
"calib/avg_num_step_conf": 3.74609375,
|
|
"calib/ece": 0.5017254901960784,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.01461038140944683,
|
|
"calib/mean_conf": 0.07866666666666666,
|
|
"calib/mu_c": 0.08479729729729729,
|
|
"calib/mu_w": 0.07018691588785046,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.02501346042862481,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2390.0,
|
|
"completions/max_terminated_length": 2390.0,
|
|
"completions/mean_length": 516.31640625,
|
|
"completions/mean_terminated_length": 518.3411865234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 174.0,
|
|
"epoch": 0.1088,
|
|
"grad_norm": 0.0057980152778327465,
|
|
"learning_rate": 2.7222222222222224e-06,
|
|
"loss": 0.0445,
|
|
"num_tokens": 22302552.0,
|
|
"reward": 1.1512010097503662,
|
|
"reward_std": 0.16641102731227875,
|
|
"rewards/accuracy_reward_step": 0.578125,
|
|
"rewards/final_brier_reward_step": 0.5092281103134155,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.8754100203514099,
|
|
"step": 102
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6484340676583192,
|
|
"calib/avg_num_step_conf": 3.4453125,
|
|
"calib/ece": 0.37744094488188973,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.01481265298437208,
|
|
"calib/mean_conf": 0.06744094488188977,
|
|
"calib/mu_c": 0.0756637168141593,
|
|
"calib/mu_w": 0.06085106382978722,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.023945197780452628,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2455.0,
|
|
"completions/max_terminated_length": 2455.0,
|
|
"completions/mean_length": 612.11328125,
|
|
"completions/mean_terminated_length": 614.5137329101562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 153.0,
|
|
"epoch": 0.10986666666666667,
|
|
"grad_norm": 0.00619841692969203,
|
|
"learning_rate": 2.6944444444444444e-06,
|
|
"loss": 0.0553,
|
|
"num_tokens": 22539485.0,
|
|
"reward": 1.0578868389129639,
|
|
"reward_std": 0.15986822545528412,
|
|
"rewards/accuracy_reward_step": 0.44140625,
|
|
"rewards/final_brier_reward_step": 0.6124964952468872,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.8440539836883545,
|
|
"step": 103
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.6046659867414584,
|
|
"calib/avg_num_step_conf": 3.54296875,
|
|
"calib/ece": 0.3460629921259843,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.010466598674145844,
|
|
"calib/mean_conf": 0.07125984251968505,
|
|
"calib/mu_c": 0.07735849056603773,
|
|
"calib/mu_w": 0.06689189189189189,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.024718641184790967,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2658.0,
|
|
"completions/max_terminated_length": 2658.0,
|
|
"completions/mean_length": 542.8828125,
|
|
"completions/mean_terminated_length": 545.0117797851562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 146.0,
|
|
"epoch": 0.11093333333333333,
|
|
"grad_norm": 0.006147816311568022,
|
|
"learning_rate": 2.666666666666667e-06,
|
|
"loss": 0.0104,
|
|
"num_tokens": 22760823.0,
|
|
"reward": 1.0557448863983154,
|
|
"reward_std": 0.2222408950328827,
|
|
"rewards/accuracy_reward_step": 0.4140625,
|
|
"rewards/final_brier_reward_step": 0.6210156083106995,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/stepwise_brier_reward": 0.9340734481811523,
|
|
"step": 104
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5702894088669951,
|
|
"calib/avg_num_step_conf": 3.49609375,
|
|
"calib/ece": 0.3773046875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.007169950738916256,
|
|
"calib/mean_conf": 0.07582031250000001,
|
|
"calib/mu_c": 0.07974137931034483,
|
|
"calib/mu_w": 0.07257142857142858,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.025266372660165206,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2389.0,
|
|
"completions/max_terminated_length": 2389.0,
|
|
"completions/mean_length": 529.30859375,
|
|
"completions/mean_terminated_length": 531.3843383789062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 165.0,
|
|
"epoch": 0.112,
|
|
"grad_norm": 0.005158624146133661,
|
|
"learning_rate": 2.6388888888888893e-06,
|
|
"loss": 0.0376,
|
|
"num_tokens": 22977766.0,
|
|
"reward": 1.0840234756469727,
|
|
"reward_std": 0.1894109547138214,
|
|
"rewards/accuracy_reward_step": 0.453125,
|
|
"rewards/final_brier_reward_step": 0.6088863611221313,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.9073833227157593,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5456932773109244,
|
|
"calib/avg_num_step_conf": 3.20703125,
|
|
"calib/ece": 0.3913725490196079,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.00456932773109249,
|
|
"calib/mean_conf": 0.07529411764705883,
|
|
"calib/mu_c": 0.07773109243697483,
|
|
"calib/mu_w": 0.07316176470588234,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.024998269836324447,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1906.0,
|
|
"completions/max_terminated_length": 1906.0,
|
|
"completions/mean_length": 476.98828125,
|
|
"completions/mean_terminated_length": 478.8588562011719,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 219.0,
|
|
"epoch": 0.11306666666666666,
|
|
"grad_norm": 0.005500629544258118,
|
|
"learning_rate": 2.6111111111111113e-06,
|
|
"loss": -0.0615,
|
|
"num_tokens": 23180139.0,
|
|
"reward": 1.0853602886199951,
|
|
"reward_std": 0.1865309774875641,
|
|
"rewards/accuracy_reward_step": 0.46484375,
|
|
"rewards/final_brier_reward_step": 0.5926367044448853,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.9014803171157837,
|
|
"step": 106
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5015282107708294,
|
|
"calib/avg_num_step_conf": 3.5234375,
|
|
"calib/ece": 0.406015625,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 3.545448988326316e-05,
|
|
"calib/mean_conf": 0.07445312500000001,
|
|
"calib/mu_c": 0.07447154471544716,
|
|
"calib/mu_w": 0.0744360902255639,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.025273769559255997,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1795.0,
|
|
"completions/max_terminated_length": 1795.0,
|
|
"completions/mean_length": 463.90234375,
|
|
"completions/mean_terminated_length": 465.7215881347656,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 162.0,
|
|
"epoch": 0.11413333333333334,
|
|
"grad_norm": 0.005841091740876436,
|
|
"learning_rate": 2.5833333333333337e-06,
|
|
"loss": 0.0155,
|
|
"num_tokens": 23379194.0,
|
|
"reward": 1.1032767295837402,
|
|
"reward_std": 0.22194012999534607,
|
|
"rewards/accuracy_reward_step": 0.48046875,
|
|
"rewards/final_brier_reward_step": 0.5849117040634155,
|
|
"rewards/format_reward_step": 1.0,
|
|
"rewards/stepwise_brier_reward": 0.9214085340499878,
|
|
"step": 107
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5366594909459288,
|
|
"calib/avg_num_step_conf": 3.3515625,
|
|
"calib/ece": 0.5136470588235295,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0037241990629352656,
|
|
"calib/mean_conf": 0.07066666666666666,
|
|
"calib/mu_c": 0.07221476510067112,
|
|
"calib/mu_w": 0.06849056603773586,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.02518844013312197,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2522.0,
|
|
"completions/max_terminated_length": 2522.0,
|
|
"completions/mean_length": 527.16796875,
|
|
"completions/mean_terminated_length": 527.16796875,
|
|
"completions/min_length": 133.0,
|
|
"completions/min_terminated_length": 133.0,
|
|
"epoch": 0.1152,
|
|
"grad_norm": 0.005249754525721073,
|
|
"learning_rate": 2.5555555555555557e-06,
|
|
"loss": 0.0701,
|
|
"num_tokens": 23593061.0,
|
|
"reward": 1.139378309249878,
|
|
"reward_std": 0.20425044000148773,
|
|
"rewards/accuracy_reward_step": 0.58203125,
|
|
"rewards/final_brier_reward_step": 0.49251872301101685,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.8459128141403198,
|
|
"step": 108
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5496503496503496,
|
|
"calib/avg_num_step_conf": 3.69921875,
|
|
"calib/ece": 0.3754545454545455,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0055314685314685405,
|
|
"calib/mean_conf": 0.05932806324110672,
|
|
"calib/mu_c": 0.06245454545454546,
|
|
"calib/mu_w": 0.056923076923076917,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.023370682856895336,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2558.0,
|
|
"completions/max_terminated_length": 2558.0,
|
|
"completions/mean_length": 519.0546875,
|
|
"completions/mean_terminated_length": 521.0902099609375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 199.0,
|
|
"epoch": 0.11626666666666667,
|
|
"grad_norm": 0.0058748298324644566,
|
|
"learning_rate": 2.5277777777777778e-06,
|
|
"loss": 0.0096,
|
|
"num_tokens": 23806219.0,
|
|
"reward": 1.050431728363037,
|
|
"reward_std": 0.1639152467250824,
|
|
"rewards/accuracy_reward_step": 0.4296875,
|
|
"rewards/final_brier_reward_step": 0.6082472801208496,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.8711696267127991,
|
|
"step": 109
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5218069488622462,
|
|
"calib/avg_num_step_conf": 3.6171875,
|
|
"calib/ece": 0.42566406249999994,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.002355028137998545,
|
|
"calib/mean_conf": 0.05089843750000001,
|
|
"calib/mu_c": 0.052131147540983615,
|
|
"calib/mu_w": 0.04977611940298507,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.018444740037707057,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1400.0,
|
|
"completions/max_terminated_length": 1400.0,
|
|
"completions/mean_length": 436.265625,
|
|
"completions/mean_terminated_length": 437.97650146484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 146.0,
|
|
"epoch": 0.11733333333333333,
|
|
"grad_norm": 0.006007987540215254,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": -0.0107,
|
|
"num_tokens": 23998503.0,
|
|
"reward": 1.0971605777740479,
|
|
"reward_std": 0.1857801228761673,
|
|
"rewards/accuracy_reward_step": 0.4765625,
|
|
"rewards/final_brier_reward_step": 0.5701941251754761,
|
|
"rewards/format_reward_step": 1.0,
|
|
"rewards/stepwise_brier_reward": 0.9420040249824524,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5945452254976064,
|
|
"calib/avg_num_step_conf": 4.37890625,
|
|
"calib/ece": 0.46710317460317463,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.007698412698412694,
|
|
"calib/mean_conf": 0.032896825396825397,
|
|
"calib/mu_c": 0.036746031746031735,
|
|
"calib/mu_w": 0.02904761904761904,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.020623737812299352,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2715.0,
|
|
"completions/max_terminated_length": 2715.0,
|
|
"completions/mean_length": 502.1328125,
|
|
"completions/mean_terminated_length": 502.1328125,
|
|
"completions/min_length": 144.0,
|
|
"completions/min_terminated_length": 144.0,
|
|
"epoch": 0.1184,
|
|
"grad_norm": 0.005854738410562277,
|
|
"learning_rate": 2.4722222222222226e-06,
|
|
"loss": 0.1053,
|
|
"num_tokens": 24210137.0,
|
|
"reward": 1.0687568187713623,
|
|
"reward_std": 0.19579537212848663,
|
|
"rewards/accuracy_reward_step": 0.4921875,
|
|
"rewards/final_brier_reward_step": 0.526875376701355,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.8587761521339417,
|
|
"step": 111
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5454084221207509,
|
|
"calib/avg_num_step_conf": 4.51171875,
|
|
"calib/ece": 0.40416535433070866,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0035213089802130816,
|
|
"calib/mean_conf": 0.02103149606299213,
|
|
"calib/mu_c": 0.023055555555555548,
|
|
"calib/mu_w": 0.019534246575342466,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.017747324078153975,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2758.0,
|
|
"completions/max_terminated_length": 2758.0,
|
|
"completions/mean_length": 525.72265625,
|
|
"completions/mean_terminated_length": 527.7843627929688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 225.0,
|
|
"epoch": 0.11946666666666667,
|
|
"grad_norm": 0.005254555959254503,
|
|
"learning_rate": 2.4444444444444447e-06,
|
|
"loss": 0.065,
|
|
"num_tokens": 24428322.0,
|
|
"reward": 1.0399373769760132,
|
|
"reward_std": 0.16829246282577515,
|
|
"rewards/accuracy_reward_step": 0.421875,
|
|
"rewards/final_brier_reward_step": 0.5890142321586609,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.8973459005355835,
|
|
"step": 112
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5124275317626742,
|
|
"calib/avg_num_step_conf": 5.69921875,
|
|
"calib/ece": 0.4572078431372549,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0007307265326261247,
|
|
"calib/mean_conf": 0.01730196078431373,
|
|
"calib/mu_c": 0.017685950413223142,
|
|
"calib/mu_w": 0.016955223880597017,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.0154875976971845,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2456.0,
|
|
"completions/max_terminated_length": 2456.0,
|
|
"completions/mean_length": 478.8515625,
|
|
"completions/mean_terminated_length": 478.8515625,
|
|
"completions/min_length": 187.0,
|
|
"completions/min_terminated_length": 187.0,
|
|
"epoch": 0.12053333333333334,
|
|
"grad_norm": 0.006173197645694017,
|
|
"learning_rate": 2.4166666666666667e-06,
|
|
"loss": 0.0511,
|
|
"num_tokens": 24631788.0,
|
|
"reward": 1.0740957260131836,
|
|
"reward_std": 0.19459447264671326,
|
|
"rewards/accuracy_reward_step": 0.47265625,
|
|
"rewards/final_brier_reward_step": 0.5396190881729126,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.9280819892883301,
|
|
"step": 113
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5005526897568165,
|
|
"calib/avg_num_step_conf": 5.55859375,
|
|
"calib/ece": 0.52037890625,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.00010476541390322391,
|
|
"calib/mean_conf": 0.01868359375,
|
|
"calib/mu_c": 0.018731884057971014,
|
|
"calib/mu_w": 0.01862711864406779,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.016839009199919125,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1332.0,
|
|
"completions/max_terminated_length": 1332.0,
|
|
"completions/mean_length": 446.0625,
|
|
"completions/mean_terminated_length": 447.8117980957031,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 151.0,
|
|
"epoch": 0.1216,
|
|
"grad_norm": 0.0068952166475355625,
|
|
"learning_rate": 2.388888888888889e-06,
|
|
"loss": 0.0109,
|
|
"num_tokens": 24826684.0,
|
|
"reward": 1.1039800643920898,
|
|
"reward_std": 0.14307743310928345,
|
|
"rewards/accuracy_reward_step": 0.5390625,
|
|
"rewards/final_brier_reward_step": 0.4801192879676819,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.9009939432144165,
|
|
"step": 114
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5522073412698412,
|
|
"calib/avg_num_step_conf": 5.76953125,
|
|
"calib/ece": 0.42344921874999997,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.003957341269841274,
|
|
"calib/mean_conf": 0.014050781250000005,
|
|
"calib/mu_c": 0.01627678571428572,
|
|
"calib/mu_w": 0.012319444444444445,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.012486921258446713,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1212.0,
|
|
"completions/max_terminated_length": 1212.0,
|
|
"completions/mean_length": 490.46484375,
|
|
"completions/mean_terminated_length": 492.3882751464844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 192.0,
|
|
"epoch": 0.12266666666666666,
|
|
"grad_norm": 0.005587155930697918,
|
|
"learning_rate": 2.361111111111111e-06,
|
|
"loss": -0.0196,
|
|
"num_tokens": 25033187.0,
|
|
"reward": 1.0444157123565674,
|
|
"reward_std": 0.17458747327327728,
|
|
"rewards/accuracy_reward_step": 0.4375,
|
|
"rewards/final_brier_reward_step": 0.5763888359069824,
|
|
"rewards/format_reward_step": 1.0,
|
|
"rewards/stepwise_brier_reward": 0.8748853206634521,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5472972972972973,
|
|
"calib/avg_num_step_conf": 6.546875,
|
|
"calib/ece": 0.40129881889763774,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.004526185619581846,
|
|
"calib/mean_conf": 0.0160240157480315,
|
|
"calib/mu_c": 0.018661320754716982,
|
|
"calib/mu_w": 0.014135135135135136,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.014862978302858687,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2513.0,
|
|
"completions/max_terminated_length": 2513.0,
|
|
"completions/mean_length": 537.7265625,
|
|
"completions/mean_terminated_length": 539.8353271484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 155.0,
|
|
"epoch": 0.12373333333333333,
|
|
"grad_norm": 0.007462795823812485,
|
|
"learning_rate": 2.3333333333333336e-06,
|
|
"loss": 0.0444,
|
|
"num_tokens": 25251045.0,
|
|
"reward": 1.0313544273376465,
|
|
"reward_std": 0.17312878370285034,
|
|
"rewards/accuracy_reward_step": 0.4140625,
|
|
"rewards/final_brier_reward_step": 0.5931049585342407,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.8860822319984436,
|
|
"step": 116
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5420689655172414,
|
|
"calib/avg_num_step_conf": 6.5390625,
|
|
"calib/ece": 0.41648980392156865,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0034036990595611317,
|
|
"calib/mean_conf": 0.014882745098039217,
|
|
"calib/mu_c": 0.016818181818181822,
|
|
"calib/mu_w": 0.01341448275862069,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.01337153899929005,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2354.0,
|
|
"completions/max_terminated_length": 2354.0,
|
|
"completions/mean_length": 501.64453125,
|
|
"completions/mean_terminated_length": 503.6117858886719,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 203.0,
|
|
"epoch": 0.1248,
|
|
"grad_norm": 0.0062988935969769955,
|
|
"learning_rate": 2.305555555555556e-06,
|
|
"loss": 0.0245,
|
|
"num_tokens": 25461746.0,
|
|
"reward": 1.0523920059204102,
|
|
"reward_std": 0.15046769380569458,
|
|
"rewards/accuracy_reward_step": 0.4296875,
|
|
"rewards/final_brier_reward_step": 0.5804606676101685,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.9314590692520142,
|
|
"step": 117
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5618512110726643,
|
|
"calib/avg_num_step_conf": 6.94921875,
|
|
"calib/ece": 0.4460192156862745,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0044582983193277165,
|
|
"calib/mean_conf": 0.020647450980392158,
|
|
"calib/mu_c": 0.0230252100840336,
|
|
"calib/mu_w": 0.018566911764705884,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.0176806214782015,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2538.0,
|
|
"completions/max_terminated_length": 2538.0,
|
|
"completions/mean_length": 506.53125,
|
|
"completions/mean_terminated_length": 508.5176696777344,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 210.0,
|
|
"epoch": 0.12586666666666665,
|
|
"grad_norm": 0.008921671658754349,
|
|
"learning_rate": 2.277777777777778e-06,
|
|
"loss": 0.0534,
|
|
"num_tokens": 25671106.0,
|
|
"reward": 1.0745387077331543,
|
|
"reward_std": 0.13702334463596344,
|
|
"rewards/accuracy_reward_step": 0.46875,
|
|
"rewards/final_brier_reward_step": 0.5519201755523682,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.9208766222000122,
|
|
"step": 118
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5277355623100304,
|
|
"calib/avg_num_step_conf": 6.5625,
|
|
"calib/ece": 0.4224110671936759,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0022270770010131696,
|
|
"calib/mean_conf": 0.020276679841897235,
|
|
"calib/mu_c": 0.021517857142857144,
|
|
"calib/mu_w": 0.019290780141843974,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.017363884240948854,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 1700.0,
|
|
"completions/max_terminated_length": 1700.0,
|
|
"completions/mean_length": 512.44921875,
|
|
"completions/mean_terminated_length": 516.4842529296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 131.0,
|
|
"epoch": 0.12693333333333334,
|
|
"grad_norm": 0.008361544460058212,
|
|
"learning_rate": 2.25e-06,
|
|
"loss": -0.0055,
|
|
"num_tokens": 25883037.0,
|
|
"reward": 1.0460152626037598,
|
|
"reward_std": 0.20650334656238556,
|
|
"rewards/accuracy_reward_step": 0.44140625,
|
|
"rewards/final_brier_reward_step": 0.5649992227554321,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.8946875929832458,
|
|
"step": 119
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.6579464505035617,
|
|
"calib/avg_num_step_conf": 5.87890625,
|
|
"calib/ece": 0.510078125,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.012579218865143706,
|
|
"calib/mean_conf": 0.028984375,
|
|
"calib/mu_c": 0.034782608695652174,
|
|
"calib/mu_w": 0.022203389830508468,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.019954629935415365,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1746.0,
|
|
"completions/max_terminated_length": 1746.0,
|
|
"completions/mean_length": 489.9375,
|
|
"completions/mean_terminated_length": 491.8588562011719,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 215.0,
|
|
"epoch": 0.128,
|
|
"grad_norm": 0.00834466703236103,
|
|
"learning_rate": 2.222222222222222e-06,
|
|
"loss": 0.0239,
|
|
"num_tokens": 26090829.0,
|
|
"reward": 1.1241123676300049,
|
|
"reward_std": 0.13478730618953705,
|
|
"rewards/accuracy_reward_step": 0.5390625,
|
|
"rewards/final_brier_reward_step": 0.4971992075443268,
|
|
"rewards/format_reward_step": 1.0,
|
|
"rewards/stepwise_brier_reward": 0.9458011984825134,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5345997286295794,
|
|
"calib/avg_num_step_conf": 5.79296875,
|
|
"calib/ece": 0.4369019607843137,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.002664980880720376,
|
|
"calib/mean_conf": 0.037607843137254904,
|
|
"calib/mu_c": 0.03900826446280992,
|
|
"calib/mu_w": 0.03634328358208955,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.0183046960988024,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2916.0,
|
|
"completions/max_terminated_length": 2916.0,
|
|
"completions/mean_length": 539.2265625,
|
|
"completions/mean_terminated_length": 539.2265625,
|
|
"completions/min_length": 203.0,
|
|
"completions/min_terminated_length": 203.0,
|
|
"epoch": 0.12906666666666666,
|
|
"grad_norm": 0.006870029028505087,
|
|
"learning_rate": 2.1944444444444445e-06,
|
|
"loss": 0.0881,
|
|
"num_tokens": 26309607.0,
|
|
"reward": 1.0757176876068115,
|
|
"reward_std": 0.20989492535591125,
|
|
"rewards/accuracy_reward_step": 0.47265625,
|
|
"rewards/final_brier_reward_step": 0.5585699081420898,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.8966686725616455,
|
|
"step": 121
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.4999684642068748,
|
|
"calib/avg_num_step_conf": 5.3046875,
|
|
"calib/ece": 0.5431640625,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 2.2075055187621706e-05,
|
|
"calib/mean_conf": 0.04667968750000001,
|
|
"calib/mu_c": 0.04668874172185429,
|
|
"calib/mu_w": 0.04666666666666667,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.0108753114853021,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1019.0,
|
|
"completions/max_terminated_length": 1019.0,
|
|
"completions/mean_length": 452.5546875,
|
|
"completions/mean_terminated_length": 454.3294372558594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 189.0,
|
|
"epoch": 0.13013333333333332,
|
|
"grad_norm": 0.007138589397072792,
|
|
"learning_rate": 2.166666666666667e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 26508485.0,
|
|
"reward": 1.1392134428024292,
|
|
"reward_std": 0.1293630599975586,
|
|
"rewards/accuracy_reward_step": 0.58984375,
|
|
"rewards/final_brier_reward_step": 0.45904064178466797,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.8809599876403809,
|
|
"step": 122
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.4844704844704845,
|
|
"calib/avg_num_step_conf": 5.1015625,
|
|
"calib/ece": 0.3895275590551181,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0015220815220815184,
|
|
"calib/mean_conf": 0.049055118110236225,
|
|
"calib/mu_c": 0.048198198198198185,
|
|
"calib/mu_w": 0.0497202797202797,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0007874015748031496,
|
|
"calib/std_conf": 0.008365006870689341,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2300.0,
|
|
"completions/max_terminated_length": 2300.0,
|
|
"completions/mean_length": 531.0703125,
|
|
"completions/mean_terminated_length": 533.1529541015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 218.0,
|
|
"epoch": 0.1312,
|
|
"grad_norm": 0.00916409119963646,
|
|
"learning_rate": 2.138888888888889e-06,
|
|
"loss": 0.0116,
|
|
"num_tokens": 26725407.0,
|
|
"reward": 1.0490834712982178,
|
|
"reward_std": 0.1959851086139679,
|
|
"rewards/accuracy_reward_step": 0.43359375,
|
|
"rewards/final_brier_reward_step": 0.5940663814544678,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.8785138130187988,
|
|
"step": 123
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.4929886335557203,
|
|
"calib/avg_num_step_conf": 5.17578125,
|
|
"calib/ece": 0.5056640625000001,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0006683963429701192,
|
|
"calib/mean_conf": 0.049804687500000014,
|
|
"calib/mu_c": 0.049507042253521115,
|
|
"calib/mu_w": 0.050175438596491234,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.000390625,
|
|
"calib/std_conf": 0.0047967413967550665,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1446.0,
|
|
"completions/max_terminated_length": 1446.0,
|
|
"completions/mean_length": 471.6328125,
|
|
"completions/mean_terminated_length": 473.4823913574219,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 181.0,
|
|
"epoch": 0.13226666666666667,
|
|
"grad_norm": 0.007121821399778128,
|
|
"learning_rate": 2.1111111111111114e-06,
|
|
"loss": 0.0172,
|
|
"num_tokens": 26928641.0,
|
|
"reward": 1.133141279220581,
|
|
"reward_std": 0.16863961517810822,
|
|
"rewards/accuracy_reward_step": 0.5546875,
|
|
"rewards/final_brier_reward_step": 0.4977308511734009,
|
|
"rewards/format_reward_step": 1.0,
|
|
"rewards/stepwise_brier_reward": 0.9183536767959595,
|
|
"step": 124
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.4970704329591711,
|
|
"calib/avg_num_step_conf": 4.9765625,
|
|
"calib/ece": 0.4235294117647058,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0002929567040828862,
|
|
"calib/mean_conf": 0.050980392156862744,
|
|
"calib/mu_c": 0.05082644628099174,
|
|
"calib/mu_w": 0.05111940298507463,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.006932419423397525,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1844.0,
|
|
"completions/max_terminated_length": 1844.0,
|
|
"completions/mean_length": 483.42578125,
|
|
"completions/mean_terminated_length": 485.32159423828125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 183.0,
|
|
"epoch": 0.13333333333333333,
|
|
"grad_norm": 0.006879989989101887,
|
|
"learning_rate": 2.0833333333333334e-06,
|
|
"loss": -0.0187,
|
|
"num_tokens": 27132886.0,
|
|
"reward": 1.0873610973358154,
|
|
"reward_std": 0.18790683150291443,
|
|
"rewards/accuracy_reward_step": 0.4765625,
|
|
"rewards/final_brier_reward_step": 0.56884765625,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.9070613384246826,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.500062003968254,
|
|
"calib/avg_num_step_conf": 5.10546875,
|
|
"calib/ece": 0.4456692913385827,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 6.200396825382926e-06,
|
|
"calib/mean_conf": 0.05039370078740157,
|
|
"calib/mu_c": 0.05039682539682539,
|
|
"calib/mu_w": 0.05039062500000001,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.0044192803780794595,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2469.0,
|
|
"completions/max_terminated_length": 2469.0,
|
|
"completions/mean_length": 479.2265625,
|
|
"completions/mean_terminated_length": 479.2265625,
|
|
"completions/min_length": 120.0,
|
|
"completions/min_terminated_length": 120.0,
|
|
"epoch": 0.1344,
|
|
"grad_norm": 0.006863424088805914,
|
|
"learning_rate": 2.0555555555555555e-06,
|
|
"loss": -0.0045,
|
|
"num_tokens": 27336712.0,
|
|
"reward": 1.0819272994995117,
|
|
"reward_std": 0.1444890946149826,
|
|
"rewards/accuracy_reward_step": 0.4921875,
|
|
"rewards/final_brier_reward_step": 0.5466894507408142,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.8702676892280579,
|
|
"step": 126
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5727709790209791,
|
|
"calib/avg_num_step_conf": 4.91015625,
|
|
"calib/ece": 0.383921568627451,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.00727709790209792,
|
|
"calib/mean_conf": 0.05529411764705882,
|
|
"calib/mu_c": 0.05937500000000001,
|
|
"calib/mu_w": 0.05209790209790209,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.015384349212496496,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2798.0,
|
|
"completions/max_terminated_length": 2798.0,
|
|
"completions/mean_length": 479.375,
|
|
"completions/mean_terminated_length": 481.25494384765625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 175.0,
|
|
"epoch": 0.13546666666666668,
|
|
"grad_norm": 0.007512710057199001,
|
|
"learning_rate": 2.027777777777778e-06,
|
|
"loss": 0.058,
|
|
"num_tokens": 27538784.0,
|
|
"reward": 1.0702171325683594,
|
|
"reward_std": 0.14376309514045715,
|
|
"rewards/accuracy_reward_step": 0.4375,
|
|
"rewards/final_brier_reward_step": 0.6072656512260437,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.9178997278213501,
|
|
"step": 127
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5271164021164021,
|
|
"calib/avg_num_step_conf": 5.05859375,
|
|
"calib/ece": 0.37,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0027116402116402136,
|
|
"calib/mean_conf": 0.053529411764705874,
|
|
"calib/mu_c": 0.05509259259259259,
|
|
"calib/mu_w": 0.052380952380952375,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.01280678885710426,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2553.0,
|
|
"completions/max_terminated_length": 2553.0,
|
|
"completions/mean_length": 520.66015625,
|
|
"completions/mean_terminated_length": 520.66015625,
|
|
"completions/min_length": 180.0,
|
|
"completions/min_terminated_length": 180.0,
|
|
"epoch": 0.13653333333333334,
|
|
"grad_norm": 0.006195978261530399,
|
|
"learning_rate": 2.0000000000000003e-06,
|
|
"loss": 0.0201,
|
|
"num_tokens": 27754417.0,
|
|
"reward": 1.0464067459106445,
|
|
"reward_std": 0.18519645929336548,
|
|
"rewards/accuracy_reward_step": 0.421875,
|
|
"rewards/final_brier_reward_step": 0.6176855564117432,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.8643181324005127,
|
|
"step": 128
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.48170731707317077,
|
|
"calib/avg_num_step_conf": 4.8359375,
|
|
"calib/ece": 0.45843137254901967,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0018292682926829285,
|
|
"calib/mean_conf": 0.05921568627450981,
|
|
"calib/mu_c": 0.05833333333333333,
|
|
"calib/mu_w": 0.060162601626016256,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.01938699152048357,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1233.0,
|
|
"completions/max_terminated_length": 1233.0,
|
|
"completions/mean_length": 448.078125,
|
|
"completions/mean_terminated_length": 449.8353271484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 172.0,
|
|
"epoch": 0.1376,
|
|
"grad_norm": 0.007027975749224424,
|
|
"learning_rate": 1.9722222222222224e-06,
|
|
"loss": 0.0321,
|
|
"num_tokens": 27947189.0,
|
|
"reward": 1.118253469467163,
|
|
"reward_std": 0.16204269230365753,
|
|
"rewards/accuracy_reward_step": 0.51953125,
|
|
"rewards/final_brier_reward_step": 0.5367578268051147,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.9229358434677124,
|
|
"step": 129
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.550257486172039,
|
|
"calib/avg_num_step_conf": 4.44921875,
|
|
"calib/ece": 0.5155511811023622,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.005025748617203882,
|
|
"calib/mean_conf": 0.06318897637795276,
|
|
"calib/mu_c": 0.06530612244897958,
|
|
"calib/mu_w": 0.0602803738317757,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.02203405820541105,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2530.0,
|
|
"completions/max_terminated_length": 2530.0,
|
|
"completions/mean_length": 429.99609375,
|
|
"completions/mean_terminated_length": 433.38189697265625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 182.0,
|
|
"epoch": 0.13866666666666666,
|
|
"grad_norm": 0.009456264786422253,
|
|
"learning_rate": 1.944444444444445e-06,
|
|
"loss": 0.017,
|
|
"num_tokens": 28138236.0,
|
|
"reward": 1.1353545188903809,
|
|
"reward_std": 0.15697000920772552,
|
|
"rewards/accuracy_reward_step": 0.57421875,
|
|
"rewards/final_brier_reward_step": 0.488525390625,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.8706176280975342,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6228411894559148,
|
|
"calib/avg_num_step_conf": 4.30859375,
|
|
"calib/ece": 0.3379446640316206,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.01228411894559149,
|
|
"calib/mean_conf": 0.06521739130434782,
|
|
"calib/mu_c": 0.07254901960784313,
|
|
"calib/mu_w": 0.06026490066225164,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.023006533139692094,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2883.0,
|
|
"completions/max_terminated_length": 2883.0,
|
|
"completions/mean_length": 468.35546875,
|
|
"completions/mean_terminated_length": 470.19219970703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 235.0,
|
|
"epoch": 0.13973333333333332,
|
|
"grad_norm": 0.009817316196858883,
|
|
"learning_rate": 1.916666666666667e-06,
|
|
"loss": 0.031,
|
|
"num_tokens": 28340023.0,
|
|
"reward": 1.059814453125,
|
|
"reward_std": 0.13384954631328583,
|
|
"rewards/accuracy_reward_step": 0.40234375,
|
|
"rewards/final_brier_reward_step": 0.6429296731948853,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.9487107992172241,
|
|
"step": 131
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.600626959247649,
|
|
"calib/avg_num_step_conf": 4.3046875,
|
|
"calib/ece": 0.49745098039215685,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.010062695924764886,
|
|
"calib/mean_conf": 0.07117647058823529,
|
|
"calib/mu_c": 0.07551724137931035,
|
|
"calib/mu_w": 0.06545454545454546,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.024705882352941175,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2602.0,
|
|
"completions/max_terminated_length": 2602.0,
|
|
"completions/mean_length": 474.27734375,
|
|
"completions/mean_terminated_length": 474.27734375,
|
|
"completions/min_length": 136.0,
|
|
"completions/min_terminated_length": 136.0,
|
|
"epoch": 0.1408,
|
|
"grad_norm": 0.012203056365251541,
|
|
"learning_rate": 1.888888888888889e-06,
|
|
"loss": 0.0832,
|
|
"num_tokens": 28542710.0,
|
|
"reward": 1.1420726776123047,
|
|
"reward_std": 0.1849452704191208,
|
|
"rewards/accuracy_reward_step": 0.56640625,
|
|
"rewards/final_brier_reward_step": 0.5095800757408142,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.8850681781768799,
|
|
"step": 132
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5585548172757474,
|
|
"calib/avg_num_step_conf": 4.31640625,
|
|
"calib/ece": 0.26777343750000004,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.005855481727574739,
|
|
"calib/mean_conf": 0.06035156250000001,
|
|
"calib/mu_c": 0.06428571428571428,
|
|
"calib/mu_w": 0.05843023255813954,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.02025890616021985,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1436.0,
|
|
"completions/max_terminated_length": 1436.0,
|
|
"completions/mean_length": 509.28515625,
|
|
"completions/mean_terminated_length": 511.2823791503906,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 180.0,
|
|
"epoch": 0.14186666666666667,
|
|
"grad_norm": 0.010452408343553543,
|
|
"learning_rate": 1.8611111111111113e-06,
|
|
"loss": 0.0137,
|
|
"num_tokens": 28755111.0,
|
|
"reward": 1.0223243236541748,
|
|
"reward_std": 0.202122300863266,
|
|
"rewards/accuracy_reward_step": 0.328125,
|
|
"rewards/final_brier_reward_step": 0.7100098133087158,
|
|
"rewards/format_reward_step": 1.0,
|
|
"rewards/stepwise_brier_reward": 0.9567779302597046,
|
|
"step": 133
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5752956216043463,
|
|
"calib/avg_num_step_conf": 4.05078125,
|
|
"calib/ece": 0.34685039370078746,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.007529562160434655,
|
|
"calib/mean_conf": 0.06653543307086614,
|
|
"calib/mu_c": 0.07095238095238096,
|
|
"calib/mu_w": 0.0634228187919463,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.023523416135889222,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2411.0,
|
|
"completions/max_terminated_length": 2411.0,
|
|
"completions/mean_length": 540.3515625,
|
|
"completions/mean_terminated_length": 542.4706420898438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 197.0,
|
|
"epoch": 0.14293333333333333,
|
|
"grad_norm": 0.011523211374878883,
|
|
"learning_rate": 1.8333333333333333e-06,
|
|
"loss": 0.0359,
|
|
"num_tokens": 28978073.0,
|
|
"reward": 1.058791160583496,
|
|
"reward_std": 0.19045865535736084,
|
|
"rewards/accuracy_reward_step": 0.41015625,
|
|
"rewards/final_brier_reward_step": 0.6352930068969727,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.9270786046981812,
|
|
"step": 134
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.6085626911314984,
|
|
"calib/avg_num_step_conf": 3.7421875,
|
|
"calib/ece": 0.48539999999999994,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.010856269113149838,
|
|
"calib/mean_conf": 0.0786,
|
|
"calib/mu_c": 0.08333333333333331,
|
|
"calib/mu_w": 0.07247706422018348,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.9765625,
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.024739442192579852,
|
|
"calib/step_conf_rate": 0.9765625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2987.0,
|
|
"completions/max_terminated_length": 2987.0,
|
|
"completions/mean_length": 527.0390625,
|
|
"completions/mean_terminated_length": 529.1058959960938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 168.0,
|
|
"epoch": 0.144,
|
|
"grad_norm": 0.010717969387769699,
|
|
"learning_rate": 1.8055555555555557e-06,
|
|
"loss": 0.0246,
|
|
"num_tokens": 29194555.0,
|
|
"reward": 1.1342743635177612,
|
|
"reward_std": 0.2033892273902893,
|
|
"rewards/accuracy_reward_step": 0.55078125,
|
|
"rewards/final_brier_reward_step": 0.5109472870826721,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/stepwise_brier_reward": 0.9214526414871216,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6843548387096774,
|
|
"calib/avg_num_step_conf": 4.29296875,
|
|
"calib/ece": 0.31686274509803924,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.01843548387096776,
|
|
"calib/mean_conf": 0.07529411764705882,
|
|
"calib/mu_c": 0.08650000000000002,
|
|
"calib/mu_w": 0.06806451612903226,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.024998269836324447,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2520.0,
|
|
"completions/max_terminated_length": 2520.0,
|
|
"completions/mean_length": 515.93359375,
|
|
"completions/mean_terminated_length": 515.93359375,
|
|
"completions/min_length": 152.0,
|
|
"completions/min_terminated_length": 152.0,
|
|
"epoch": 0.14506666666666668,
|
|
"grad_norm": 0.013928011059761047,
|
|
"learning_rate": 1.777777777777778e-06,
|
|
"loss": 0.0128,
|
|
"num_tokens": 29410802.0,
|
|
"reward": 1.0524232387542725,
|
|
"reward_std": 0.15639856457710266,
|
|
"rewards/accuracy_reward_step": 0.390625,
|
|
"rewards/final_brier_reward_step": 0.6629101634025574,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.9244980216026306,
|
|
"step": 136
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6169230769230769,
|
|
"calib/avg_num_step_conf": 4.39453125,
|
|
"calib/ece": 0.427843137254902,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.01169230769230764,
|
|
"calib/mean_conf": 0.08196078431372548,
|
|
"calib/mu_c": 0.08769230769230768,
|
|
"calib/mu_w": 0.07600000000000004,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.024011403160535898,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1710.0,
|
|
"completions/max_terminated_length": 1710.0,
|
|
"completions/mean_length": 449.671875,
|
|
"completions/mean_terminated_length": 451.4353332519531,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 173.0,
|
|
"epoch": 0.14613333333333334,
|
|
"grad_norm": 0.013641622848808765,
|
|
"learning_rate": 1.75e-06,
|
|
"loss": 0.0243,
|
|
"num_tokens": 29608582.0,
|
|
"reward": 1.1266405582427979,
|
|
"reward_std": 0.1831943392753601,
|
|
"rewards/accuracy_reward_step": 0.5078125,
|
|
"rewards/final_brier_reward_step": 0.5700781345367432,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.9367181658744812,
|
|
"step": 137
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.6876623376623376,
|
|
"calib/avg_num_step_conf": 4.54296875,
|
|
"calib/ece": 0.5299212598425197,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.018766233766233764,
|
|
"calib/mean_conf": 0.0763779527559055,
|
|
"calib/mu_c": 0.08376623376623377,
|
|
"calib/mu_w": 0.065,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.02496199603802734,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2313.0,
|
|
"completions/max_terminated_length": 2313.0,
|
|
"completions/mean_length": 482.6953125,
|
|
"completions/mean_terminated_length": 482.6953125,
|
|
"completions/min_length": 154.0,
|
|
"completions/min_terminated_length": 154.0,
|
|
"epoch": 0.1472,
|
|
"grad_norm": 0.008644962683320045,
|
|
"learning_rate": 1.7222222222222224e-06,
|
|
"loss": 0.0417,
|
|
"num_tokens": 29812168.0,
|
|
"reward": 1.1777849197387695,
|
|
"reward_std": 0.18831929564476013,
|
|
"rewards/accuracy_reward_step": 0.60546875,
|
|
"rewards/final_brier_reward_step": 0.48500001430511475,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.9223892688751221,
|
|
"step": 138
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.6052830188679246,
|
|
"calib/avg_num_step_conf": 4.6875,
|
|
"calib/ece": 0.504296875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.010528301886792449,
|
|
"calib/mean_conf": 0.08164062500000001,
|
|
"calib/mu_c": 0.086,
|
|
"calib/mu_w": 0.07547169811320754,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.02410191070453492,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 970.0,
|
|
"completions/max_terminated_length": 970.0,
|
|
"completions/mean_length": 436.7734375,
|
|
"completions/mean_terminated_length": 438.4862976074219,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 141.0,
|
|
"epoch": 0.14826666666666666,
|
|
"grad_norm": 0.011200047098100185,
|
|
"learning_rate": 1.6944444444444446e-06,
|
|
"loss": -0.0204,
|
|
"num_tokens": 30002758.0,
|
|
"reward": 1.1692848205566406,
|
|
"reward_std": 0.10654379427433014,
|
|
"rewards/accuracy_reward_step": 0.5859375,
|
|
"rewards/final_brier_reward_step": 0.5075976848602295,
|
|
"rewards/format_reward_step": 1.0,
|
|
"rewards/stepwise_brier_reward": 0.918194055557251,
|
|
"step": 139
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6192922374429224,
|
|
"calib/avg_num_step_conf": 4.94140625,
|
|
"calib/ece": 0.4887795275590552,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.011929223744292228,
|
|
"calib/mean_conf": 0.08602362204724409,
|
|
"calib/mu_c": 0.09109589041095889,
|
|
"calib/mu_w": 0.07916666666666666,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.022438354595636326,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1251.0,
|
|
"completions/max_terminated_length": 1251.0,
|
|
"completions/mean_length": 457.515625,
|
|
"completions/mean_terminated_length": 459.3098449707031,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 151.0,
|
|
"epoch": 0.14933333333333335,
|
|
"grad_norm": 0.01058944035321474,
|
|
"learning_rate": 1.6666666666666667e-06,
|
|
"loss": -0.0054,
|
|
"num_tokens": 30200578.0,
|
|
"reward": 1.149709701538086,
|
|
"reward_std": 0.1826654076576233,
|
|
"rewards/accuracy_reward_step": 0.5703125,
|
|
"rewards/final_brier_reward_step": 0.5179394483566284,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.8848344683647156,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.673841059602649,
|
|
"calib/avg_num_step_conf": 5.08984375,
|
|
"calib/ece": 0.5114624505928854,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.017384105960264892,
|
|
"calib/mean_conf": 0.08537549407114625,
|
|
"calib/mu_c": 0.09238410596026489,
|
|
"calib/mu_w": 0.075,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.022745309907310766,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1951.0,
|
|
"completions/max_terminated_length": 1951.0,
|
|
"completions/mean_length": 521.2265625,
|
|
"completions/mean_terminated_length": 521.2265625,
|
|
"completions/min_length": 189.0,
|
|
"completions/min_terminated_length": 189.0,
|
|
"epoch": 0.1504,
|
|
"grad_norm": 0.009190862998366356,
|
|
"learning_rate": 1.638888888888889e-06,
|
|
"loss": 0.0585,
|
|
"num_tokens": 30416788.0,
|
|
"reward": 1.1587471961975098,
|
|
"reward_std": 0.15836520493030548,
|
|
"rewards/accuracy_reward_step": 0.58984375,
|
|
"rewards/final_brier_reward_step": 0.49970701336860657,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.8808870315551758,
|
|
"step": 141
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6190078037904125,
|
|
"calib/avg_num_step_conf": 5.375,
|
|
"calib/ece": 0.3703921568627451,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.011900780379041256,
|
|
"calib/mean_conf": 0.0884313725490196,
|
|
"calib/mu_c": 0.09487179487179487,
|
|
"calib/mu_w": 0.08297101449275361,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.02108549813140402,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1400.0,
|
|
"completions/max_terminated_length": 1400.0,
|
|
"completions/mean_length": 491.26171875,
|
|
"completions/mean_terminated_length": 493.1882629394531,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 155.0,
|
|
"epoch": 0.15146666666666667,
|
|
"grad_norm": 0.009988175705075264,
|
|
"learning_rate": 1.6111111111111113e-06,
|
|
"loss": -0.0049,
|
|
"num_tokens": 30623391.0,
|
|
"reward": 1.1010844707489014,
|
|
"reward_std": 0.1659606695175171,
|
|
"rewards/accuracy_reward_step": 0.45703125,
|
|
"rewards/final_brier_reward_step": 0.6175488233566284,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.9426777362823486,
|
|
"step": 142
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.6052226870078741,
|
|
"calib/avg_num_step_conf": 5.19921875,
|
|
"calib/ece": 0.4064705882352942,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.010522268700787388,
|
|
"calib/mean_conf": 0.09156862745098039,
|
|
"calib/mu_c": 0.0968503937007874,
|
|
"calib/mu_w": 0.086328125,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.01872112668592943,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2210.0,
|
|
"completions/max_terminated_length": 2210.0,
|
|
"completions/mean_length": 483.2109375,
|
|
"completions/mean_terminated_length": 485.10589599609375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 194.0,
|
|
"epoch": 0.15253333333333333,
|
|
"grad_norm": 0.009511373937129974,
|
|
"learning_rate": 1.5833333333333333e-06,
|
|
"loss": -0.0132,
|
|
"num_tokens": 30830109.0,
|
|
"reward": 1.120365858078003,
|
|
"reward_std": 0.13358232378959656,
|
|
"rewards/accuracy_reward_step": 0.49609375,
|
|
"rewards/final_brier_reward_step": 0.5873925685882568,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.923865795135498,
|
|
"step": 143
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5897100153295861,
|
|
"calib/avg_num_step_conf": 4.921875,
|
|
"calib/ece": 0.5052941176470589,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.008971001532958586,
|
|
"calib/mean_conf": 0.09078431372549019,
|
|
"calib/mu_c": 0.0944078947368421,
|
|
"calib/mu_w": 0.08543689320388351,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.01938699152048357,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1496.0,
|
|
"completions/max_terminated_length": 1496.0,
|
|
"completions/mean_length": 467.51953125,
|
|
"completions/mean_terminated_length": 469.35296630859375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 158.0,
|
|
"epoch": 0.1536,
|
|
"grad_norm": 0.016215428709983826,
|
|
"learning_rate": 1.5555555555555558e-06,
|
|
"loss": 0.0181,
|
|
"num_tokens": 31029602.0,
|
|
"reward": 1.1693274974822998,
|
|
"reward_std": 0.17095816135406494,
|
|
"rewards/accuracy_reward_step": 0.59375,
|
|
"rewards/final_brier_reward_step": 0.5058691501617432,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.8921340107917786,
|
|
"step": 144
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.5698763955342903,
|
|
"calib/avg_num_step_conf": 4.84375,
|
|
"calib/ece": 0.5119521912350598,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.006987639553429029,
|
|
"calib/mean_conf": 0.09362549800796813,
|
|
"calib/mu_c": 0.09638157894736842,
|
|
"calib/mu_w": 0.08939393939393939,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.016676055407534942,
|
|
"calib/step_conf_rate": 0.98046875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 3031.0,
|
|
"completions/max_terminated_length": 3031.0,
|
|
"completions/mean_length": 518.86328125,
|
|
"completions/mean_terminated_length": 525.0158081054688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 159.0,
|
|
"epoch": 0.15466666666666667,
|
|
"grad_norm": 0.01040293276309967,
|
|
"learning_rate": 1.527777777777778e-06,
|
|
"loss": -0.0028,
|
|
"num_tokens": 31240815.0,
|
|
"reward": 1.1662088632583618,
|
|
"reward_std": 0.21874991059303284,
|
|
"rewards/accuracy_reward_step": 0.59375,
|
|
"rewards/final_brier_reward_step": 0.4884374737739563,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/stepwise_brier_reward": 0.9223355054855347,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.542289523069954,
|
|
"calib/avg_num_step_conf": 4.6875,
|
|
"calib/ece": 0.3021653543307087,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.004228952306995382,
|
|
"calib/mean_conf": 0.09547244094488189,
|
|
"calib/mu_c": 0.09801980198019798,
|
|
"calib/mu_w": 0.0937908496732026,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.014348489877277104,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1820.0,
|
|
"completions/max_terminated_length": 1820.0,
|
|
"completions/mean_length": 486.7421875,
|
|
"completions/mean_terminated_length": 488.6510009765625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 156.0,
|
|
"epoch": 0.15573333333333333,
|
|
"grad_norm": 0.010282909497618675,
|
|
"learning_rate": 1.5e-06,
|
|
"loss": 0.0027,
|
|
"num_tokens": 31448317.0,
|
|
"reward": 1.0654511451721191,
|
|
"reward_std": 0.16977518796920776,
|
|
"rewards/accuracy_reward_step": 0.39453125,
|
|
"rewards/final_brier_reward_step": 0.6657519936561584,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.9553008675575256,
|
|
"step": 146
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.5517878348067027,
|
|
"calib/avg_num_step_conf": 4.5625,
|
|
"calib/ece": 0.33433734939759036,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0051787834806703015,
|
|
"calib/mean_conf": 0.09136546184738956,
|
|
"calib/mu_c": 0.09433962264150944,
|
|
"calib/mu_w": 0.08916083916083914,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.98046875,
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.018898985647955734,
|
|
"calib/step_conf_rate": 0.98046875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2738.0,
|
|
"completions/max_terminated_length": 2738.0,
|
|
"completions/mean_length": 544.08203125,
|
|
"completions/mean_terminated_length": 546.2156982421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 159.0,
|
|
"epoch": 0.1568,
|
|
"grad_norm": 0.009104374796152115,
|
|
"learning_rate": 1.4722222222222225e-06,
|
|
"loss": -0.0059,
|
|
"num_tokens": 31666962.0,
|
|
"reward": 1.0537787675857544,
|
|
"reward_std": 0.1528664082288742,
|
|
"rewards/accuracy_reward_step": 0.4140625,
|
|
"rewards/final_brier_reward_step": 0.6282519698143005,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/stepwise_brier_reward": 0.9132986068725586,
|
|
"step": 147
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.6466361394857345,
|
|
"calib/avg_num_step_conf": 4.6484375,
|
|
"calib/ece": 0.5700396825396825,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.014663613948573448,
|
|
"calib/mean_conf": 0.09265873015873016,
|
|
"calib/mu_c": 0.09760479041916166,
|
|
"calib/mu_w": 0.08294117647058821,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.01769658863117844,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2371.0,
|
|
"completions/max_terminated_length": 2371.0,
|
|
"completions/mean_length": 477.36328125,
|
|
"completions/mean_terminated_length": 481.1220397949219,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 123.0,
|
|
"epoch": 0.15786666666666666,
|
|
"grad_norm": 0.008585936389863491,
|
|
"learning_rate": 1.4444444444444445e-06,
|
|
"loss": -0.0094,
|
|
"num_tokens": 31869959.0,
|
|
"reward": 1.1949286460876465,
|
|
"reward_std": 0.1767604947090149,
|
|
"rewards/accuracy_reward_step": 0.65234375,
|
|
"rewards/final_brier_reward_step": 0.450615257024765,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.8753587007522583,
|
|
"step": 148
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.6027783112476794,
|
|
"calib/avg_num_step_conf": 4.734375,
|
|
"calib/ece": 0.41659999999999997,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.01027783112476792,
|
|
"calib/mean_conf": 0.09140000000000001,
|
|
"calib/mu_c": 0.09645669291338581,
|
|
"calib/mu_w": 0.08617886178861789,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.98046875,
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.018869022232219666,
|
|
"calib/step_conf_rate": 0.9765625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1989.0,
|
|
"completions/max_terminated_length": 1989.0,
|
|
"completions/mean_length": 536.59375,
|
|
"completions/mean_terminated_length": 538.6980590820312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 161.0,
|
|
"epoch": 0.15893333333333334,
|
|
"grad_norm": 0.011967609636485577,
|
|
"learning_rate": 1.4166666666666667e-06,
|
|
"loss": 0.0278,
|
|
"num_tokens": 32087463.0,
|
|
"reward": 1.0985041856765747,
|
|
"reward_std": 0.24534091353416443,
|
|
"rewards/accuracy_reward_step": 0.49609375,
|
|
"rewards/final_brier_reward_step": 0.563769519329071,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/stepwise_brier_reward": 0.8930402398109436,
|
|
"step": 149
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.592723880597015,
|
|
"calib/avg_num_step_conf": 4.67578125,
|
|
"calib/ece": 0.4364173228346457,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.009272388059701445,
|
|
"calib/mean_conf": 0.09114173228346456,
|
|
"calib/mu_c": 0.09552238805970147,
|
|
"calib/mu_w": 0.08625000000000002,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.019090428986509384,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 1460.0,
|
|
"completions/max_terminated_length": 1460.0,
|
|
"completions/mean_length": 437.40234375,
|
|
"completions/mean_terminated_length": 440.8464660644531,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 160.0,
|
|
"epoch": 0.16,
|
|
"grad_norm": 0.015229527838528156,
|
|
"learning_rate": 1.3888888888888892e-06,
|
|
"loss": -0.0326,
|
|
"num_tokens": 32280078.0,
|
|
"reward": 1.1190340518951416,
|
|
"reward_std": 0.18449994921684265,
|
|
"rewards/accuracy_reward_step": 0.5234375,
|
|
"rewards/final_brier_reward_step": 0.5601464509963989,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.8652180433273315,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5941932624113475,
|
|
"calib/avg_num_step_conf": 4.95703125,
|
|
"calib/ece": 0.35731225296442687,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.009419326241134784,
|
|
"calib/mean_conf": 0.08537549407114625,
|
|
"calib/mu_c": 0.09062500000000002,
|
|
"calib/mu_w": 0.08120567375886524,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.022745309907310766,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2732.0,
|
|
"completions/max_terminated_length": 2732.0,
|
|
"completions/mean_length": 570.6171875,
|
|
"completions/mean_terminated_length": 575.1102294921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 221.0,
|
|
"epoch": 0.16106666666666666,
|
|
"grad_norm": 0.009542996063828468,
|
|
"learning_rate": 1.3611111111111112e-06,
|
|
"loss": 0.0044,
|
|
"num_tokens": 32508860.0,
|
|
"reward": 1.0749963521957397,
|
|
"reward_std": 0.19749057292938232,
|
|
"rewards/accuracy_reward_step": 0.4375,
|
|
"rewards/final_brier_reward_step": 0.6223633289337158,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.909946084022522,
|
|
"step": 151
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.6332244218079313,
|
|
"calib/avg_num_step_conf": 4.71875,
|
|
"calib/ece": 0.4302,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.013322442180793106,
|
|
"calib/mean_conf": 0.08580000000000002,
|
|
"calib/mu_c": 0.09224806201550387,
|
|
"calib/mu_w": 0.07892561983471076,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.9765625,
|
|
"calib/nonempty_step_conf_rate": 0.97265625,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.022546840133375674,
|
|
"calib/step_conf_rate": 0.97265625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01953125,
|
|
"completions/max_length": 2797.0,
|
|
"completions/max_terminated_length": 2797.0,
|
|
"completions/mean_length": 519.3515625,
|
|
"completions/mean_terminated_length": 529.6972045898438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 200.0,
|
|
"epoch": 0.16213333333333332,
|
|
"grad_norm": 0.007988336496055126,
|
|
"learning_rate": 1.3333333333333334e-06,
|
|
"loss": -0.0165,
|
|
"num_tokens": 32722886.0,
|
|
"reward": 1.105271816253662,
|
|
"reward_std": 0.20392175018787384,
|
|
"rewards/accuracy_reward_step": 0.50390625,
|
|
"rewards/final_brier_reward_step": 0.5540722608566284,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"rewards/stepwise_brier_reward": 0.9082551598548889,
|
|
"step": 152
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.650771565899317,
|
|
"calib/avg_num_step_conf": 5.35546875,
|
|
"calib/ece": 0.45000000000000007,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.015077156589931681,
|
|
"calib/mean_conf": 0.08174603174603175,
|
|
"calib/mu_c": 0.08880597014925372,
|
|
"calib/mu_w": 0.07372881355932204,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.024072620457306508,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2220.0,
|
|
"completions/max_terminated_length": 2220.0,
|
|
"completions/mean_length": 540.6171875,
|
|
"completions/mean_terminated_length": 542.7373046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 213.0,
|
|
"epoch": 0.1632,
|
|
"grad_norm": 0.007903658784925938,
|
|
"learning_rate": 1.3055555555555556e-06,
|
|
"loss": 0.0549,
|
|
"num_tokens": 32944284.0,
|
|
"reward": 1.1284171342849731,
|
|
"reward_std": 0.2020460069179535,
|
|
"rewards/accuracy_reward_step": 0.5234375,
|
|
"rewards/final_brier_reward_step": 0.5463769435882568,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/stepwise_brier_reward": 0.9349770545959473,
|
|
"step": 153
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.6056811048336472,
|
|
"calib/avg_num_step_conf": 5.23828125,
|
|
"calib/ece": 0.3851778656126483,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.010568110483364729,
|
|
"calib/mean_conf": 0.08122529644268774,
|
|
"calib/mu_c": 0.08686440677966102,
|
|
"calib/mu_w": 0.07629629629629629,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.024212510902437596,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2548.0,
|
|
"completions/max_terminated_length": 2548.0,
|
|
"completions/mean_length": 523.140625,
|
|
"completions/mean_terminated_length": 529.3438720703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 189.0,
|
|
"epoch": 0.16426666666666667,
|
|
"grad_norm": 0.007686985656619072,
|
|
"learning_rate": 1.2777777777777779e-06,
|
|
"loss": -0.0481,
|
|
"num_tokens": 33158328.0,
|
|
"reward": 1.0843218564987183,
|
|
"reward_std": 0.13767561316490173,
|
|
"rewards/accuracy_reward_step": 0.4609375,
|
|
"rewards/final_brier_reward_step": 0.5995800495147705,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.9006271958351135,
|
|
"step": 154
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6250559284116332,
|
|
"calib/avg_num_step_conf": 5.55078125,
|
|
"calib/ece": 0.33405511811023625,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.01250559284116333,
|
|
"calib/mean_conf": 0.07933070866141732,
|
|
"calib/mu_c": 0.08666666666666668,
|
|
"calib/mu_w": 0.07416107382550335,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.024622042207947028,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2457.0,
|
|
"completions/max_terminated_length": 2457.0,
|
|
"completions/mean_length": 490.5546875,
|
|
"completions/mean_terminated_length": 492.47845458984375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 210.0,
|
|
"epoch": 0.16533333333333333,
|
|
"grad_norm": 0.01461772620677948,
|
|
"learning_rate": 1.25e-06,
|
|
"loss": 0.0216,
|
|
"num_tokens": 33366806.0,
|
|
"reward": 1.0643209218978882,
|
|
"reward_std": 0.17543260753154755,
|
|
"rewards/accuracy_reward_step": 0.41015625,
|
|
"rewards/final_brier_reward_step": 0.6423828601837158,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.9365804195404053,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6879252752690832,
|
|
"calib/avg_num_step_conf": 6.046875,
|
|
"calib/ece": 0.3847058823529412,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.018792527526908312,
|
|
"calib/mean_conf": 0.0780392156862745,
|
|
"calib/mu_c": 0.08813559322033897,
|
|
"calib/mu_w": 0.06934306569343066,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.024814575716951177,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2267.0,
|
|
"completions/max_terminated_length": 2267.0,
|
|
"completions/mean_length": 521.125,
|
|
"completions/mean_terminated_length": 523.1686401367188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 209.0,
|
|
"epoch": 0.1664,
|
|
"grad_norm": 0.011230715550482273,
|
|
"learning_rate": 1.2222222222222223e-06,
|
|
"loss": -0.0324,
|
|
"num_tokens": 33580654.0,
|
|
"reward": 1.099372148513794,
|
|
"reward_std": 0.17626884579658508,
|
|
"rewards/accuracy_reward_step": 0.4609375,
|
|
"rewards/final_brier_reward_step": 0.6097265481948853,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.9358479976654053,
|
|
"step": 156
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.6294715579592102,
|
|
"calib/avg_num_step_conf": 5.67578125,
|
|
"calib/ece": 0.5146825396825397,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.012947155795921025,
|
|
"calib/mean_conf": 0.07658730158730159,
|
|
"calib/mu_c": 0.08187919463087247,
|
|
"calib/mu_w": 0.06893203883495144,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.024949558586695474,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 1704.0,
|
|
"completions/max_terminated_length": 1704.0,
|
|
"completions/mean_length": 505.3359375,
|
|
"completions/mean_terminated_length": 511.3280944824219,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 153.0,
|
|
"epoch": 0.16746666666666668,
|
|
"grad_norm": 0.011182314716279507,
|
|
"learning_rate": 1.1944444444444446e-06,
|
|
"loss": -0.0221,
|
|
"num_tokens": 33789428.0,
|
|
"reward": 1.1364257335662842,
|
|
"reward_std": 0.16615410149097443,
|
|
"rewards/accuracy_reward_step": 0.58203125,
|
|
"rewards/final_brier_reward_step": 0.4912695288658142,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.8412890434265137,
|
|
"step": 157
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.5744284954811271,
|
|
"calib/avg_num_step_conf": 6.3046875,
|
|
"calib/ece": 0.5243027888446214,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.007442849548112723,
|
|
"calib/mean_conf": 0.08127490039840637,
|
|
"calib/mu_c": 0.08421052631578947,
|
|
"calib/mu_w": 0.07676767676767675,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.024199702993840225,
|
|
"calib/step_conf_rate": 0.98046875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1363.0,
|
|
"completions/max_terminated_length": 1363.0,
|
|
"completions/mean_length": 506.890625,
|
|
"completions/mean_terminated_length": 508.8784484863281,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 152.0,
|
|
"epoch": 0.16853333333333334,
|
|
"grad_norm": 0.017442265525460243,
|
|
"learning_rate": 1.1666666666666668e-06,
|
|
"loss": -0.007,
|
|
"num_tokens": 34000112.0,
|
|
"reward": 1.1422319412231445,
|
|
"reward_std": 0.22000621259212494,
|
|
"rewards/accuracy_reward_step": 0.59375,
|
|
"rewards/final_brier_reward_step": 0.46803712844848633,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"rewards/stepwise_brier_reward": 0.8703532218933105,
|
|
"step": 158
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.6704815509693559,
|
|
"calib/avg_num_step_conf": 6.1953125,
|
|
"calib/ece": 0.43596837944664035,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.017048155096935566,
|
|
"calib/mean_conf": 0.07786561264822134,
|
|
"calib/mu_c": 0.08615384615384615,
|
|
"calib/mu_w": 0.06910569105691058,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.024835222248861675,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2900.0,
|
|
"completions/max_terminated_length": 2900.0,
|
|
"completions/mean_length": 550.7421875,
|
|
"completions/mean_terminated_length": 552.9019775390625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 192.0,
|
|
"epoch": 0.1696,
|
|
"grad_norm": 0.009645085781812668,
|
|
"learning_rate": 1.138888888888889e-06,
|
|
"loss": 0.0177,
|
|
"num_tokens": 34221566.0,
|
|
"reward": 1.1186250448226929,
|
|
"reward_std": 0.1828586310148239,
|
|
"rewards/accuracy_reward_step": 0.5078125,
|
|
"rewards/final_brier_reward_step": 0.5613672137260437,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.9252033829689026,
|
|
"step": 159
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5874056662130397,
|
|
"calib/avg_num_step_conf": 6.296875,
|
|
"calib/ece": 0.4613725490196079,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.008740566621303958,
|
|
"calib/mean_conf": 0.07588235294117644,
|
|
"calib/mu_c": 0.07992700729927006,
|
|
"calib/mu_w": 0.0711864406779661,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.024984424213641537,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1610.0,
|
|
"completions/max_terminated_length": 1610.0,
|
|
"completions/mean_length": 515.1640625,
|
|
"completions/mean_terminated_length": 517.184326171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 248.0,
|
|
"epoch": 0.17066666666666666,
|
|
"grad_norm": 0.013785509392619133,
|
|
"learning_rate": 1.111111111111111e-06,
|
|
"loss": 0.0145,
|
|
"num_tokens": 34433968.0,
|
|
"reward": 1.1234824657440186,
|
|
"reward_std": 0.20462122559547424,
|
|
"rewards/accuracy_reward_step": 0.53515625,
|
|
"rewards/final_brier_reward_step": 0.5362597703933716,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.8839104175567627,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5733961983961984,
|
|
"calib/avg_num_step_conf": 6.0546875,
|
|
"calib/ece": 0.6273437499999999,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.008286308286308275,
|
|
"calib/mean_conf": 0.08359375000000002,
|
|
"calib/mu_c": 0.08598901098901097,
|
|
"calib/mu_w": 0.0777027027027027,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.027321419453196424,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1802.0,
|
|
"completions/max_terminated_length": 1802.0,
|
|
"completions/mean_length": 488.19921875,
|
|
"completions/mean_terminated_length": 490.11376953125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 195.0,
|
|
"epoch": 0.17173333333333332,
|
|
"grad_norm": 0.018429474905133247,
|
|
"learning_rate": 1.0833333333333335e-06,
|
|
"loss": 0.0109,
|
|
"num_tokens": 34638547.0,
|
|
"reward": 1.231764793395996,
|
|
"reward_std": 0.16334450244903564,
|
|
"rewards/accuracy_reward_step": 0.7109375,
|
|
"rewards/final_brier_reward_step": 0.4035937488079071,
|
|
"rewards/format_reward_step": 1.0,
|
|
"rewards/stepwise_brier_reward": 0.8761218786239624,
|
|
"step": 161
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5352941176470589,
|
|
"calib/avg_num_step_conf": 5.484375,
|
|
"calib/ece": 0.5807843137254902,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.003529411764705878,
|
|
"calib/mean_conf": 0.08588235294117647,
|
|
"calib/mu_c": 0.08705882352941176,
|
|
"calib/mu_w": 0.08352941176470588,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.022507207611422345,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2149.0,
|
|
"completions/max_terminated_length": 2149.0,
|
|
"completions/mean_length": 479.75390625,
|
|
"completions/mean_terminated_length": 481.63531494140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 192.0,
|
|
"epoch": 0.1728,
|
|
"grad_norm": 0.016633324325084686,
|
|
"learning_rate": 1.0555555555555557e-06,
|
|
"loss": -0.0073,
|
|
"num_tokens": 34841188.0,
|
|
"reward": 1.1983163356781006,
|
|
"reward_std": 0.14873743057250977,
|
|
"rewards/accuracy_reward_step": 0.6640625,
|
|
"rewards/final_brier_reward_step": 0.4359374940395355,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.8682651519775391,
|
|
"step": 162
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.6971554993678887,
|
|
"calib/avg_num_step_conf": 5.2265625,
|
|
"calib/ece": 0.47430830039525695,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.019715549936788854,
|
|
"calib/mean_conf": 0.07905138339920949,
|
|
"calib/mu_c": 0.08785714285714284,
|
|
"calib/mu_w": 0.06814159292035399,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.024669541798594676,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2963.0,
|
|
"completions/max_terminated_length": 2963.0,
|
|
"completions/mean_length": 536.28515625,
|
|
"completions/mean_terminated_length": 540.5078735351562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 180.0,
|
|
"epoch": 0.17386666666666667,
|
|
"grad_norm": 0.01776149682700634,
|
|
"learning_rate": 1.0277777777777777e-06,
|
|
"loss": 0.073,
|
|
"num_tokens": 35058989.0,
|
|
"reward": 1.14092218875885,
|
|
"reward_std": 0.19047382473945618,
|
|
"rewards/accuracy_reward_step": 0.546875,
|
|
"rewards/final_brier_reward_step": 0.5307226777076721,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.9194308519363403,
|
|
"step": 163
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.6394538606403014,
|
|
"calib/avg_num_step_conf": 5.23828125,
|
|
"calib/ece": 0.45454545454545453,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.013945386064030105,
|
|
"calib/mean_conf": 0.07905138339920949,
|
|
"calib/mu_c": 0.08555555555555554,
|
|
"calib/mu_w": 0.07161016949152543,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.024669541798594676,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2376.0,
|
|
"completions/max_terminated_length": 2376.0,
|
|
"completions/mean_length": 533.73046875,
|
|
"completions/mean_terminated_length": 540.059326171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 216.0,
|
|
"epoch": 0.17493333333333333,
|
|
"grad_norm": 0.03917751833796501,
|
|
"learning_rate": 1.0000000000000002e-06,
|
|
"loss": -0.0335,
|
|
"num_tokens": 35277440.0,
|
|
"reward": 1.1271326541900635,
|
|
"reward_std": 0.1549869328737259,
|
|
"rewards/accuracy_reward_step": 0.52734375,
|
|
"rewards/final_brier_reward_step": 0.5443945527076721,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.9150543212890625,
|
|
"step": 164
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5979746367889682,
|
|
"calib/avg_num_step_conf": 5.3203125,
|
|
"calib/ece": 0.4349019607843137,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.009797463678896806,
|
|
"calib/mean_conf": 0.0788235294117647,
|
|
"calib/mu_c": 0.08358778625954198,
|
|
"calib/mu_w": 0.07379032258064518,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.024705882352941175,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1468.0,
|
|
"completions/max_terminated_length": 1468.0,
|
|
"completions/mean_length": 511.83984375,
|
|
"completions/mean_terminated_length": 513.8471069335938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 204.0,
|
|
"epoch": 0.176,
|
|
"grad_norm": 0.013871337287127972,
|
|
"learning_rate": 9.722222222222224e-07,
|
|
"loss": -0.0228,
|
|
"num_tokens": 35489727.0,
|
|
"reward": 1.1261584758758545,
|
|
"reward_std": 0.15966832637786865,
|
|
"rewards/accuracy_reward_step": 0.51171875,
|
|
"rewards/final_brier_reward_step": 0.5631250143051147,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.9330711364746094,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.6554248366013071,
|
|
"calib/avg_num_step_conf": 5.02734375,
|
|
"calib/ece": 0.5213438735177865,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.015542483660130738,
|
|
"calib/mean_conf": 0.083399209486166,
|
|
"calib/mu_c": 0.08954248366013072,
|
|
"calib/mu_w": 0.07399999999999998,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.023546831634160444,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2577.0,
|
|
"completions/max_terminated_length": 2577.0,
|
|
"completions/mean_length": 540.76171875,
|
|
"completions/mean_terminated_length": 545.0196533203125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 201.0,
|
|
"epoch": 0.17706666666666668,
|
|
"grad_norm": 0.009842603467404842,
|
|
"learning_rate": 9.444444444444445e-07,
|
|
"loss": 0.0084,
|
|
"num_tokens": 35710026.0,
|
|
"reward": 1.168914794921875,
|
|
"reward_std": 0.18666985630989075,
|
|
"rewards/accuracy_reward_step": 0.59765625,
|
|
"rewards/final_brier_reward_step": 0.48636719584465027,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.918549656867981,
|
|
"step": 166
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.6343370379943137,
|
|
"calib/avg_num_step_conf": 5.1484375,
|
|
"calib/ece": 0.49186507936507934,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.01343370379943136,
|
|
"calib/mean_conf": 0.0875,
|
|
"calib/mu_c": 0.09315068493150684,
|
|
"calib/mu_w": 0.07971698113207548,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.021650635094610966,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2314.0,
|
|
"completions/max_terminated_length": 2314.0,
|
|
"completions/mean_length": 508.32421875,
|
|
"completions/mean_terminated_length": 510.31768798828125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 180.0,
|
|
"epoch": 0.17813333333333334,
|
|
"grad_norm": 0.009834786877036095,
|
|
"learning_rate": 9.166666666666666e-07,
|
|
"loss": -0.0031,
|
|
"num_tokens": 35921445.0,
|
|
"reward": 1.1382412910461426,
|
|
"reward_std": 0.1689327359199524,
|
|
"rewards/accuracy_reward_step": 0.5703125,
|
|
"rewards/final_brier_reward_step": 0.5123144388198853,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.8533361554145813,
|
|
"step": 167
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6398277437035104,
|
|
"calib/avg_num_step_conf": 4.84375,
|
|
"calib/ece": 0.535686274509804,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.013982774370351031,
|
|
"calib/mean_conf": 0.08392156862745097,
|
|
"calib/mu_c": 0.08924050632911391,
|
|
"calib/mu_w": 0.07525773195876288,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.023353920724916457,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1719.0,
|
|
"completions/max_terminated_length": 1719.0,
|
|
"completions/mean_length": 526.44140625,
|
|
"completions/mean_terminated_length": 528.5059204101562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 172.0,
|
|
"epoch": 0.1792,
|
|
"grad_norm": 0.015063202939927578,
|
|
"learning_rate": 8.88888888888889e-07,
|
|
"loss": 0.0472,
|
|
"num_tokens": 36136566.0,
|
|
"reward": 1.1882625818252563,
|
|
"reward_std": 0.19077295064926147,
|
|
"rewards/accuracy_reward_step": 0.6171875,
|
|
"rewards/final_brier_reward_step": 0.4815039038658142,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.9228549003601074,
|
|
"step": 168
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.633487603305785,
|
|
"calib/avg_num_step_conf": 4.6796875,
|
|
"calib/ece": 0.4065040650406504,
|
|
"calib/final_conf_rate": 0.9609375,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.013348760330578524,
|
|
"calib/mean_conf": 0.08536585365853659,
|
|
"calib/mu_c": 0.09214876033057852,
|
|
"calib/mu_w": 0.0788,
|
|
"calib/nonempty_final_conf_rate": 0.9609375,
|
|
"calib/nonempty_reasoning_rate": 0.96875,
|
|
"calib/nonempty_step_conf_rate": 0.96484375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.022749705007533698,
|
|
"calib/step_conf_rate": 0.96484375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2893.0,
|
|
"completions/max_terminated_length": 2893.0,
|
|
"completions/mean_length": 512.16015625,
|
|
"completions/mean_terminated_length": 518.2332153320312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 212.0,
|
|
"epoch": 0.18026666666666666,
|
|
"grad_norm": 0.009256192483007908,
|
|
"learning_rate": 8.611111111111112e-07,
|
|
"loss": -0.0376,
|
|
"num_tokens": 36347543.0,
|
|
"reward": 1.072580099105835,
|
|
"reward_std": 0.18603584170341492,
|
|
"rewards/accuracy_reward_step": 0.47265625,
|
|
"rewards/final_brier_reward_step": 0.5678906440734863,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"rewards/stepwise_brier_reward": 0.8795391321182251,
|
|
"step": 169
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.6563919532770928,
|
|
"calib/avg_num_step_conf": 4.8203125,
|
|
"calib/ece": 0.4558232931726908,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.96875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.01563919532770923,
|
|
"calib/mean_conf": 0.08232931726907632,
|
|
"calib/mu_c": 0.08955223880597012,
|
|
"calib/mu_w": 0.07391304347826089,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.023901487576492388,
|
|
"calib/step_conf_rate": 0.98046875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01953125,
|
|
"completions/max_length": 1667.0,
|
|
"completions/max_terminated_length": 1667.0,
|
|
"completions/mean_length": 535.40234375,
|
|
"completions/mean_terminated_length": 546.0677490234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 199.0,
|
|
"epoch": 0.18133333333333335,
|
|
"grad_norm": 0.016515735536813736,
|
|
"learning_rate": 8.333333333333333e-07,
|
|
"loss": -0.0212,
|
|
"num_tokens": 36564438.0,
|
|
"reward": 1.1249117851257324,
|
|
"reward_std": 0.216035395860672,
|
|
"rewards/accuracy_reward_step": 0.52734375,
|
|
"rewards/final_brier_reward_step": 0.5319238901138306,
|
|
"rewards/format_reward_step": 0.96875,
|
|
"rewards/stepwise_brier_reward": 0.9389243721961975,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.6654024957181306,
|
|
"calib/avg_num_step_conf": 4.96484375,
|
|
"calib/ece": 0.3921875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.016540249571813076,
|
|
"calib/mean_conf": 0.08437500000000002,
|
|
"calib/mu_c": 0.09303278688524591,
|
|
"calib/mu_w": 0.07649253731343283,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.02317562027217395,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1189.0,
|
|
"completions/max_terminated_length": 1189.0,
|
|
"completions/mean_length": 479.8828125,
|
|
"completions/mean_terminated_length": 481.7647399902344,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 205.0,
|
|
"epoch": 0.1824,
|
|
"grad_norm": 0.013426011428236961,
|
|
"learning_rate": 8.055555555555557e-07,
|
|
"loss": 0.0009,
|
|
"num_tokens": 36769864.0,
|
|
"reward": 1.1131434440612793,
|
|
"reward_std": 0.1605221927165985,
|
|
"rewards/accuracy_reward_step": 0.4765625,
|
|
"rewards/final_brier_reward_step": 0.6044531464576721,
|
|
"rewards/format_reward_step": 1.0,
|
|
"rewards/stepwise_brier_reward": 0.9374175667762756,
|
|
"step": 171
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6706693919264181,
|
|
"calib/avg_num_step_conf": 4.6171875,
|
|
"calib/ece": 0.508235294117647,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.017066939192641795,
|
|
"calib/mean_conf": 0.08784313725490195,
|
|
"calib/mu_c": 0.09473684210526315,
|
|
"calib/mu_w": 0.07766990291262135,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.021448865365136436,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2084.0,
|
|
"completions/max_terminated_length": 2084.0,
|
|
"completions/mean_length": 481.19140625,
|
|
"completions/mean_terminated_length": 483.0784606933594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 211.0,
|
|
"epoch": 0.18346666666666667,
|
|
"grad_norm": 0.0074419486336410046,
|
|
"learning_rate": 7.777777777777779e-07,
|
|
"loss": -0.007,
|
|
"num_tokens": 36972081.0,
|
|
"reward": 1.182291030883789,
|
|
"reward_std": 0.17799603939056396,
|
|
"rewards/accuracy_reward_step": 0.59375,
|
|
"rewards/final_brier_reward_step": 0.5066992044448853,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.9423279762268066,
|
|
"step": 172
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5288003051687965,
|
|
"calib/avg_num_step_conf": 4.66796875,
|
|
"calib/ece": 0.49015748031496065,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.002880030516879614,
|
|
"calib/mean_conf": 0.08858267716535433,
|
|
"calib/mu_c": 0.08979591836734693,
|
|
"calib/mu_w": 0.08691588785046732,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.020988351079152503,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2773.0,
|
|
"completions/max_terminated_length": 2773.0,
|
|
"completions/mean_length": 544.09765625,
|
|
"completions/mean_terminated_length": 548.3818969726562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 151.0,
|
|
"epoch": 0.18453333333333333,
|
|
"grad_norm": 0.010779325850307941,
|
|
"learning_rate": 7.5e-07,
|
|
"loss": -0.0094,
|
|
"num_tokens": 37190210.0,
|
|
"reward": 1.1378142833709717,
|
|
"reward_std": 0.20299270749092102,
|
|
"rewards/accuracy_reward_step": 0.57421875,
|
|
"rewards/final_brier_reward_step": 0.5128710865974426,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.8317651152610779,
|
|
"step": 173
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5471066624960901,
|
|
"calib/avg_num_step_conf": 5.0546875,
|
|
"calib/ece": 0.4594488188976378,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.004710666249609016,
|
|
"calib/mean_conf": 0.08779527559055117,
|
|
"calib/mu_c": 0.08992805755395683,
|
|
"calib/mu_w": 0.08521739130434781,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.021477451491316312,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2617.0,
|
|
"completions/max_terminated_length": 2617.0,
|
|
"completions/mean_length": 532.14453125,
|
|
"completions/mean_terminated_length": 532.14453125,
|
|
"completions/min_length": 221.0,
|
|
"completions/min_terminated_length": 221.0,
|
|
"epoch": 0.1856,
|
|
"grad_norm": 0.00913445558398962,
|
|
"learning_rate": 7.222222222222222e-07,
|
|
"loss": 0.0721,
|
|
"num_tokens": 37406351.0,
|
|
"reward": 1.1227368116378784,
|
|
"reward_std": 0.21584494411945343,
|
|
"rewards/accuracy_reward_step": 0.54296875,
|
|
"rewards/final_brier_reward_step": 0.5349023342132568,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.853955090045929,
|
|
"step": 174
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.6414965986394559,
|
|
"calib/avg_num_step_conf": 4.6015625,
|
|
"calib/ece": 0.3301587301587302,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.01414965986394559,
|
|
"calib/mean_conf": 0.0865079365079365,
|
|
"calib/mu_c": 0.09476190476190476,
|
|
"calib/mu_w": 0.08061224489795916,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.02219385945096756,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 2684.0,
|
|
"completions/max_terminated_length": 2684.0,
|
|
"completions/mean_length": 541.32421875,
|
|
"completions/mean_terminated_length": 549.9166870117188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 137.0,
|
|
"epoch": 0.18666666666666668,
|
|
"grad_norm": 0.009307211264967918,
|
|
"learning_rate": 6.944444444444446e-07,
|
|
"loss": -0.0549,
|
|
"num_tokens": 37626434.0,
|
|
"reward": 1.0552879571914673,
|
|
"reward_std": 0.17625892162322998,
|
|
"rewards/accuracy_reward_step": 0.41015625,
|
|
"rewards/final_brier_reward_step": 0.644101619720459,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.8985738158226013,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.6590648854961833,
|
|
"calib/avg_num_step_conf": 4.5703125,
|
|
"calib/ece": 0.43486055776892435,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.01590648854961829,
|
|
"calib/mean_conf": 0.08705179282868526,
|
|
"calib/mu_c": 0.0946564885496183,
|
|
"calib/mu_w": 0.07875000000000001,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.021903294035702726,
|
|
"calib/step_conf_rate": 0.98046875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1461.0,
|
|
"completions/max_terminated_length": 1461.0,
|
|
"completions/mean_length": 489.55859375,
|
|
"completions/mean_terminated_length": 491.47845458984375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 186.0,
|
|
"epoch": 0.18773333333333334,
|
|
"grad_norm": 0.0212948489934206,
|
|
"learning_rate": 6.666666666666667e-07,
|
|
"loss": 0.0201,
|
|
"num_tokens": 37831505.0,
|
|
"reward": 1.1138497591018677,
|
|
"reward_std": 0.18853960931301117,
|
|
"rewards/accuracy_reward_step": 0.51171875,
|
|
"rewards/final_brier_reward_step": 0.5538281202316284,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"rewards/stepwise_brier_reward": 0.9102426767349243,
|
|
"step": 176
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6754429133858268,
|
|
"calib/avg_num_step_conf": 4.56640625,
|
|
"calib/ece": 0.41117647058823537,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.01754429133858268,
|
|
"calib/mean_conf": 0.0868627450980392,
|
|
"calib/mu_c": 0.0956692913385827,
|
|
"calib/mu_w": 0.07812500000000001,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.022006255445644232,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1461.0,
|
|
"completions/max_terminated_length": 1461.0,
|
|
"completions/mean_length": 491.7578125,
|
|
"completions/mean_terminated_length": 493.6863098144531,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 204.0,
|
|
"epoch": 0.1888,
|
|
"grad_norm": 0.008859474211931229,
|
|
"learning_rate": 6.388888888888889e-07,
|
|
"loss": -0.0356,
|
|
"num_tokens": 38036907.0,
|
|
"reward": 1.1183327436447144,
|
|
"reward_std": 0.14114660024642944,
|
|
"rewards/accuracy_reward_step": 0.49609375,
|
|
"rewards/final_brier_reward_step": 0.5869238376617432,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.9166704416275024,
|
|
"step": 177
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.6448474126492703,
|
|
"calib/avg_num_step_conf": 4.46484375,
|
|
"calib/ece": 0.43988095238095243,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.014484741264927006,
|
|
"calib/mean_conf": 0.08789682539682539,
|
|
"calib/mu_c": 0.09473684210526313,
|
|
"calib/mu_w": 0.08025210084033613,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.02141662659439622,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2612.0,
|
|
"completions/max_terminated_length": 2612.0,
|
|
"completions/mean_length": 481.703125,
|
|
"completions/mean_terminated_length": 485.4960632324219,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 139.0,
|
|
"epoch": 0.18986666666666666,
|
|
"grad_norm": 0.01023717038333416,
|
|
"learning_rate": 6.111111111111112e-07,
|
|
"loss": 0.015,
|
|
"num_tokens": 38241975.0,
|
|
"reward": 1.1178292036056519,
|
|
"reward_std": 0.16584745049476624,
|
|
"rewards/accuracy_reward_step": 0.51953125,
|
|
"rewards/final_brier_reward_step": 0.555224597454071,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.888992428779602,
|
|
"step": 178
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.6491150442477875,
|
|
"calib/avg_num_step_conf": 4.49609375,
|
|
"calib/ece": 0.4650197628458499,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.014911504424778732,
|
|
"calib/mean_conf": 0.0883399209486166,
|
|
"calib/mu_c": 0.09499999999999997,
|
|
"calib/mu_w": 0.08008849557522124,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.021143474385366755,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2559.0,
|
|
"completions/max_terminated_length": 2559.0,
|
|
"completions/mean_length": 517.0234375,
|
|
"completions/mean_terminated_length": 517.0234375,
|
|
"completions/min_length": 244.0,
|
|
"completions/min_terminated_length": 244.0,
|
|
"epoch": 0.19093333333333334,
|
|
"grad_norm": 0.01045550312846899,
|
|
"learning_rate": 5.833333333333334e-07,
|
|
"loss": 0.0674,
|
|
"num_tokens": 38456277.0,
|
|
"reward": 1.1350390911102295,
|
|
"reward_std": 0.21084295213222504,
|
|
"rewards/accuracy_reward_step": 0.546875,
|
|
"rewards/final_brier_reward_step": 0.5332910418510437,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.8923243880271912,
|
|
"step": 179
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.643921568627451,
|
|
"calib/avg_num_step_conf": 4.4921875,
|
|
"calib/ece": 0.5087301587301587,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.014392156862745087,
|
|
"calib/mean_conf": 0.08650793650793652,
|
|
"calib/mu_c": 0.09233333333333331,
|
|
"calib/mu_w": 0.07794117647058822,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.022193859450967565,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2197.0,
|
|
"completions/max_terminated_length": 2197.0,
|
|
"completions/mean_length": 561.06640625,
|
|
"completions/mean_terminated_length": 563.2667236328125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 210.0,
|
|
"epoch": 0.192,
|
|
"grad_norm": 0.011729094199836254,
|
|
"learning_rate": 5.555555555555555e-07,
|
|
"loss": 0.0002,
|
|
"num_tokens": 38679446.0,
|
|
"reward": 1.1534072160720825,
|
|
"reward_std": 0.16474701464176178,
|
|
"rewards/accuracy_reward_step": 0.5859375,
|
|
"rewards/final_brier_reward_step": 0.49878910183906555,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.87855064868927,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5728070175438597,
|
|
"calib/avg_num_step_conf": 4.71875,
|
|
"calib/ece": 0.4594488188976378,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.007280701754385943,
|
|
"calib/mean_conf": 0.09173228346456692,
|
|
"calib/mu_c": 0.09499999999999997,
|
|
"calib/mu_w": 0.08771929824561403,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.01857500175131567,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2866.0,
|
|
"completions/max_terminated_length": 2866.0,
|
|
"completions/mean_length": 476.7734375,
|
|
"completions/mean_terminated_length": 478.6431579589844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 192.0,
|
|
"epoch": 0.19306666666666666,
|
|
"grad_norm": 0.01009936723858118,
|
|
"learning_rate": 5.277777777777779e-07,
|
|
"loss": -0.0001,
|
|
"num_tokens": 38883444.0,
|
|
"reward": 1.1534780263900757,
|
|
"reward_std": 0.171014666557312,
|
|
"rewards/accuracy_reward_step": 0.546875,
|
|
"rewards/final_brier_reward_step": 0.54052734375,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.9484823942184448,
|
|
"step": 181
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5915032679738562,
|
|
"calib/avg_num_step_conf": 4.6328125,
|
|
"calib/ece": 0.5131372549019608,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.00915032679738563,
|
|
"calib/mean_conf": 0.0868627450980392,
|
|
"calib/mu_c": 0.09052287581699345,
|
|
"calib/mu_w": 0.08137254901960782,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.022006255445644236,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2051.0,
|
|
"completions/max_terminated_length": 2051.0,
|
|
"completions/mean_length": 489.67578125,
|
|
"completions/mean_terminated_length": 491.5960998535156,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 221.0,
|
|
"epoch": 0.19413333333333332,
|
|
"grad_norm": 0.011595308780670166,
|
|
"learning_rate": 5.000000000000001e-07,
|
|
"loss": -0.0103,
|
|
"num_tokens": 39090641.0,
|
|
"reward": 1.1820398569107056,
|
|
"reward_std": 0.13127438724040985,
|
|
"rewards/accuracy_reward_step": 0.59765625,
|
|
"rewards/final_brier_reward_step": 0.49864256381988525,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.9418116807937622,
|
|
"step": 182
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.6260573618821041,
|
|
"calib/avg_num_step_conf": 4.69921875,
|
|
"calib/ece": 0.5294466403162056,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.012605736188210412,
|
|
"calib/mean_conf": 0.08715415019762844,
|
|
"calib/mu_c": 0.09198717948717948,
|
|
"calib/mu_w": 0.07938144329896907,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.021846661826775464,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2897.0,
|
|
"completions/max_terminated_length": 2897.0,
|
|
"completions/mean_length": 544.23828125,
|
|
"completions/mean_terminated_length": 550.6917114257812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 232.0,
|
|
"epoch": 0.1952,
|
|
"grad_norm": 0.02083410508930683,
|
|
"learning_rate": 4.7222222222222226e-07,
|
|
"loss": -0.0343,
|
|
"num_tokens": 39312326.0,
|
|
"reward": 1.1755211353302002,
|
|
"reward_std": 0.20563694834709167,
|
|
"rewards/accuracy_reward_step": 0.609375,
|
|
"rewards/final_brier_reward_step": 0.47916996479034424,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.9124943017959595,
|
|
"step": 183
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5434169278996865,
|
|
"calib/avg_num_step_conf": 4.6640625,
|
|
"calib/ece": 0.47843137254901963,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.004341692789968668,
|
|
"calib/mean_conf": 0.09019607843137255,
|
|
"calib/mu_c": 0.09206896551724138,
|
|
"calib/mu_w": 0.08772727272727271,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.019851428167957437,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2040.0,
|
|
"completions/max_terminated_length": 2040.0,
|
|
"completions/mean_length": 472.265625,
|
|
"completions/mean_terminated_length": 474.11767578125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 178.0,
|
|
"epoch": 0.19626666666666667,
|
|
"grad_norm": 0.00886443629860878,
|
|
"learning_rate": 4.444444444444445e-07,
|
|
"loss": 0.0028,
|
|
"num_tokens": 39514186.0,
|
|
"reward": 1.1536989212036133,
|
|
"reward_std": 0.1764037013053894,
|
|
"rewards/accuracy_reward_step": 0.56640625,
|
|
"rewards/final_brier_reward_step": 0.5254883170127869,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.8997563123703003,
|
|
"step": 184
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.96484375,
|
|
"calib/auroc": 0.6647456279809221,
|
|
"calib/avg_num_step_conf": 4.3671875,
|
|
"calib/ece": 0.4631578947368421,
|
|
"calib/final_conf_rate": 0.96484375,
|
|
"calib/format_rate": 0.9609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.01647456279809223,
|
|
"calib/mean_conf": 0.08744939271255062,
|
|
"calib/mu_c": 0.09485294117647058,
|
|
"calib/mu_w": 0.07837837837837835,
|
|
"calib/nonempty_final_conf_rate": 0.96484375,
|
|
"calib/nonempty_reasoning_rate": 0.97265625,
|
|
"calib/nonempty_step_conf_rate": 0.97265625,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.021679774470429618,
|
|
"calib/step_conf_rate": 0.97265625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.02734375,
|
|
"completions/max_length": 1855.0,
|
|
"completions/max_terminated_length": 1855.0,
|
|
"completions/mean_length": 476.40234375,
|
|
"completions/mean_terminated_length": 489.795166015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 220.0,
|
|
"epoch": 0.19733333333333333,
|
|
"grad_norm": 0.01681617647409439,
|
|
"learning_rate": 4.1666666666666667e-07,
|
|
"loss": -0.0158,
|
|
"num_tokens": 39718745.0,
|
|
"reward": 1.1039948463439941,
|
|
"reward_std": 0.17881864309310913,
|
|
"rewards/accuracy_reward_step": 0.53125,
|
|
"rewards/final_brier_reward_step": 0.5226464867591858,
|
|
"rewards/format_reward_step": 0.9609375,
|
|
"rewards/stepwise_brier_reward": 0.8613112568855286,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.6417481311098332,
|
|
"calib/avg_num_step_conf": 4.25,
|
|
"calib/ece": 0.4732142857142857,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0141748131109833,
|
|
"calib/mean_conf": 0.08630952380952381,
|
|
"calib/mu_c": 0.09255319148936168,
|
|
"calib/mu_w": 0.07837837837837838,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.022295620000390527,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2163.0,
|
|
"completions/max_terminated_length": 2163.0,
|
|
"completions/mean_length": 535.80859375,
|
|
"completions/mean_terminated_length": 540.0275268554688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 176.0,
|
|
"epoch": 0.1984,
|
|
"grad_norm": 0.008796021342277527,
|
|
"learning_rate": 3.8888888888888895e-07,
|
|
"loss": 0.0204,
|
|
"num_tokens": 39936632.0,
|
|
"reward": 1.1320539712905884,
|
|
"reward_std": 0.16783073544502258,
|
|
"rewards/accuracy_reward_step": 0.55078125,
|
|
"rewards/final_brier_reward_step": 0.5277246236801147,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.8758916258811951,
|
|
"step": 186
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.6205218929677134,
|
|
"calib/avg_num_step_conf": 4.375,
|
|
"calib/ece": 0.43948412698412703,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.012052189296771293,
|
|
"calib/mean_conf": 0.0882936507936508,
|
|
"calib/mu_c": 0.093984962406015,
|
|
"calib/mu_w": 0.08193277310924371,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.02117259664227484,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 2917.0,
|
|
"completions/max_terminated_length": 2917.0,
|
|
"completions/mean_length": 525.6171875,
|
|
"completions/mean_terminated_length": 533.9603271484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 173.0,
|
|
"epoch": 0.19946666666666665,
|
|
"grad_norm": 0.014326175674796104,
|
|
"learning_rate": 3.611111111111111e-07,
|
|
"loss": 0.0024,
|
|
"num_tokens": 40148414.0,
|
|
"reward": 1.1215332746505737,
|
|
"reward_std": 0.1880277693271637,
|
|
"rewards/accuracy_reward_step": 0.51953125,
|
|
"rewards/final_brier_reward_step": 0.5543847680091858,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.9054886102676392,
|
|
"step": 187
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.6488744989207524,
|
|
"calib/avg_num_step_conf": 4.62109375,
|
|
"calib/ece": 0.465625,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.014887449892075216,
|
|
"calib/mean_conf": 0.08515625000000002,
|
|
"calib/mu_c": 0.09184397163120567,
|
|
"calib/mu_w": 0.07695652173913045,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.022844049245646012,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1808.0,
|
|
"completions/max_terminated_length": 1808.0,
|
|
"completions/mean_length": 540.26953125,
|
|
"completions/mean_terminated_length": 542.3882446289062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 166.0,
|
|
"epoch": 0.20053333333333334,
|
|
"grad_norm": 0.016202721744775772,
|
|
"learning_rate": 3.3333333333333335e-07,
|
|
"loss": 0.0409,
|
|
"num_tokens": 40366475.0,
|
|
"reward": 1.1602380275726318,
|
|
"reward_std": 0.1599513441324234,
|
|
"rewards/accuracy_reward_step": 0.55078125,
|
|
"rewards/final_brier_reward_step": 0.5426172018051147,
|
|
"rewards/format_reward_step": 1.0,
|
|
"rewards/stepwise_brier_reward": 0.9525926113128662,
|
|
"step": 188
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6365546218487395,
|
|
"calib/avg_num_step_conf": 4.6171875,
|
|
"calib/ece": 0.4470588235294118,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.013655462184873915,
|
|
"calib/mean_conf": 0.08627450980392157,
|
|
"calib/mu_c": 0.09264705882352942,
|
|
"calib/mu_w": 0.0789915966386555,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.022313346424982436,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1884.0,
|
|
"completions/max_terminated_length": 1884.0,
|
|
"completions/mean_length": 503.34375,
|
|
"completions/mean_terminated_length": 505.31768798828125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 168.0,
|
|
"epoch": 0.2016,
|
|
"grad_norm": 0.009709118865430355,
|
|
"learning_rate": 3.055555555555556e-07,
|
|
"loss": 0.0062,
|
|
"num_tokens": 40578779.0,
|
|
"reward": 1.1314115524291992,
|
|
"reward_std": 0.1800665259361267,
|
|
"rewards/accuracy_reward_step": 0.53125,
|
|
"rewards/final_brier_reward_step": 0.555371105670929,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.8914668560028076,
|
|
"step": 189
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.6638993710691824,
|
|
"calib/avg_num_step_conf": 4.578125,
|
|
"calib/ece": 0.500390625,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.016389937106918204,
|
|
"calib/mean_conf": 0.08554687500000002,
|
|
"calib/mu_c": 0.09233333333333332,
|
|
"calib/mu_w": 0.07594339622641512,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.02266635011938126,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2194.0,
|
|
"completions/max_terminated_length": 2194.0,
|
|
"completions/mean_length": 525.00390625,
|
|
"completions/mean_terminated_length": 527.0628051757812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 179.0,
|
|
"epoch": 0.20266666666666666,
|
|
"grad_norm": 0.021329551935195923,
|
|
"learning_rate": 2.7777777777777776e-07,
|
|
"loss": 0.0263,
|
|
"num_tokens": 40794468.0,
|
|
"reward": 1.1731104850769043,
|
|
"reward_std": 0.1828664243221283,
|
|
"rewards/accuracy_reward_step": 0.5859375,
|
|
"rewards/final_brier_reward_step": 0.5144335627555847,
|
|
"rewards/format_reward_step": 1.0,
|
|
"rewards/stepwise_brier_reward": 0.9198249578475952,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5998120300751879,
|
|
"calib/avg_num_step_conf": 4.65234375,
|
|
"calib/ece": 0.3582677165354331,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.009981203007518827,
|
|
"calib/mean_conf": 0.09055118110236221,
|
|
"calib/mu_c": 0.09605263157894738,
|
|
"calib/mu_w": 0.08607142857142855,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.01957449274748985,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1578.0,
|
|
"completions/max_terminated_length": 1578.0,
|
|
"completions/mean_length": 480.76171875,
|
|
"completions/mean_terminated_length": 482.6470947265625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 171.0,
|
|
"epoch": 0.20373333333333332,
|
|
"grad_norm": 0.01765361987054348,
|
|
"learning_rate": 2.5000000000000004e-07,
|
|
"loss": -0.0135,
|
|
"num_tokens": 40997391.0,
|
|
"reward": 1.0853232145309448,
|
|
"reward_std": 0.1648622453212738,
|
|
"rewards/accuracy_reward_step": 0.4453125,
|
|
"rewards/final_brier_reward_step": 0.6239062547683716,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.91535484790802,
|
|
"step": 191
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.643006993006993,
|
|
"calib/avg_num_step_conf": 4.5859375,
|
|
"calib/ece": 0.48122529644268774,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.014300699300699243,
|
|
"calib/mean_conf": 0.08399209486166008,
|
|
"calib/mu_c": 0.0902097902097902,
|
|
"calib/mu_w": 0.07590909090909095,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.023326856410560478,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2388.0,
|
|
"completions/max_terminated_length": 2388.0,
|
|
"completions/mean_length": 529.7734375,
|
|
"completions/mean_terminated_length": 533.9448852539062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 145.0,
|
|
"epoch": 0.2048,
|
|
"grad_norm": 0.014357575215399265,
|
|
"learning_rate": 2.2222222222222224e-07,
|
|
"loss": 0.0052,
|
|
"num_tokens": 41213669.0,
|
|
"reward": 1.146650791168213,
|
|
"reward_std": 0.19800010323524475,
|
|
"rewards/accuracy_reward_step": 0.55859375,
|
|
"rewards/final_brier_reward_step": 0.5190625190734863,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.9203532934188843,
|
|
"step": 192
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5985507246376811,
|
|
"calib/avg_num_step_conf": 4.6328125,
|
|
"calib/ece": 0.45790513833992097,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.009855072463768128,
|
|
"calib/mean_conf": 0.0875494071146245,
|
|
"calib/mu_c": 0.09202898550724638,
|
|
"calib/mu_w": 0.08217391304347825,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.02162203461914285,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 3013.0,
|
|
"completions/max_terminated_length": 3013.0,
|
|
"completions/mean_length": 518.34375,
|
|
"completions/mean_terminated_length": 522.4251708984375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 230.0,
|
|
"epoch": 0.20586666666666667,
|
|
"grad_norm": 0.012046567164361477,
|
|
"learning_rate": 1.9444444444444447e-07,
|
|
"loss": 0.0088,
|
|
"num_tokens": 41427757.0,
|
|
"reward": 1.1411765813827515,
|
|
"reward_std": 0.2215532511472702,
|
|
"rewards/accuracy_reward_step": 0.5390625,
|
|
"rewards/final_brier_reward_step": 0.5404003858566284,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.932343065738678,
|
|
"step": 193
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.6130128366473697,
|
|
"calib/avg_num_step_conf": 4.60546875,
|
|
"calib/ece": 0.45434782608695656,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.011301283664736941,
|
|
"calib/mean_conf": 0.08715415019762845,
|
|
"calib/mu_c": 0.09233576642335765,
|
|
"calib/mu_w": 0.08103448275862071,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.021846661826775464,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 1896.0,
|
|
"completions/max_terminated_length": 1896.0,
|
|
"completions/mean_length": 485.59375,
|
|
"completions/mean_terminated_length": 491.351806640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 224.0,
|
|
"epoch": 0.20693333333333333,
|
|
"grad_norm": 0.011294323019683361,
|
|
"learning_rate": 1.6666666666666668e-07,
|
|
"loss": -0.0256,
|
|
"num_tokens": 41633693.0,
|
|
"reward": 1.1376094818115234,
|
|
"reward_std": 0.15752781927585602,
|
|
"rewards/accuracy_reward_step": 0.53515625,
|
|
"rewards/final_brier_reward_step": 0.5439746379852295,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.9265509247779846,
|
|
"step": 194
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6576485461441214,
|
|
"calib/avg_num_step_conf": 4.5859375,
|
|
"calib/ece": 0.4689723320158103,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.015764854614412113,
|
|
"calib/mean_conf": 0.08438735177865611,
|
|
"calib/mu_c": 0.09142857142857141,
|
|
"calib/mu_w": 0.0756637168141593,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.02317061990072259,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2436.0,
|
|
"completions/max_terminated_length": 2436.0,
|
|
"completions/mean_length": 529.19140625,
|
|
"completions/mean_terminated_length": 531.2667236328125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 197.0,
|
|
"epoch": 0.208,
|
|
"grad_norm": 0.019001536071300507,
|
|
"learning_rate": 1.3888888888888888e-07,
|
|
"loss": 0.0243,
|
|
"num_tokens": 41850830.0,
|
|
"reward": 1.1309077739715576,
|
|
"reward_std": 0.1469835638999939,
|
|
"rewards/accuracy_reward_step": 0.546875,
|
|
"rewards/final_brier_reward_step": 0.5338379144668579,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.8731424808502197,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5560987684881491,
|
|
"calib/avg_num_step_conf": 4.75,
|
|
"calib/ece": 0.46386718750000006,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.005609876848814865,
|
|
"calib/mean_conf": 0.09472656250000001,
|
|
"calib/mu_c": 0.09720279720279718,
|
|
"calib/mu_w": 0.09159292035398231,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.015357823150876356,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1108.0,
|
|
"completions/max_terminated_length": 1108.0,
|
|
"completions/mean_length": 405.54296875,
|
|
"completions/mean_terminated_length": 407.13336181640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 203.0,
|
|
"epoch": 0.20906666666666668,
|
|
"grad_norm": 0.030068090185523033,
|
|
"learning_rate": 1.1111111111111112e-07,
|
|
"loss": -0.0074,
|
|
"num_tokens": 42032873.0,
|
|
"reward": 1.1597189903259277,
|
|
"reward_std": 0.16683349013328552,
|
|
"rewards/accuracy_reward_step": 0.55859375,
|
|
"rewards/final_brier_reward_step": 0.5407910346984863,
|
|
"rewards/format_reward_step": 1.0,
|
|
"rewards/stepwise_brier_reward": 0.9229186177253723,
|
|
"step": 196
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.6479433878814684,
|
|
"calib/avg_num_step_conf": 4.71484375,
|
|
"calib/ece": 0.38591269841269843,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.014794338788146844,
|
|
"calib/mean_conf": 0.08630952380952381,
|
|
"calib/mu_c": 0.09411764705882353,
|
|
"calib/mu_w": 0.07932330827067668,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.022295620000390527,
|
|
"calib/step_conf_rate": 0.984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 2626.0,
|
|
"completions/max_terminated_length": 2626.0,
|
|
"completions/mean_length": 535.5078125,
|
|
"completions/mean_terminated_length": 544.0079956054688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 142.0,
|
|
"epoch": 0.21013333333333334,
|
|
"grad_norm": 0.013797251507639885,
|
|
"learning_rate": 8.333333333333334e-08,
|
|
"loss": -0.0004,
|
|
"num_tokens": 42250699.0,
|
|
"reward": 1.0935965776443481,
|
|
"reward_std": 0.19439850747585297,
|
|
"rewards/accuracy_reward_step": 0.46484375,
|
|
"rewards/final_brier_reward_step": 0.5953418016433716,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/stepwise_brier_reward": 0.9321398138999939,
|
|
"step": 197
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.6408385093167702,
|
|
"calib/avg_num_step_conf": 4.62109375,
|
|
"calib/ece": 0.46215686274509804,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.014083850931677008,
|
|
"calib/mean_conf": 0.0868627450980392,
|
|
"calib/mu_c": 0.0932142857142857,
|
|
"calib/mu_w": 0.07913043478260869,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.022006255445644236,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1970.0,
|
|
"completions/max_terminated_length": 1970.0,
|
|
"completions/mean_length": 468.3046875,
|
|
"completions/mean_terminated_length": 470.1412048339844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 146.0,
|
|
"epoch": 0.2112,
|
|
"grad_norm": 0.02300938405096531,
|
|
"learning_rate": 5.555555555555556e-08,
|
|
"loss": 0.025,
|
|
"num_tokens": 42451649.0,
|
|
"reward": 1.145423173904419,
|
|
"reward_std": 0.1318647563457489,
|
|
"rewards/accuracy_reward_step": 0.55078125,
|
|
"rewards/final_brier_reward_step": 0.5424317121505737,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.8968296647071838,
|
|
"step": 198
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6243405945509837,
|
|
"calib/avg_num_step_conf": 4.7109375,
|
|
"calib/ece": 0.43169291338582677,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.012434059455098359,
|
|
"calib/mean_conf": 0.08405511811023621,
|
|
"calib/mu_c": 0.09007633587786258,
|
|
"calib/mu_w": 0.07764227642276422,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.023302464161750625,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2527.0,
|
|
"completions/max_terminated_length": 2527.0,
|
|
"completions/mean_length": 559.46484375,
|
|
"completions/mean_terminated_length": 561.6588745117188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 115.0,
|
|
"epoch": 0.21226666666666666,
|
|
"grad_norm": 0.011882714927196503,
|
|
"learning_rate": 2.777777777777778e-08,
|
|
"loss": 0.0657,
|
|
"num_tokens": 42674752.0,
|
|
"reward": 1.123356580734253,
|
|
"reward_std": 0.23471125960350037,
|
|
"rewards/accuracy_reward_step": 0.51171875,
|
|
"rewards/final_brier_reward_step": 0.5651074647903442,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.9194613099098206,
|
|
"step": 199
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.7657669519420671,
|
|
"calib/avg_num_step_conf": 4.6171875,
|
|
"calib/ece": 0.5300395256916997,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.02657669519420669,
|
|
"calib/mean_conf": 0.08260869565217391,
|
|
"calib/mu_c": 0.09290322580645159,
|
|
"calib/mu_w": 0.0663265306122449,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.02381402423935505,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2313.0,
|
|
"completions/max_terminated_length": 2313.0,
|
|
"completions/mean_length": 535.796875,
|
|
"completions/mean_terminated_length": 540.0157470703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 166.0,
|
|
"epoch": 0.21333333333333335,
|
|
"grad_norm": 0.011222266592085361,
|
|
"learning_rate": 0.0,
|
|
"loss": 0.0136,
|
|
"num_tokens": 42895644.0,
|
|
"reward": 1.1725003719329834,
|
|
"reward_std": 0.12921534478664398,
|
|
"rewards/accuracy_reward_step": 0.60546875,
|
|
"rewards/final_brier_reward_step": 0.4880078434944153,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.8967982530593872,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.21333333333333335,
|
|
"step": 200,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.01614250882237684,
|
|
"train_runtime": 9613.004,
|
|
"train_samples_per_second": 5.326,
|
|
"train_steps_per_second": 0.021
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 200,
|
|
"num_input_tokens_seen": 42895644,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 20,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|