Files
PureRL-1.5B-v13D-lam025/trainer_state.json
ModelHub XC a264029dae 初始化项目,由ModelHub XC社区提供模型
Model: zhaohq/PureRL-1.5B-v13D-lam025
Source: Original Platform
2026-06-04 17:30:20 +08:00

9843 lines
385 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.21333333333333335,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 5.791089203391117e-07,
"aux_brier/mean_group_std": 0.06289231620091193,
"aux_brier/mean_r": 0.4665906001184907,
"aux_brier/n_active_tok": 24.615384615384617,
"aux_brier/n_groups": 5.3076923076923075,
"aux_brier/n_step_records": 6.153846153846154,
"calib/answer_extract_rate": 0.08203125,
"calib/auroc": 0.6944444444444445,
"calib/avg_num_step_conf": 0.3359375,
"calib/ece": 0.6230769230769231,
"calib/final_conf_rate": 0.05078125,
"calib/format_rate": 0.04296875,
"calib/frac_conf_gt_0.9": 0.7692307692307693,
"calib/gap": 0.03861111111111115,
"calib/mean_conf": 0.9307692307692309,
"calib/mu_c": 0.9575,
"calib/mu_w": 0.9188888888888889,
"calib/nonempty_final_conf_rate": 0.05078125,
"calib/nonempty_reasoning_rate": 0.09765625,
"calib/nonempty_step_conf_rate": 0.0703125,
"calib/pce": 0.6230769230769231,
"calib/std_conf": 0.07965903671384378,
"calib/step_conf_rate": 0.0703125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08984375,
"completions/max_length": 2955.0,
"completions/max_terminated_length": 2955.0,
"completions/mean_length": 613.67578125,
"completions/mean_terminated_length": 674.2532348632812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0010666666666666667,
"grad_norm": 0.8019087314605713,
"learning_rate": 2.5000000000000004e-07,
"loss": 0.0332,
"num_tokens": 264685.0,
"reward": 0.03929531201720238,
"reward_std": 0.08434611558914185,
"rewards/accuracy_reward_step": 0.015625,
"rewards/final_brier_reward_step": 0.01655624993145466,
"rewards/format_reward_step_strict": 0.0390625,
"step": 1
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.461345396504181e-08,
"aux_brier/mean_group_std": 0.046398653263787254,
"aux_brier/mean_r": 0.430243897442093,
"aux_brier/n_active_tok": 28.42105263157895,
"aux_brier/n_groups": 5.894736842105263,
"aux_brier/n_step_records": 7.105263157894737,
"calib/answer_extract_rate": 0.13671875,
"calib/auroc": 0.5338345864661654,
"calib/avg_num_step_conf": 0.55078125,
"calib/ece": 0.6261538461538463,
"calib/final_conf_rate": 0.1015625,
"calib/format_rate": 0.08984375,
"calib/frac_conf_gt_0.9": 0.7692307692307693,
"calib/gap": 0.002406015037593856,
"calib/mean_conf": 0.8953846153846153,
"calib/mu_c": 0.897142857142857,
"calib/mu_w": 0.8947368421052632,
"calib/nonempty_final_conf_rate": 0.1015625,
"calib/nonempty_reasoning_rate": 0.14453125,
"calib/nonempty_step_conf_rate": 0.109375,
"calib/pce": 0.6261538461538463,
"calib/std_conf": 0.18653172073466937,
"calib/step_conf_rate": 0.109375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0546875,
"completions/max_length": 3001.0,
"completions/max_terminated_length": 3001.0,
"completions/mean_length": 646.4609375,
"completions/mean_terminated_length": 683.8594970703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0021333333333333334,
"grad_norm": 0.0070538897998631,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0643,
"num_tokens": 533467.0,
"reward": 0.08358447253704071,
"reward_std": 0.15892045199871063,
"rewards/accuracy_reward_step": 0.03125,
"rewards/final_brier_reward_step": 0.02965039201080799,
"rewards/format_reward_step_strict": 0.08984375,
"step": 2
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.9270137424930454e-09,
"aux_brier/mean_group_std": 0.016647923275186365,
"aux_brier/mean_r": 0.36642824813769415,
"aux_brier/n_active_tok": 22.666666666666668,
"aux_brier/n_groups": 5.416666666666667,
"aux_brier/n_step_records": 5.666666666666667,
"calib/answer_extract_rate": 0.0546875,
"calib/auroc": 0.75,
"calib/avg_num_step_conf": 0.296875,
"calib/ece": 0.8140714285714286,
"calib/final_conf_rate": 0.0546875,
"calib/format_rate": 0.0390625,
"calib/frac_conf_gt_0.9": 0.9285714285714286,
"calib/gap": 0.03858333333333319,
"calib/mean_conf": 0.9569285714285716,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9514166666666668,
"calib/nonempty_final_conf_rate": 0.0546875,
"calib/nonempty_reasoning_rate": 0.0703125,
"calib/nonempty_step_conf_rate": 0.05859375,
"calib/pce": 0.8140714285714286,
"calib/std_conf": 0.0642744831459069,
"calib/step_conf_rate": 0.05859375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09375,
"completions/max_length": 2982.0,
"completions/max_terminated_length": 2982.0,
"completions/mean_length": 650.859375,
"completions/mean_terminated_length": 718.1896362304688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0032,
"grad_norm": 0.0033897303510457277,
"learning_rate": 7.5e-07,
"loss": 0.0223,
"num_tokens": 805343.0,
"reward": 0.026312783360481262,
"reward_std": 0.058088187128305435,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/final_brier_reward_step": 0.011501136235892773,
"rewards/format_reward_step_strict": 0.03125,
"step": 3
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -9.31322515403584e-09,
"aux_brier/mean_group_std": 0.037747883123163066,
"aux_brier/mean_r": 0.42610642501873497,
"aux_brier/n_active_tok": 24.0,
"aux_brier/n_groups": 4.583333333333333,
"aux_brier/n_step_records": 6.0,
"calib/answer_extract_rate": 0.0625,
"calib/auroc": 0.22222222222222227,
"calib/avg_num_step_conf": 0.30078125,
"calib/ece": 0.8589999999999999,
"calib/final_conf_rate": 0.0390625,
"calib/format_rate": 0.03515625,
"calib/frac_conf_gt_0.9": 0.9,
"calib/gap": -0.01000000000000012,
"calib/mean_conf": 0.959,
"calib/mu_c": 0.95,
"calib/mu_w": 0.9600000000000001,
"calib/nonempty_final_conf_rate": 0.0390625,
"calib/nonempty_reasoning_rate": 0.08203125,
"calib/nonempty_step_conf_rate": 0.0625,
"calib/pce": 0.8589999999999999,
"calib/std_conf": 0.02913760456866693,
"calib/step_conf_rate": 0.0625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1171875,
"completions/max_length": 3013.0,
"completions/max_terminated_length": 3013.0,
"completions/mean_length": 660.6953125,
"completions/mean_terminated_length": 748.3982543945312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.004266666666666667,
"grad_norm": 1.0983048677444458,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.031,
"num_tokens": 1080649.0,
"reward": 0.021128516644239426,
"reward_std": 0.055777207016944885,
"rewards/accuracy_reward_step": 0.00390625,
"rewards/final_brier_reward_step": 0.006389062851667404,
"rewards/format_reward_step_strict": 0.03125,
"step": 4
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 3.5762788286319847e-08,
"aux_brier/mean_group_std": 0.011597597801238597,
"aux_brier/mean_r": 0.5177998114517441,
"aux_brier/n_active_tok": 25.5,
"aux_brier/n_groups": 5.875,
"aux_brier/n_step_records": 6.375,
"calib/answer_extract_rate": 0.03515625,
"calib/auroc": 0.25,
"calib/avg_num_step_conf": 0.203125,
"calib/ece": 0.8155555555555556,
"calib/final_conf_rate": 0.03515625,
"calib/format_rate": 0.02734375,
"calib/frac_conf_gt_0.9": 0.7777777777777778,
"calib/gap": -0.018749999999999933,
"calib/mean_conf": 0.9266666666666666,
"calib/mu_c": 0.91,
"calib/mu_w": 0.92875,
"calib/nonempty_final_conf_rate": 0.03515625,
"calib/nonempty_reasoning_rate": 0.04296875,
"calib/nonempty_step_conf_rate": 0.0390625,
"calib/pce": 0.8155555555555556,
"calib/std_conf": 0.06765927710061478,
"calib/step_conf_rate": 0.0390625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0703125,
"completions/max_length": 3041.0,
"completions/max_terminated_length": 3041.0,
"completions/mean_length": 804.40234375,
"completions/mean_terminated_length": 865.2395629882812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.005333333333333333,
"grad_norm": 0.0030431875493377447,
"learning_rate": 1.25e-06,
"loss": 0.0143,
"num_tokens": 1393264.0,
"reward": 0.01749306544661522,
"reward_std": 0.03999492526054382,
"rewards/accuracy_reward_step": 0.00390625,
"rewards/final_brier_reward_step": 0.007472265511751175,
"rewards/format_reward_step_strict": 0.0234375,
"step": 5
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.5357464446340503e-08,
"aux_brier/mean_group_std": 0.018072298580218516,
"aux_brier/mean_r": 0.500327113360118,
"aux_brier/n_active_tok": 26.105263157894736,
"aux_brier/n_groups": 6.2631578947368425,
"aux_brier/n_step_records": 6.526315789473684,
"calib/answer_extract_rate": 0.10546875,
"calib/auroc": 0.9,
"calib/avg_num_step_conf": 0.49609375,
"calib/ece": 0.8293750000000001,
"calib/final_conf_rate": 0.0625,
"calib/format_rate": 0.0546875,
"calib/frac_conf_gt_0.9": 0.8125,
"calib/gap": 0.10466666666666657,
"calib/mean_conf": 0.891875,
"calib/mu_c": 0.99,
"calib/mu_w": 0.8853333333333334,
"calib/nonempty_final_conf_rate": 0.0625,
"calib/nonempty_reasoning_rate": 0.1328125,
"calib/nonempty_step_conf_rate": 0.0859375,
"calib/pce": 0.8293750000000001,
"calib/std_conf": 0.19013872402801066,
"calib/step_conf_rate": 0.0859375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.078125,
"completions/max_length": 3064.0,
"completions/max_terminated_length": 3064.0,
"completions/mean_length": 624.328125,
"completions/mean_terminated_length": 677.2373046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0064,
"grad_norm": 0.46461933851242065,
"learning_rate": 1.5e-06,
"loss": 0.0328,
"num_tokens": 1659044.0,
"reward": 0.029817283153533936,
"reward_std": 0.0761086717247963,
"rewards/accuracy_reward_step": 0.00390625,
"rewards/final_brier_reward_step": 0.009894140064716339,
"rewards/format_reward_step_strict": 0.046875,
"step": 6
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -5.662441064184046e-09,
"aux_brier/mean_group_std": 0.0037897744879094105,
"aux_brier/mean_r": 0.4083800068223509,
"aux_brier/n_active_tok": 21.866666666666667,
"aux_brier/n_groups": 5.4,
"aux_brier/n_step_records": 5.466666666666667,
"calib/answer_extract_rate": 0.078125,
"calib/auroc": 0.25,
"calib/avg_num_step_conf": 0.33203125,
"calib/ece": 0.6752941176470587,
"calib/final_conf_rate": 0.06640625,
"calib/format_rate": 0.04296875,
"calib/frac_conf_gt_0.9": 0.8823529411764706,
"calib/gap": 0.04500000000000015,
"calib/mean_conf": 0.9105882352941176,
"calib/mu_c": 0.9450000000000001,
"calib/mu_w": 0.8999999999999999,
"calib/nonempty_final_conf_rate": 0.06640625,
"calib/nonempty_reasoning_rate": 0.1015625,
"calib/nonempty_step_conf_rate": 0.06640625,
"calib/pce": 0.6752941176470587,
"calib/std_conf": 0.20408967429958172,
"calib/step_conf_rate": 0.06640625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0859375,
"completions/max_length": 2940.0,
"completions/max_terminated_length": 2940.0,
"completions/mean_length": 737.65234375,
"completions/mean_terminated_length": 807.0043334960938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.007466666666666667,
"grad_norm": 0.2528690993785858,
"learning_rate": 1.75e-06,
"loss": 0.001,
"num_tokens": 1955307.0,
"reward": 0.03929208964109421,
"reward_std": 0.08367627114057541,
"rewards/accuracy_reward_step": 0.015625,
"rewards/final_brier_reward_step": 0.01654335856437683,
"rewards/format_reward_step_strict": 0.0390625,
"step": 7
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -9.628442180306033e-09,
"aux_brier/mean_group_std": 0.008700643628001825,
"aux_brier/mean_r": 0.432177396158137,
"aux_brier/n_active_tok": 18.46153846153846,
"aux_brier/n_groups": 4.384615384615385,
"aux_brier/n_step_records": 4.615384615384615,
"calib/answer_extract_rate": 0.0625,
"calib/auroc": 0.4285714285714286,
"calib/avg_num_step_conf": 0.234375,
"calib/ece": 0.5554545454545454,
"calib/final_conf_rate": 0.04296875,
"calib/format_rate": 0.03515625,
"calib/frac_conf_gt_0.9": 0.7272727272727273,
"calib/gap": 0.06892857142857134,
"calib/mean_conf": 0.8536363636363636,
"calib/mu_c": 0.8975,
"calib/mu_w": 0.8285714285714286,
"calib/nonempty_final_conf_rate": 0.04296875,
"calib/nonempty_reasoning_rate": 0.08203125,
"calib/nonempty_step_conf_rate": 0.05859375,
"calib/pce": 0.5227272727272727,
"calib/std_conf": 0.27634868380602484,
"calib/step_conf_rate": 0.05859375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 2914.0,
"completions/max_terminated_length": 2914.0,
"completions/mean_length": 608.0234375,
"completions/mean_terminated_length": 662.357421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.008533333333333334,
"grad_norm": 0.45698079466819763,
"learning_rate": 2.0000000000000003e-06,
"loss": -0.0152,
"num_tokens": 2217473.0,
"reward": 0.03828798606991768,
"reward_std": 0.08853976428508759,
"rewards/accuracy_reward_step": 0.015625,
"rewards/final_brier_reward_step": 0.02033945359289646,
"rewards/format_reward_step_strict": 0.03515625,
"step": 8
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.008694010369464e-08,
"aux_brier/mean_group_std": 0.02933807589196565,
"aux_brier/mean_r": 0.4913759569149179,
"aux_brier/n_active_tok": 18.76923076923077,
"aux_brier/n_groups": 4.076923076923077,
"aux_brier/n_step_records": 4.6923076923076925,
"calib/answer_extract_rate": 0.078125,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 0.24609375,
"calib/ece": 0.7916666666666665,
"calib/final_conf_rate": 0.046875,
"calib/format_rate": 0.02734375,
"calib/frac_conf_gt_0.9": 0.9166666666666666,
"calib/gap": -0.010000000000000009,
"calib/mean_conf": 0.9583333333333331,
"calib/mu_c": 0.95,
"calib/mu_w": 0.96,
"calib/nonempty_final_conf_rate": 0.046875,
"calib/nonempty_reasoning_rate": 0.1015625,
"calib/nonempty_step_conf_rate": 0.05859375,
"calib/pce": 0.7916666666666665,
"calib/std_conf": 0.027335365778094534,
"calib/step_conf_rate": 0.05859375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09765625,
"completions/max_length": 3057.0,
"completions/max_terminated_length": 3057.0,
"completions/mean_length": 620.76171875,
"completions/mean_terminated_length": 687.9437255859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.0096,
"grad_norm": 0.003539730329066515,
"learning_rate": 2.25e-06,
"loss": 0.0037,
"num_tokens": 2483924.0,
"reward": 0.02196083962917328,
"reward_std": 0.05420968309044838,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/final_brier_reward_step": 0.0019058594480156898,
"rewards/format_reward_step_strict": 0.02734375,
"step": 9
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.4545744887618906e-08,
"aux_brier/mean_group_std": 0.06240475362047568,
"aux_brier/mean_r": 0.46709847997319487,
"aux_brier/n_active_tok": 27.058823529411764,
"aux_brier/n_groups": 5.235294117647059,
"aux_brier/n_step_records": 6.764705882352941,
"calib/answer_extract_rate": 0.0859375,
"calib/auroc": 0.625,
"calib/avg_num_step_conf": 0.48828125,
"calib/ece": 0.726842105263158,
"calib/final_conf_rate": 0.07421875,
"calib/format_rate": 0.07421875,
"calib/frac_conf_gt_0.9": 0.7894736842105263,
"calib/gap": 0.10520833333333324,
"calib/mean_conf": 0.884736842105263,
"calib/mu_c": 0.9733333333333333,
"calib/mu_w": 0.868125,
"calib/nonempty_final_conf_rate": 0.07421875,
"calib/nonempty_reasoning_rate": 0.11328125,
"calib/nonempty_step_conf_rate": 0.10546875,
"calib/pce": 0.726842105263158,
"calib/std_conf": 0.2317296160581309,
"calib/step_conf_rate": 0.10546875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 2868.0,
"completions/max_terminated_length": 2868.0,
"completions/mean_length": 661.26171875,
"completions/mean_terminated_length": 720.3531494140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.010666666666666666,
"grad_norm": 1.4912821054458618,
"learning_rate": 2.5e-06,
"loss": 0.0379,
"num_tokens": 2760007.0,
"reward": 0.05463603138923645,
"reward_std": 0.1257432997226715,
"rewards/accuracy_reward_step": 0.015625,
"rewards/final_brier_reward_step": 0.023231640458106995,
"rewards/format_reward_step_strict": 0.06640625,
"step": 10
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.9474720453555644e-08,
"aux_brier/mean_group_std": 0.021085085040818102,
"aux_brier/mean_r": 0.3412877070435653,
"aux_brier/n_active_tok": 21.77777777777778,
"aux_brier/n_groups": 4.333333333333333,
"aux_brier/n_step_records": 5.444444444444445,
"calib/answer_extract_rate": 0.08984375,
"calib/auroc": 0.5476190476190476,
"calib/avg_num_step_conf": 0.40234375,
"calib/ece": 0.6213157894736843,
"calib/final_conf_rate": 0.07421875,
"calib/format_rate": 0.05078125,
"calib/frac_conf_gt_0.9": 0.7368421052631579,
"calib/gap": -0.08851190476190474,
"calib/mean_conf": 0.8844736842105263,
"calib/mu_c": 0.8285714285714285,
"calib/mu_w": 0.9170833333333333,
"calib/nonempty_final_conf_rate": 0.07421875,
"calib/nonempty_reasoning_rate": 0.12890625,
"calib/nonempty_step_conf_rate": 0.09375,
"calib/pce": 0.5686842105263158,
"calib/std_conf": 0.23487339667440835,
"calib/step_conf_rate": 0.09375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1015625,
"completions/max_length": 2911.0,
"completions/max_terminated_length": 2911.0,
"completions/mean_length": 666.3671875,
"completions/mean_terminated_length": 741.6956176757812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.011733333333333333,
"grad_norm": 1.0365098714828491,
"learning_rate": 2.7500000000000004e-06,
"loss": -0.0021,
"num_tokens": 3035077.0,
"reward": 0.0539645291864872,
"reward_std": 0.11456333100795746,
"rewards/accuracy_reward_step": 0.02734375,
"rewards/final_brier_reward_step": 0.020545605570077896,
"rewards/format_reward_step_strict": 0.04296875,
"step": 11
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 5.703742298468484e-08,
"aux_brier/mean_group_std": 0.0732713368122301,
"aux_brier/mean_r": 0.4649826706328534,
"aux_brier/n_active_tok": 27.4,
"aux_brier/n_groups": 5.0,
"aux_brier/n_step_records": 6.85,
"calib/answer_extract_rate": 0.1484375,
"calib/auroc": 0.5113636363636365,
"calib/avg_num_step_conf": 0.5390625,
"calib/ece": 0.5473235294117647,
"calib/final_conf_rate": 0.1328125,
"calib/format_rate": 0.1015625,
"calib/frac_conf_gt_0.9": 0.6764705882352942,
"calib/gap": 0.04595454545454547,
"calib/mean_conf": 0.900264705882353,
"calib/mu_c": 0.93,
"calib/mu_w": 0.8840454545454546,
"calib/nonempty_final_conf_rate": 0.1328125,
"calib/nonempty_reasoning_rate": 0.16015625,
"calib/nonempty_step_conf_rate": 0.12109375,
"calib/pce": 0.5473235294117647,
"calib/std_conf": 0.1299206338780197,
"calib/step_conf_rate": 0.12109375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 2937.0,
"completions/max_terminated_length": 2937.0,
"completions/mean_length": 623.390625,
"completions/mean_terminated_length": 679.0978393554688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.0128,
"grad_norm": 0.6270592212677002,
"learning_rate": 3e-06,
"loss": 0.0169,
"num_tokens": 3298841.0,
"reward": 0.11310829222202301,
"reward_std": 0.21758697926998138,
"rewards/accuracy_reward_step": 0.05078125,
"rewards/final_brier_reward_step": 0.05399569869041443,
"rewards/format_reward_step_strict": 0.09765625,
"step": 12
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 9.130551629052011e-08,
"aux_brier/mean_group_std": 0.0709884432844243,
"aux_brier/mean_r": 0.4623331838224613,
"aux_brier/n_active_tok": 29.523809523809526,
"aux_brier/n_groups": 4.9523809523809526,
"aux_brier/n_step_records": 7.380952380952381,
"calib/answer_extract_rate": 0.1640625,
"calib/auroc": 0.6074380165289256,
"calib/avg_num_step_conf": 0.609375,
"calib/ece": 0.6156565656565657,
"calib/final_conf_rate": 0.12890625,
"calib/format_rate": 0.10546875,
"calib/frac_conf_gt_0.9": 0.7575757575757576,
"calib/gap": -0.025606060606060743,
"calib/mean_conf": 0.851010101010101,
"calib/mu_c": 0.833939393939394,
"calib/mu_w": 0.8595454545454547,
"calib/nonempty_final_conf_rate": 0.12890625,
"calib/nonempty_reasoning_rate": 0.1875,
"calib/nonempty_step_conf_rate": 0.13671875,
"calib/pce": 0.5666666666666667,
"calib/std_conf": 0.25219105158053523,
"calib/step_conf_rate": 0.13671875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.078125,
"completions/max_length": 3053.0,
"completions/max_terminated_length": 3053.0,
"completions/mean_length": 627.24609375,
"completions/mean_terminated_length": 680.4025268554688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.013866666666666666,
"grad_norm": 0.013427832163870335,
"learning_rate": 3.2500000000000002e-06,
"loss": 0.0577,
"num_tokens": 3564008.0,
"reward": 0.10244810581207275,
"reward_std": 0.18835735321044922,
"rewards/accuracy_reward_step": 0.04296875,
"rewards/final_brier_reward_step": 0.05041740834712982,
"rewards/format_reward_step_strict": 0.09375,
"step": 13
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.1200137757683137e-07,
"aux_brier/mean_group_std": 0.05895216164706083,
"aux_brier/mean_r": 0.42568322537953823,
"aux_brier/n_active_tok": 34.4,
"aux_brier/n_groups": 6.68,
"aux_brier/n_step_records": 8.6,
"calib/answer_extract_rate": 0.1953125,
"calib/auroc": 0.5366379310344828,
"calib/avg_num_step_conf": 0.85546875,
"calib/ece": 0.613117117117117,
"calib/final_conf_rate": 0.14453125,
"calib/format_rate": 0.12109375,
"calib/frac_conf_gt_0.9": 0.7027027027027027,
"calib/gap": 0.12524712643678182,
"calib/mean_conf": 0.8293333333333333,
"calib/mu_c": 0.9275,
"calib/mu_w": 0.8022528735632182,
"calib/nonempty_final_conf_rate": 0.14453125,
"calib/nonempty_reasoning_rate": 0.23046875,
"calib/nonempty_step_conf_rate": 0.1640625,
"calib/pce": 0.613117117117117,
"calib/std_conf": 0.2795984250300407,
"calib/step_conf_rate": 0.1640625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.109375,
"completions/max_length": 3047.0,
"completions/max_terminated_length": 3047.0,
"completions/mean_length": 643.43359375,
"completions/mean_terminated_length": 722.4517822265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.014933333333333333,
"grad_norm": 0.56694495677948,
"learning_rate": 3.5e-06,
"loss": 0.0391,
"num_tokens": 3834127.0,
"reward": 0.10360130667686462,
"reward_std": 0.17667700350284576,
"rewards/accuracy_reward_step": 0.03515625,
"rewards/final_brier_reward_step": 0.047217756509780884,
"rewards/format_reward_step_strict": 0.11328125,
"step": 14
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.4157165954693922e-08,
"aux_brier/mean_group_std": 0.0736477397877375,
"aux_brier/mean_r": 0.5192831668822072,
"aux_brier/n_active_tok": 33.09090909090909,
"aux_brier/n_groups": 4.909090909090909,
"aux_brier/n_step_records": 8.272727272727273,
"calib/answer_extract_rate": 0.21484375,
"calib/auroc": 0.404692082111437,
"calib/avg_num_step_conf": 0.73046875,
"calib/ece": 0.6854523809523809,
"calib/final_conf_rate": 0.1640625,
"calib/format_rate": 0.12890625,
"calib/frac_conf_gt_0.9": 0.7619047619047619,
"calib/gap": -0.07600879765395907,
"calib/mean_conf": 0.8507380952380951,
"calib/mu_c": 0.7946363636363636,
"calib/mu_w": 0.8706451612903227,
"calib/nonempty_final_conf_rate": 0.1640625,
"calib/nonempty_reasoning_rate": 0.25390625,
"calib/nonempty_step_conf_rate": 0.17578125,
"calib/pce": 0.6371428571428571,
"calib/std_conf": 0.2692581571938341,
"calib/step_conf_rate": 0.17578125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05859375,
"completions/max_length": 3005.0,
"completions/max_terminated_length": 3005.0,
"completions/mean_length": 641.0078125,
"completions/mean_terminated_length": 680.9046020507812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.016,
"grad_norm": 0.34427380561828613,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.0959,
"num_tokens": 4106105.0,
"reward": 0.11405427753925323,
"reward_std": 0.2070690542459488,
"rewards/accuracy_reward_step": 0.04296875,
"rewards/final_brier_reward_step": 0.04996710270643234,
"rewards/format_reward_step_strict": 0.1171875,
"step": 15
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.01300370545232e-07,
"aux_brier/mean_group_std": 0.10674272866305466,
"aux_brier/mean_r": 0.5689924458994036,
"aux_brier/n_active_tok": 38.24,
"aux_brier/n_groups": 5.88,
"aux_brier/n_step_records": 9.56,
"calib/answer_extract_rate": 0.21484375,
"calib/auroc": 0.3703208556149733,
"calib/avg_num_step_conf": 0.95703125,
"calib/ece": 0.6723777777777779,
"calib/final_conf_rate": 0.17578125,
"calib/format_rate": 0.12890625,
"calib/frac_conf_gt_0.9": 0.6666666666666666,
"calib/gap": -0.0361951871657753,
"calib/mean_conf": 0.8937111111111109,
"calib/mu_c": 0.8663636363636364,
"calib/mu_w": 0.9025588235294117,
"calib/nonempty_final_conf_rate": 0.17578125,
"calib/nonempty_reasoning_rate": 0.2890625,
"calib/nonempty_step_conf_rate": 0.22265625,
"calib/pce": 0.6608222222222223,
"calib/std_conf": 0.17408996680799813,
"calib/step_conf_rate": 0.22265625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0859375,
"completions/max_length": 3031.0,
"completions/max_terminated_length": 3031.0,
"completions/mean_length": 666.52734375,
"completions/mean_terminated_length": 729.1923217773438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.017066666666666667,
"grad_norm": 0.944327712059021,
"learning_rate": 4.000000000000001e-06,
"loss": 0.1057,
"num_tokens": 4385584.0,
"reward": 0.11981875449419022,
"reward_std": 0.24961884319782257,
"rewards/accuracy_reward_step": 0.04296875,
"rewards/final_brier_reward_step": 0.04958750307559967,
"rewards/format_reward_step_strict": 0.12890625,
"step": 16
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.3937364507766437e-07,
"aux_brier/mean_group_std": 0.12641517603858168,
"aux_brier/mean_r": 0.44455731146964983,
"aux_brier/n_active_tok": 46.370370370370374,
"aux_brier/n_groups": 6.925925925925926,
"aux_brier/n_step_records": 11.592592592592593,
"calib/answer_extract_rate": 0.26171875,
"calib/auroc": 0.6513975155279502,
"calib/avg_num_step_conf": 1.2421875,
"calib/ece": 0.6728333333333334,
"calib/final_conf_rate": 0.234375,
"calib/format_rate": 0.1796875,
"calib/frac_conf_gt_0.9": 0.6833333333333333,
"calib/gap": 0.008291925465838568,
"calib/mean_conf": 0.9015000000000001,
"calib/mu_c": 0.9078571428571428,
"calib/mu_w": 0.8995652173913042,
"calib/nonempty_final_conf_rate": 0.234375,
"calib/nonempty_reasoning_rate": 0.31640625,
"calib/nonempty_step_conf_rate": 0.2421875,
"calib/pce": 0.6705000000000001,
"calib/std_conf": 0.13488050266810248,
"calib/step_conf_rate": 0.2421875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0859375,
"completions/max_length": 2922.0,
"completions/max_terminated_length": 2922.0,
"completions/mean_length": 589.24609375,
"completions/mean_terminated_length": 644.6453247070312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.018133333333333335,
"grad_norm": 2.1329660415649414,
"learning_rate": 4.25e-06,
"loss": 0.1334,
"num_tokens": 4639959.0,
"reward": 0.16368788480758667,
"reward_std": 0.28559666872024536,
"rewards/accuracy_reward_step": 0.05859375,
"rewards/final_brier_reward_step": 0.06881406903266907,
"rewards/format_reward_step_strict": 0.17578125,
"step": 17
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.4014752091667495e-10,
"aux_brier/mean_group_std": 0.10244276346951456,
"aux_brier/mean_r": 0.5107277369249441,
"aux_brier/n_active_tok": 42.42857142857143,
"aux_brier/n_groups": 6.25,
"aux_brier/n_step_records": 10.607142857142858,
"calib/answer_extract_rate": 0.24609375,
"calib/auroc": 0.49113475177304966,
"calib/avg_num_step_conf": 1.18359375,
"calib/ece": 0.7297457627114853,
"calib/final_conf_rate": 0.23046875,
"calib/format_rate": 0.171875,
"calib/frac_conf_gt_0.9": 0.6949152542372882,
"calib/gap": -0.01680851064016098,
"calib/mean_conf": 0.9083898305080954,
"calib/mu_c": 0.8949999999981367,
"calib/mu_w": 0.9118085106382977,
"calib/nonempty_final_conf_rate": 0.23046875,
"calib/nonempty_reasoning_rate": 0.3125,
"calib/nonempty_step_conf_rate": 0.24609375,
"calib/pce": 0.7173728813555531,
"calib/std_conf": 0.1312051075053675,
"calib/step_conf_rate": 0.24609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 2718.0,
"completions/max_terminated_length": 2718.0,
"completions/mean_length": 628.92578125,
"completions/mean_terminated_length": 670.8541870117188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0192,
"grad_norm": 1.147321343421936,
"learning_rate": 4.5e-06,
"loss": 0.0787,
"num_tokens": 4911684.0,
"reward": 0.14179620146751404,
"reward_std": 0.23915956914424896,
"rewards/accuracy_reward_step": 0.046875,
"rewards/final_brier_reward_step": 0.05937226489186287,
"rewards/format_reward_step_strict": 0.16015625,
"step": 18
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -2.8516915939746923e-08,
"aux_brier/mean_group_std": 0.16628030885569522,
"aux_brier/mean_r": 0.49903873928528314,
"aux_brier/n_active_tok": 57.5,
"aux_brier/n_groups": 7.09375,
"aux_brier/n_step_records": 14.375,
"calib/answer_extract_rate": 0.41015625,
"calib/auroc": 0.5369047619047619,
"calib/avg_num_step_conf": 1.89453125,
"calib/ece": 0.7307682692307691,
"calib/final_conf_rate": 0.40625,
"calib/format_rate": 0.31640625,
"calib/frac_conf_gt_0.9": 0.7211538461538461,
"calib/gap": -0.03471309523809529,
"calib/mean_conf": 0.8865375,
"calib/mu_c": 0.8584999999999999,
"calib/mu_w": 0.8932130952380952,
"calib/nonempty_final_conf_rate": 0.40625,
"calib/nonempty_reasoning_rate": 0.453125,
"calib/nonempty_step_conf_rate": 0.390625,
"calib/pce": 0.7124990384615384,
"calib/std_conf": 0.19888949298711855,
"calib/step_conf_rate": 0.390625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.046875,
"completions/max_length": 3012.0,
"completions/max_terminated_length": 3012.0,
"completions/mean_length": 476.7265625,
"completions/mean_terminated_length": 500.1720886230469,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.020266666666666665,
"grad_norm": 0.4700727164745331,
"learning_rate": 4.75e-06,
"loss": 0.0943,
"num_tokens": 5138486.0,
"reward": 0.25758230686187744,
"reward_std": 0.36318379640579224,
"rewards/accuracy_reward_step": 0.078125,
"rewards/final_brier_reward_step": 0.10845429450273514,
"rewards/format_reward_step_strict": 0.3046875,
"step": 19
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 7.143965946587727e-09,
"aux_brier/mean_group_std": 0.19972032193623057,
"aux_brier/mean_r": 0.5313528417849928,
"aux_brier/n_active_tok": 82.0,
"aux_brier/n_groups": 7.71875,
"aux_brier/n_step_records": 20.5,
"calib/answer_extract_rate": 0.5703125,
"calib/auroc": 0.5001596424010217,
"calib/avg_num_step_conf": 2.625,
"calib/ece": 0.6142682926829269,
"calib/final_conf_rate": 0.48046875,
"calib/format_rate": 0.3984375,
"calib/frac_conf_gt_0.9": 0.7235772357723578,
"calib/gap": 0.00022509578544083197,
"calib/mean_conf": 0.8924796747967481,
"calib/mu_c": 0.892638888888889,
"calib/mu_w": 0.8924137931034481,
"calib/nonempty_final_conf_rate": 0.48046875,
"calib/nonempty_reasoning_rate": 0.66015625,
"calib/nonempty_step_conf_rate": 0.53515625,
"calib/pce": 0.6070325203252033,
"calib/std_conf": 0.19001644556955094,
"calib/step_conf_rate": 0.53515625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 3001.0,
"completions/max_terminated_length": 3001.0,
"completions/mean_length": 429.28125,
"completions/mean_terminated_length": 439.5840148925781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.021333333333333333,
"grad_norm": 0.4443240165710449,
"learning_rate": 5e-06,
"loss": 0.0815,
"num_tokens": 5353254.0,
"reward": 0.38403499126434326,
"reward_std": 0.4633830189704895,
"rewards/accuracy_reward_step": 0.1484375,
"rewards/final_brier_reward_step": 0.17676514387130737,
"rewards/format_reward_step_strict": 0.3828125,
"step": 20
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 5.135037942777254e-08,
"aux_brier/mean_group_std": 0.18036753840812028,
"aux_brier/mean_r": 0.4919380842477488,
"aux_brier/n_active_tok": 80.0,
"aux_brier/n_groups": 6.78125,
"aux_brier/n_step_records": 20.0,
"calib/answer_extract_rate": 0.59375,
"calib/auroc": 0.4339560799737791,
"calib/avg_num_step_conf": 2.54296875,
"calib/ece": 0.6953455515957019,
"calib/final_conf_rate": 0.546875,
"calib/format_rate": 0.4765625,
"calib/frac_conf_gt_0.9": 0.6857142857142857,
"calib/gap": 0.008877805647903636,
"calib/mean_conf": 0.875797305547155,
"calib/mu_c": 0.882962962962963,
"calib/mu_w": 0.8740851573150593,
"calib/nonempty_final_conf_rate": 0.546875,
"calib/nonempty_reasoning_rate": 0.66015625,
"calib/nonempty_step_conf_rate": 0.578125,
"calib/pce": 0.689142857142857,
"calib/std_conf": 0.2066548389307303,
"calib/step_conf_rate": 0.578125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.046875,
"completions/max_length": 3068.0,
"completions/max_terminated_length": 3068.0,
"completions/mean_length": 373.3359375,
"completions/mean_terminated_length": 391.6966857910156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.0224,
"grad_norm": 0.7015453577041626,
"learning_rate": 4.9722222222222224e-06,
"loss": 0.0818,
"num_tokens": 5551788.0,
"reward": 0.3778233528137207,
"reward_std": 0.4428941011428833,
"rewards/accuracy_reward_step": 0.11328125,
"rewards/final_brier_reward_step": 0.15973085165023804,
"rewards/format_reward_step_strict": 0.44921875,
"step": 21
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -5.202951125493627e-09,
"aux_brier/mean_group_std": 0.2020469017186471,
"aux_brier/mean_r": 0.5072843637279042,
"aux_brier/n_active_tok": 103.875,
"aux_brier/n_groups": 7.875,
"aux_brier/n_step_records": 25.96875,
"calib/answer_extract_rate": 0.6796875,
"calib/auroc": 0.494388327721661,
"calib/avg_num_step_conf": 3.3203125,
"calib/ece": 0.6857202380952381,
"calib/final_conf_rate": 0.65625,
"calib/format_rate": 0.59765625,
"calib/frac_conf_gt_0.9": 0.7142857142857143,
"calib/gap": 0.035859259259259035,
"calib/mean_conf": 0.8678511904761905,
"calib/mu_c": 0.8966666666666665,
"calib/mu_w": 0.8608074074074075,
"calib/nonempty_final_conf_rate": 0.65625,
"calib/nonempty_reasoning_rate": 0.76953125,
"calib/nonempty_step_conf_rate": 0.72265625,
"calib/pce": 0.6785714285714285,
"calib/std_conf": 0.21995826551873732,
"calib/step_conf_rate": 0.72265625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 3061.0,
"completions/max_terminated_length": 3061.0,
"completions/mean_length": 367.03125,
"completions/mean_terminated_length": 371.3834228515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 5.0,
"epoch": 0.023466666666666667,
"grad_norm": 0.26140135526657104,
"learning_rate": 4.944444444444445e-06,
"loss": 0.1531,
"num_tokens": 5747564.0,
"reward": 0.461717426776886,
"reward_std": 0.49233466386795044,
"rewards/accuracy_reward_step": 0.1328125,
"rewards/final_brier_reward_step": 0.20624473690986633,
"rewards/format_reward_step_strict": 0.5546875,
"step": 22
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 3.447986701710093e-09,
"aux_brier/mean_group_std": 0.2216020534625502,
"aux_brier/mean_r": 0.5362284046828788,
"aux_brier/n_active_tok": 108.625,
"aux_brier/n_groups": 9.09375,
"aux_brier/n_step_records": 27.15625,
"calib/answer_extract_rate": 0.73046875,
"calib/auroc": 0.4558353317346123,
"calib/avg_num_step_conf": 3.40625,
"calib/ece": 0.7014285714285713,
"calib/final_conf_rate": 0.68359375,
"calib/format_rate": 0.60546875,
"calib/frac_conf_gt_0.9": 0.7142857142857143,
"calib/gap": 0.0043964828137490475,
"calib/mean_conf": 0.8842857142857141,
"calib/mu_c": 0.8877777777777778,
"calib/mu_w": 0.8833812949640287,
"calib/nonempty_final_conf_rate": 0.68359375,
"calib/nonempty_reasoning_rate": 0.7890625,
"calib/nonempty_step_conf_rate": 0.703125,
"calib/pce": 0.69,
"calib/std_conf": 0.20523276979059257,
"calib/step_conf_rate": 0.703125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2979.0,
"completions/max_terminated_length": 2979.0,
"completions/mean_length": 373.30859375,
"completions/mean_terminated_length": 377.7351989746094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 11.0,
"epoch": 0.024533333333333334,
"grad_norm": 1.195592999458313,
"learning_rate": 4.9166666666666665e-06,
"loss": 0.1399,
"num_tokens": 5947067.0,
"reward": 0.4916580021381378,
"reward_std": 0.4707548916339874,
"rewards/accuracy_reward_step": 0.140625,
"rewards/final_brier_reward_step": 0.20881952345371246,
"rewards/format_reward_step_strict": 0.59765625,
"step": 23
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 7.324486682100395e-09,
"aux_brier/mean_group_std": 0.21149588199044533,
"aux_brier/mean_r": 0.5487196136341741,
"aux_brier/n_active_tok": 113.625,
"aux_brier/n_groups": 8.21875,
"aux_brier/n_step_records": 28.40625,
"calib/answer_extract_rate": 0.77734375,
"calib/auroc": 0.5435070306038048,
"calib/avg_num_step_conf": 3.70703125,
"calib/ece": 0.6639690721649485,
"calib/final_conf_rate": 0.7578125,
"calib/format_rate": 0.67578125,
"calib/frac_conf_gt_0.9": 0.6597938144329897,
"calib/gap": 0.04734656741108345,
"calib/mean_conf": 0.8634536082474227,
"calib/mu_c": 0.9012820512820513,
"calib/mu_w": 0.8539354838709678,
"calib/nonempty_final_conf_rate": 0.7578125,
"calib/nonempty_reasoning_rate": 0.859375,
"calib/nonempty_step_conf_rate": 0.79296875,
"calib/pce": 0.6631958762886598,
"calib/std_conf": 0.20837350092812282,
"calib/step_conf_rate": 0.79296875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2626.0,
"completions/max_terminated_length": 2626.0,
"completions/mean_length": 332.3359375,
"completions/mean_terminated_length": 334.9527587890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 23.0,
"epoch": 0.0256,
"grad_norm": 0.965747058391571,
"learning_rate": 4.888888888888889e-06,
"loss": 0.0643,
"num_tokens": 6136657.0,
"reward": 0.5342280268669128,
"reward_std": 0.44181251525878906,
"rewards/accuracy_reward_step": 0.15234375,
"rewards/final_brier_reward_step": 0.25409960746765137,
"rewards/format_reward_step_strict": 0.63671875,
"step": 24
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.3584580010883585e-07,
"aux_brier/mean_group_std": 0.19688430394395745,
"aux_brier/mean_r": 0.5162810116111987,
"aux_brier/n_active_tok": 124.25,
"aux_brier/n_groups": 8.15625,
"aux_brier/n_step_records": 31.0625,
"calib/answer_extract_rate": 0.875,
"calib/auroc": 0.5254296008869179,
"calib/avg_num_step_conf": 4.00390625,
"calib/ece": 0.6919815668202767,
"calib/final_conf_rate": 0.84765625,
"calib/format_rate": 0.76953125,
"calib/frac_conf_gt_0.9": 0.7050691244239631,
"calib/gap": 0.0659243348115296,
"calib/mean_conf": 0.8809216589861752,
"calib/mu_c": 0.9343902439024389,
"calib/mu_w": 0.8684659090909093,
"calib/nonempty_final_conf_rate": 0.84765625,
"calib/nonempty_reasoning_rate": 0.9375,
"calib/nonempty_step_conf_rate": 0.87890625,
"calib/pce": 0.6919815668202767,
"calib/std_conf": 0.20345648736224245,
"calib/step_conf_rate": 0.87890625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2002.0,
"completions/max_terminated_length": 2002.0,
"completions/mean_length": 284.24609375,
"completions/mean_terminated_length": 285.3608093261719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.02666666666666667,
"grad_norm": 0.481380432844162,
"learning_rate": 4.861111111111111e-06,
"loss": 0.0614,
"num_tokens": 6312648.0,
"reward": 0.6156750321388245,
"reward_std": 0.44895535707473755,
"rewards/accuracy_reward_step": 0.171875,
"rewards/final_brier_reward_step": 0.2673875093460083,
"rewards/format_reward_step_strict": 0.75390625,
"step": 25
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 5.948377328895971e-09,
"aux_brier/mean_group_std": 0.2135516025891162,
"aux_brier/mean_r": 0.5502119437421992,
"aux_brier/n_active_tok": 131.625,
"aux_brier/n_groups": 7.40625,
"aux_brier/n_step_records": 32.90625,
"calib/answer_extract_rate": 0.90234375,
"calib/auroc": 0.5338899108532617,
"calib/avg_num_step_conf": 4.2265625,
"calib/ece": 0.7080394736842105,
"calib/final_conf_rate": 0.890625,
"calib/format_rate": 0.82421875,
"calib/frac_conf_gt_0.9": 0.7017543859649122,
"calib/gap": 0.03606608178859494,
"calib/mean_conf": 0.8700570175438596,
"calib/mu_c": 0.9002702702702703,
"calib/mu_w": 0.8642041884816754,
"calib/nonempty_final_conf_rate": 0.890625,
"calib/nonempty_reasoning_rate": 0.97265625,
"calib/nonempty_step_conf_rate": 0.92578125,
"calib/pce": 0.707907894736842,
"calib/std_conf": 0.21965012309024798,
"calib/step_conf_rate": 0.92578125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2184.0,
"completions/max_terminated_length": 2184.0,
"completions/mean_length": 294.57421875,
"completions/mean_terminated_length": 296.8937072753906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 16.0,
"epoch": 0.027733333333333332,
"grad_norm": 0.09537330269813538,
"learning_rate": 4.833333333333333e-06,
"loss": 0.0627,
"num_tokens": 6493299.0,
"reward": 0.6087180972099304,
"reward_std": 0.4283045530319214,
"rewards/accuracy_reward_step": 0.1484375,
"rewards/final_brier_reward_step": 0.27080994844436646,
"rewards/format_reward_step_strict": 0.78515625,
"step": 26
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 4.891819546659626e-09,
"aux_brier/mean_group_std": 0.21629333705944145,
"aux_brier/mean_r": 0.5526936772971238,
"aux_brier/n_active_tok": 133.125,
"aux_brier/n_groups": 7.46875,
"aux_brier/n_step_records": 33.28125,
"calib/answer_extract_rate": 0.921875,
"calib/auroc": 0.5033190160093712,
"calib/avg_num_step_conf": 4.203125,
"calib/ece": 0.6975080508474576,
"calib/final_conf_rate": 0.921875,
"calib/format_rate": 0.859375,
"calib/frac_conf_gt_0.9": 0.6228813559322034,
"calib/gap": 0.008753859169595146,
"calib/mean_conf": 0.8508978813559321,
"calib/mu_c": 0.8582051282051282,
"calib/mu_w": 0.849451269035533,
"calib/nonempty_final_conf_rate": 0.921875,
"calib/nonempty_reasoning_rate": 0.9765625,
"calib/nonempty_step_conf_rate": 0.9375,
"calib/pce": 0.6915758474576271,
"calib/std_conf": 0.21551845008263174,
"calib/step_conf_rate": 0.9375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1233.0,
"completions/max_terminated_length": 1233.0,
"completions/mean_length": 268.7734375,
"completions/mean_terminated_length": 268.7734375,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"epoch": 0.0288,
"grad_norm": 0.08425747603178024,
"learning_rate": 4.805555555555556e-06,
"loss": 0.0392,
"num_tokens": 6667321.0,
"reward": 0.6474834680557251,
"reward_std": 0.41931381821632385,
"rewards/accuracy_reward_step": 0.15234375,
"rewards/final_brier_reward_step": 0.29305893182754517,
"rewards/format_reward_step_strict": 0.84375,
"step": 27
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.8908671231148153e-08,
"aux_brier/mean_group_std": 0.2179180649672654,
"aux_brier/mean_r": 0.5355714524093081,
"aux_brier/n_active_tok": 129.625,
"aux_brier/n_groups": 7.65625,
"aux_brier/n_step_records": 32.40625,
"calib/answer_extract_rate": 0.9375,
"calib/auroc": 0.5541794033941183,
"calib/avg_num_step_conf": 4.125,
"calib/ece": 0.6512853448275864,
"calib/final_conf_rate": 0.90625,
"calib/format_rate": 0.8515625,
"calib/frac_conf_gt_0.9": 0.6767241379310345,
"calib/gap": 0.03645571835142836,
"calib/mean_conf": 0.8620612068965517,
"calib/mu_c": 0.890188679245283,
"calib/mu_w": 0.8537329608938546,
"calib/nonempty_final_conf_rate": 0.90625,
"calib/nonempty_reasoning_rate": 0.96484375,
"calib/nonempty_step_conf_rate": 0.91796875,
"calib/pce": 0.6424491379310346,
"calib/std_conf": 0.22141740083301234,
"calib/step_conf_rate": 0.91796875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2241.0,
"completions/max_terminated_length": 2241.0,
"completions/mean_length": 278.6328125,
"completions/mean_terminated_length": 279.7254943847656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 6.0,
"epoch": 0.029866666666666666,
"grad_norm": 1.87068772315979,
"learning_rate": 4.777777777777778e-06,
"loss": 0.0243,
"num_tokens": 6845595.0,
"reward": 0.697235107421875,
"reward_std": 0.46421217918395996,
"rewards/accuracy_reward_step": 0.20703125,
"rewards/final_brier_reward_step": 0.3280029296875,
"rewards/format_reward_step_strict": 0.81640625,
"step": 28
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 3.332386866450321e-10,
"aux_brier/mean_group_std": 0.2190098622058668,
"aux_brier/mean_r": 0.5798270102251177,
"aux_brier/n_active_tok": 140.25,
"aux_brier/n_groups": 8.5,
"aux_brier/n_step_records": 35.0625,
"calib/answer_extract_rate": 0.90625,
"calib/auroc": 0.4625898156825992,
"calib/avg_num_step_conf": 4.42578125,
"calib/ece": 0.7391189427312777,
"calib/final_conf_rate": 0.88671875,
"calib/format_rate": 0.83203125,
"calib/frac_conf_gt_0.9": 0.6784140969162996,
"calib/gap": 0.014039987503905005,
"calib/mean_conf": 0.8780616740088105,
"calib/mu_c": 0.8900606060606061,
"calib/mu_w": 0.8760206185567011,
"calib/nonempty_final_conf_rate": 0.88671875,
"calib/nonempty_reasoning_rate": 0.98046875,
"calib/nonempty_step_conf_rate": 0.9453125,
"calib/pce": 0.7359030837004406,
"calib/std_conf": 0.19273721737857602,
"calib/step_conf_rate": 0.9453125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2458.0,
"completions/max_terminated_length": 2458.0,
"completions/mean_length": 305.74609375,
"completions/mean_terminated_length": 306.94512939453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 25.0,
"epoch": 0.030933333333333334,
"grad_norm": 5.483370304107666,
"learning_rate": 4.75e-06,
"loss": 0.1293,
"num_tokens": 7030994.0,
"reward": 0.5947896838188171,
"reward_std": 0.399245023727417,
"rewards/accuracy_reward_step": 0.1328125,
"rewards/final_brier_reward_step": 0.25415873527526855,
"rewards/format_reward_step_strict": 0.796875,
"step": 29
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.4154402643018482e-08,
"aux_brier/mean_group_std": 0.22314070917171333,
"aux_brier/mean_r": 0.5867208662667103,
"aux_brier/n_active_tok": 144.75,
"aux_brier/n_groups": 8.1875,
"aux_brier/n_step_records": 36.1875,
"calib/answer_extract_rate": 0.9296875,
"calib/auroc": 0.5369423131571469,
"calib/avg_num_step_conf": 4.58203125,
"calib/ece": 0.7144398340248963,
"calib/final_conf_rate": 0.94140625,
"calib/format_rate": 0.875,
"calib/frac_conf_gt_0.9": 0.6307053941908713,
"calib/gap": 0.05795112247797651,
"calib/mean_conf": 0.8555186721991701,
"calib/mu_c": 0.9052941176470588,
"calib/mu_w": 0.8473429951690823,
"calib/nonempty_final_conf_rate": 0.94140625,
"calib/nonempty_reasoning_rate": 0.97265625,
"calib/nonempty_step_conf_rate": 0.94921875,
"calib/pce": 0.7144398340248963,
"calib/std_conf": 0.20074922079885882,
"calib/step_conf_rate": 0.94921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2062.0,
"completions/max_terminated_length": 2062.0,
"completions/mean_length": 277.85546875,
"completions/mean_terminated_length": 281.15020751953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 13.0,
"epoch": 0.032,
"grad_norm": 1.1227530241012573,
"learning_rate": 4.722222222222222e-06,
"loss": 0.0154,
"num_tokens": 7209109.0,
"reward": 0.6433886289596558,
"reward_std": 0.3881877660751343,
"rewards/accuracy_reward_step": 0.13671875,
"rewards/final_brier_reward_step": 0.3079296946525574,
"rewards/format_reward_step_strict": 0.859375,
"step": 30
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 9.103466834525786e-09,
"aux_brier/mean_group_std": 0.22134998099592884,
"aux_brier/mean_r": 0.6055208892957307,
"aux_brier/n_active_tok": 143.875,
"aux_brier/n_groups": 8.6875,
"aux_brier/n_step_records": 35.96875,
"calib/answer_extract_rate": 0.9375,
"calib/auroc": 0.5408590571907155,
"calib/avg_num_step_conf": 4.5234375,
"calib/ece": 0.6568340248962655,
"calib/final_conf_rate": 0.94140625,
"calib/format_rate": 0.90234375,
"calib/frac_conf_gt_0.9": 0.5809128630705395,
"calib/gap": 0.05719262981574558,
"calib/mean_conf": 0.8311078838174275,
"calib/mu_c": 0.8783333333333335,
"calib/mu_w": 0.8211407035175879,
"calib/nonempty_final_conf_rate": 0.94140625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.6568340248962655,
"calib/std_conf": 0.22082825224999478,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1854.0,
"completions/max_terminated_length": 1854.0,
"completions/mean_length": 245.19921875,
"completions/mean_terminated_length": 245.19921875,
"completions/min_length": 32.0,
"completions/min_terminated_length": 32.0,
"epoch": 0.03306666666666667,
"grad_norm": 0.18373198807239532,
"learning_rate": 4.694444444444445e-06,
"loss": 0.0428,
"num_tokens": 7377792.0,
"reward": 0.6938906908035278,
"reward_std": 0.39770248532295227,
"rewards/accuracy_reward_step": 0.1640625,
"rewards/final_brier_reward_step": 0.3536878526210785,
"rewards/format_reward_step_strict": 0.8828125,
"step": 31
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -4.489524627060426e-09,
"aux_brier/mean_group_std": 0.2142543951952405,
"aux_brier/mean_r": 0.6805003927594797,
"aux_brier/n_active_tok": 139.625,
"aux_brier/n_groups": 8.03125,
"aux_brier/n_step_records": 34.90625,
"calib/answer_extract_rate": 0.94921875,
"calib/auroc": 0.5390853658536585,
"calib/avg_num_step_conf": 4.41015625,
"calib/ece": 0.6308571428571428,
"calib/final_conf_rate": 0.95703125,
"calib/format_rate": 0.92578125,
"calib/frac_conf_gt_0.9": 0.46938775510204084,
"calib/gap": 0.041695121951219494,
"calib/mean_conf": 0.7866122448979591,
"calib/mu_c": 0.8215,
"calib/mu_w": 0.7798048780487805,
"calib/nonempty_final_conf_rate": 0.95703125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.6271020408163266,
"calib/std_conf": 0.24054435004767497,
"calib/step_conf_rate": 0.9765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2191.0,
"completions/max_terminated_length": 2191.0,
"completions/mean_length": 271.12109375,
"completions/mean_terminated_length": 271.12109375,
"completions/min_length": 56.0,
"completions/min_terminated_length": 56.0,
"epoch": 0.034133333333333335,
"grad_norm": 0.5932040214538574,
"learning_rate": 4.666666666666667e-06,
"loss": 0.1114,
"num_tokens": 7553903.0,
"reward": 0.710291862487793,
"reward_std": 0.34065794944763184,
"rewards/accuracy_reward_step": 0.15625,
"rewards/final_brier_reward_step": 0.4036675691604614,
"rewards/format_reward_step_strict": 0.90625,
"step": 32
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.159146687830127e-08,
"aux_brier/mean_group_std": 0.21152325473294545,
"aux_brier/mean_r": 0.6525623588887066,
"aux_brier/n_active_tok": 142.0,
"aux_brier/n_groups": 7.875,
"aux_brier/n_step_records": 35.5,
"calib/answer_extract_rate": 0.9453125,
"calib/auroc": 0.4629950495049505,
"calib/avg_num_step_conf": 4.4609375,
"calib/ece": 0.6564462809917354,
"calib/final_conf_rate": 0.9453125,
"calib/format_rate": 0.9140625,
"calib/frac_conf_gt_0.9": 0.47520661157024796,
"calib/gap": -0.05904207920792082,
"calib/mean_conf": 0.7960330578512396,
"calib/mu_c": 0.74675,
"calib/mu_w": 0.8057920792079208,
"calib/nonempty_final_conf_rate": 0.9453125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.6435950413223139,
"calib/std_conf": 0.22625948439585752,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2353.0,
"completions/max_terminated_length": 2353.0,
"completions/mean_length": 273.57421875,
"completions/mean_terminated_length": 274.6470642089844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 14.0,
"epoch": 0.0352,
"grad_norm": 0.8261376023292542,
"learning_rate": 4.638888888888889e-06,
"loss": 0.0665,
"num_tokens": 7730810.0,
"reward": 0.7009158134460449,
"reward_std": 0.3576313257217407,
"rewards/accuracy_reward_step": 0.15625,
"rewards/final_brier_reward_step": 0.36616331338882446,
"rewards/format_reward_step_strict": 0.90625,
"step": 33
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -3.266226020898211e-08,
"aux_brier/mean_group_std": 0.17297121526688108,
"aux_brier/mean_r": 0.755081980971118,
"aux_brier/n_active_tok": 136.5,
"aux_brier/n_groups": 7.84375,
"aux_brier/n_step_records": 34.125,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.4553470919324578,
"calib/avg_num_step_conf": 4.30078125,
"calib/ece": 0.6106693989071038,
"calib/final_conf_rate": 0.953125,
"calib/format_rate": 0.9296875,
"calib/frac_conf_gt_0.9": 0.4344262295081967,
"calib/gap": 0.00845153220762962,
"calib/mean_conf": 0.7664890710382515,
"calib/mu_c": 0.7735897435897435,
"calib/mu_w": 0.7651382113821139,
"calib/nonempty_final_conf_rate": 0.953125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.6086612021857923,
"calib/std_conf": 0.2548164727056724,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2135.0,
"completions/max_terminated_length": 2135.0,
"completions/mean_length": 237.875,
"completions/mean_terminated_length": 237.875,
"completions/min_length": 62.0,
"completions/min_terminated_length": 62.0,
"epoch": 0.03626666666666667,
"grad_norm": 0.4891229569911957,
"learning_rate": 4.611111111111112e-06,
"loss": -0.001,
"num_tokens": 7896818.0,
"reward": 0.7069133520126343,
"reward_std": 0.3561670184135437,
"rewards/accuracy_reward_step": 0.15234375,
"rewards/final_brier_reward_step": 0.4057784676551819,
"rewards/format_reward_step_strict": 0.90625,
"step": 34
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.2876700296315935e-08,
"aux_brier/mean_group_std": 0.19110142701219532,
"aux_brier/mean_r": 0.7578070904330929,
"aux_brier/n_active_tok": 143.0,
"aux_brier/n_groups": 7.4375,
"aux_brier/n_step_records": 35.75,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.4994965767217076,
"calib/avg_num_step_conf": 4.484375,
"calib/ece": 0.5534279835390946,
"calib/final_conf_rate": 0.94921875,
"calib/format_rate": 0.921875,
"calib/frac_conf_gt_0.9": 0.3950617283950617,
"calib/gap": 0.005865485300040163,
"calib/mean_conf": 0.7473127572016462,
"calib/mu_c": 0.7519230769230769,
"calib/mu_w": 0.7460575916230368,
"calib/nonempty_final_conf_rate": 0.94921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.5433744855967078,
"calib/std_conf": 0.2638745355196102,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1265.0,
"completions/max_terminated_length": 1265.0,
"completions/mean_length": 245.8203125,
"completions/mean_terminated_length": 245.8203125,
"completions/min_length": 30.0,
"completions/min_terminated_length": 30.0,
"epoch": 0.037333333333333336,
"grad_norm": 0.13622772693634033,
"learning_rate": 4.583333333333333e-06,
"loss": -0.0141,
"num_tokens": 8069004.0,
"reward": 0.7694624662399292,
"reward_std": 0.4192921817302704,
"rewards/accuracy_reward_step": 0.20703125,
"rewards/final_brier_reward_step": 0.4372250735759735,
"rewards/format_reward_step_strict": 0.90625,
"step": 35
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.5424452007306577e-08,
"aux_brier/mean_group_std": 0.17299765102865022,
"aux_brier/mean_r": 0.8162546674847609,
"aux_brier/n_active_tok": 148.25,
"aux_brier/n_groups": 7.875,
"aux_brier/n_step_records": 37.0625,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4595408895265424,
"calib/avg_num_step_conf": 4.66796875,
"calib/ece": 0.39436507936507936,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.27380952380952384,
"calib/gap": -0.03577044476327118,
"calib/mean_conf": 0.6473015873015873,
"calib/mu_c": 0.6231707317073171,
"calib/mu_w": 0.6589411764705883,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3581349206349207,
"calib/std_conf": 0.2916657488377741,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 775.0,
"completions/max_terminated_length": 775.0,
"completions/mean_length": 235.9375,
"completions/mean_terminated_length": 235.9375,
"completions/min_length": 40.0,
"completions/min_terminated_length": 40.0,
"epoch": 0.0384,
"grad_norm": 0.325826495885849,
"learning_rate": 4.555555555555556e-06,
"loss": 0.0025,
"num_tokens": 8232116.0,
"reward": 0.9381535053253174,
"reward_std": 0.4128226637840271,
"rewards/accuracy_reward_step": 0.3203125,
"rewards/final_brier_reward_step": 0.5573015213012695,
"rewards/format_reward_step_strict": 0.95703125,
"step": 36
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.690912870306761e-08,
"aux_brier/mean_group_std": 0.1140466228276657,
"aux_brier/mean_r": 0.8719295000616452,
"aux_brier/n_active_tok": 142.875,
"aux_brier/n_groups": 7.875,
"aux_brier/n_step_records": 35.71875,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.4106198034769463,
"calib/avg_num_step_conf": 4.5,
"calib/ece": 0.4436,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 0.204,
"calib/gap": -0.08550075585789862,
"calib/mean_conf": 0.59944,
"calib/mu_c": 0.5324074074074074,
"calib/mu_w": 0.6179081632653061,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.41352,
"calib/std_conf": 0.2927874423536638,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1383.0,
"completions/max_terminated_length": 1383.0,
"completions/mean_length": 252.17578125,
"completions/mean_terminated_length": 253.1647186279297,
"completions/min_length": 0.0,
"completions/min_terminated_length": 52.0,
"epoch": 0.039466666666666664,
"grad_norm": 1.0115748643875122,
"learning_rate": 4.527777777777778e-06,
"loss": -0.0142,
"num_tokens": 8403769.0,
"reward": 0.8041490316390991,
"reward_std": 0.37829023599624634,
"rewards/accuracy_reward_step": 0.2109375,
"rewards/final_brier_reward_step": 0.5369086265563965,
"rewards/format_reward_step_strict": 0.91796875,
"step": 37
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -3.7883934211890846e-08,
"aux_brier/mean_group_std": 0.13285248793464477,
"aux_brier/mean_r": 0.8842191220307359,
"aux_brier/n_active_tok": 141.0,
"aux_brier/n_groups": 7.625,
"aux_brier/n_step_records": 35.25,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4564952048823016,
"calib/avg_num_step_conf": 4.4765625,
"calib/ece": 0.3612145748987855,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 0.1700404858299595,
"calib/gap": -0.040134263295553696,
"calib/mean_conf": 0.5548987854251012,
"calib/mu_c": 0.5248387096774193,
"calib/mu_w": 0.564972972972973,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.33255060728744945,
"calib/std_conf": 0.30877564454226697,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2200.0,
"completions/max_terminated_length": 2200.0,
"completions/mean_length": 242.1484375,
"completions/mean_terminated_length": 242.1484375,
"completions/min_length": 53.0,
"completions/min_terminated_length": 53.0,
"epoch": 0.04053333333333333,
"grad_norm": 0.6419054269790649,
"learning_rate": 4.5e-06,
"loss": -0.0197,
"num_tokens": 8572647.0,
"reward": 0.8677629828453064,
"reward_std": 0.37110045552253723,
"rewards/accuracy_reward_step": 0.2578125,
"rewards/final_brier_reward_step": 0.5804269313812256,
"rewards/format_reward_step_strict": 0.9296875,
"step": 38
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 3.4320344233229605e-08,
"aux_brier/mean_group_std": 0.10604813857507032,
"aux_brier/mean_r": 0.9041069110310074,
"aux_brier/n_active_tok": 136.375,
"aux_brier/n_groups": 7.65625,
"aux_brier/n_step_records": 34.09375,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.45264994948103243,
"calib/avg_num_step_conf": 4.28125,
"calib/ece": 0.3555645161290323,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.9453125,
"calib/frac_conf_gt_0.9": 0.16129032258064516,
"calib/gap": -0.059753834848902454,
"calib/mean_conf": 0.49637096774193545,
"calib/mu_c": 0.45035087719298245,
"calib/mu_w": 0.5101047120418849,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3110483870967742,
"calib/std_conf": 0.31395971253110144,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1043.0,
"completions/max_terminated_length": 1043.0,
"completions/mean_length": 246.81640625,
"completions/mean_terminated_length": 247.78433227539062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 53.0,
"epoch": 0.0416,
"grad_norm": 0.026621559634804726,
"learning_rate": 4.472222222222223e-06,
"loss": -0.0226,
"num_tokens": 8741920.0,
"reward": 0.8384658098220825,
"reward_std": 0.35034242272377014,
"rewards/accuracy_reward_step": 0.22265625,
"rewards/final_brier_reward_step": 0.5960507392883301,
"rewards/format_reward_step_strict": 0.93359375,
"step": 39
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.717222444686973e-07,
"aux_brier/mean_group_std": 0.08273824353456184,
"aux_brier/mean_r": 0.923825084156272,
"aux_brier/n_active_tok": 159.25,
"aux_brier/n_groups": 9.03125,
"aux_brier/n_step_records": 39.8125,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5053658536585366,
"calib/avg_num_step_conf": 4.98828125,
"calib/ece": 0.3427584,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.116,
"calib/gap": 0.019038699186991803,
"calib/mean_conf": 0.4617216,
"calib/mu_c": 0.4773333333333333,
"calib/mu_w": 0.4582946341463415,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.31224,
"calib/std_conf": 0.30503245193493755,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 766.0,
"completions/max_terminated_length": 766.0,
"completions/mean_length": 260.7109375,
"completions/mean_terminated_length": 261.73333740234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 50.0,
"epoch": 0.042666666666666665,
"grad_norm": 0.34497296810150146,
"learning_rate": 4.444444444444444e-06,
"loss": 0.0293,
"num_tokens": 8915422.0,
"reward": 0.8194528818130493,
"reward_std": 0.2984590530395508,
"rewards/accuracy_reward_step": 0.1796875,
"rewards/final_brier_reward_step": 0.6606241464614868,
"rewards/format_reward_step_strict": 0.94921875,
"step": 40
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -4.04391604064136e-07,
"aux_brier/mean_group_std": 0.07820914098556263,
"aux_brier/mean_r": 0.9344050996274995,
"aux_brier/n_active_tok": 150.25,
"aux_brier/n_groups": 8.34375,
"aux_brier/n_step_records": 37.5625,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5114382472873039,
"calib/avg_num_step_conf": 4.7421875,
"calib/ece": 0.21831999999999996,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 0.068,
"calib/gap": 0.0020402239270163602,
"calib/mean_conf": 0.38112,
"calib/mu_c": 0.38241758241758245,
"calib/mu_w": 0.3803773584905661,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.11771999999999999,
"calib/std_conf": 0.278381654567969,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 851.0,
"completions/max_terminated_length": 851.0,
"completions/mean_length": 244.734375,
"completions/mean_terminated_length": 245.6941375732422,
"completions/min_length": 0.0,
"completions/min_terminated_length": 64.0,
"epoch": 0.04373333333333333,
"grad_norm": 0.36892974376678467,
"learning_rate": 4.416666666666667e-06,
"loss": 0.0224,
"num_tokens": 9085322.0,
"reward": 1.0038294792175293,
"reward_std": 0.37469637393951416,
"rewards/accuracy_reward_step": 0.3671875,
"rewards/final_brier_reward_step": 0.6559433341026306,
"rewards/format_reward_step_strict": 0.9453125,
"step": 41
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 6.246743733395732e-09,
"aux_brier/mean_group_std": 0.06735158377045987,
"aux_brier/mean_r": 0.9480342624386815,
"aux_brier/n_active_tok": 149.375,
"aux_brier/n_groups": 8.0625,
"aux_brier/n_step_records": 37.34375,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5320121951219513,
"calib/avg_num_step_conf": 4.67578125,
"calib/ece": 0.21748031496062997,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.05905511811023622,
"calib/gap": 0.01180516165626777,
"calib/mean_conf": 0.3445669291338583,
"calib/mu_c": 0.35256097560975613,
"calib/mu_w": 0.34075581395348836,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.11960629921259844,
"calib/std_conf": 0.2856509513250953,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2133.0,
"completions/max_terminated_length": 2133.0,
"completions/mean_length": 243.15234375,
"completions/mean_terminated_length": 243.15234375,
"completions/min_length": 69.0,
"completions/min_terminated_length": 69.0,
"epoch": 0.0448,
"grad_norm": 0.6862514615058899,
"learning_rate": 4.388888888888889e-06,
"loss": -0.025,
"num_tokens": 9251937.0,
"reward": 0.9844409823417664,
"reward_std": 0.32201334834098816,
"rewards/accuracy_reward_step": 0.3203125,
"rewards/final_brier_reward_step": 0.695576548576355,
"rewards/format_reward_step_strict": 0.98046875,
"step": 42
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -5.117350127537001e-07,
"aux_brier/mean_group_std": 0.05748583749375196,
"aux_brier/mean_r": 0.9567184962157195,
"aux_brier/n_active_tok": 156.75,
"aux_brier/n_groups": 9.5,
"aux_brier/n_step_records": 39.1875,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4571264367816092,
"calib/avg_num_step_conf": 4.9140625,
"calib/ece": 0.2486746987951807,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.060240963855421686,
"calib/gap": -0.04055172413793101,
"calib/mean_conf": 0.3110040160642571,
"calib/mu_c": 0.2826666666666667,
"calib/mu_w": 0.3232183908045977,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.12923694779116465,
"calib/std_conf": 0.2726505994518655,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1094.0,
"completions/max_terminated_length": 1094.0,
"completions/mean_length": 263.7890625,
"completions/mean_terminated_length": 264.82354736328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 47.0,
"epoch": 0.04586666666666667,
"grad_norm": 0.4393234848976135,
"learning_rate": 4.361111111111112e-06,
"loss": -0.0137,
"num_tokens": 9424691.0,
"reward": 0.9548882842063904,
"reward_std": 0.3085918426513672,
"rewards/accuracy_reward_step": 0.30078125,
"rewards/final_brier_reward_step": 0.6789281368255615,
"rewards/format_reward_step_strict": 0.96875,
"step": 43
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 4.494938199428589e-09,
"aux_brier/mean_group_std": 0.05338743252005995,
"aux_brier/mean_r": 0.9590074203253443,
"aux_brier/n_active_tok": 171.75,
"aux_brier/n_groups": 9.0625,
"aux_brier/n_step_records": 42.9375,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5048535043191735,
"calib/avg_num_step_conf": 5.4140625,
"calib/ece": 0.19866141732283465,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.05905511811023622,
"calib/gap": 0.003357378217116458,
"calib/mean_conf": 0.28283464566929134,
"calib/mu_c": 0.2854385964912281,
"calib/mu_w": 0.28208121827411164,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.12854330708661418,
"calib/std_conf": 0.26993407663503965,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1177.0,
"completions/max_terminated_length": 1177.0,
"completions/mean_length": 290.1796875,
"completions/mean_terminated_length": 291.3176574707031,
"completions/min_length": 0.0,
"completions/min_terminated_length": 87.0,
"epoch": 0.046933333333333334,
"grad_norm": 0.09494109451770782,
"learning_rate": 4.333333333333334e-06,
"loss": 0.0134,
"num_tokens": 9605297.0,
"reward": 0.8937559723854065,
"reward_std": 0.25675415992736816,
"rewards/accuracy_reward_step": 0.2265625,
"rewards/final_brier_reward_step": 0.7390863299369812,
"rewards/format_reward_step_strict": 0.96484375,
"step": 44
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 7.827950360139724e-07,
"aux_brier/mean_group_std": 0.05122791971037719,
"aux_brier/mean_r": 0.9636901903632081,
"aux_brier/n_active_tok": 176.625,
"aux_brier/n_groups": 10.375,
"aux_brier/n_step_records": 44.15625,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5433559792534152,
"calib/avg_num_step_conf": 5.5625,
"calib/ece": 0.191884,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.032,
"calib/gap": 0.03134567901234561,
"calib/mean_conf": 0.22115600000000002,
"calib/mu_c": 0.24234567901234566,
"calib/mu_w": 0.21100000000000005,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.04451999999999999,
"calib/std_conf": 0.2293511448935889,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2795.0,
"completions/max_terminated_length": 2795.0,
"completions/mean_length": 309.34765625,
"completions/mean_terminated_length": 309.34765625,
"completions/min_length": 75.0,
"completions/min_terminated_length": 75.0,
"epoch": 0.048,
"grad_norm": 0.03964204341173172,
"learning_rate": 4.305555555555556e-06,
"loss": 0.0625,
"num_tokens": 9789538.0,
"reward": 0.9754707217216492,
"reward_std": 0.3682977855205536,
"rewards/accuracy_reward_step": 0.31640625,
"rewards/final_brier_reward_step": 0.7143828868865967,
"rewards/format_reward_step_strict": 0.9609375,
"step": 45
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.2189748136437473e-06,
"aux_brier/mean_group_std": 0.07619248927643729,
"aux_brier/mean_r": 0.9463890522674315,
"aux_brier/n_active_tok": 183.5,
"aux_brier/n_groups": 11.78125,
"aux_brier/n_step_records": 45.875,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5472061657032756,
"calib/avg_num_step_conf": 5.7578125,
"calib/ece": 0.22392519685039372,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.027559055118110236,
"calib/gap": 0.005120887747092018,
"calib/mean_conf": 0.19741338582677165,
"calib/mu_c": 0.20090123456790124,
"calib/mu_w": 0.19578034682080922,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.05122047244094488,
"calib/std_conf": 0.21766503915337101,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2082.0,
"completions/max_terminated_length": 2082.0,
"completions/mean_length": 304.21484375,
"completions/mean_terminated_length": 304.21484375,
"completions/min_length": 82.0,
"completions/min_terminated_length": 82.0,
"epoch": 0.04906666666666667,
"grad_norm": 0.08802807331085205,
"learning_rate": 4.277777777777778e-06,
"loss": 0.0079,
"num_tokens": 9972185.0,
"reward": 0.9807976484298706,
"reward_std": 0.2961314618587494,
"rewards/accuracy_reward_step": 0.31640625,
"rewards/final_brier_reward_step": 0.7122529745101929,
"rewards/format_reward_step_strict": 0.97265625,
"step": 46
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 8.231699693272976e-07,
"aux_brier/mean_group_std": 0.05437717597896134,
"aux_brier/mean_r": 0.9577333014632524,
"aux_brier/n_active_tok": 208.625,
"aux_brier/n_groups": 14.84375,
"aux_brier/n_step_records": 52.15625,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5303409090909091,
"calib/avg_num_step_conf": 6.578125,
"calib/ece": 0.19926693227091632,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.00796812749003984,
"calib/gap": 0.001556818181818187,
"calib/mean_conf": 0.15690836653386453,
"calib/mu_c": 0.158,
"calib/mu_w": 0.15644318181818181,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.028685258964143426,
"calib/std_conf": 0.17983189242065695,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2550.0,
"completions/max_terminated_length": 2550.0,
"completions/mean_length": 340.875,
"completions/mean_terminated_length": 340.875,
"completions/min_length": 77.0,
"completions/min_terminated_length": 77.0,
"epoch": 0.050133333333333335,
"grad_norm": 0.2760876417160034,
"learning_rate": 4.25e-06,
"loss": 0.1166,
"num_tokens": 10165425.0,
"reward": 0.9496136903762817,
"reward_std": 0.28500351309776306,
"rewards/accuracy_reward_step": 0.29296875,
"rewards/final_brier_reward_step": 0.712517261505127,
"rewards/format_reward_step_strict": 0.95703125,
"step": 47
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.8346091193444103e-07,
"aux_brier/mean_group_std": 0.06138813673286345,
"aux_brier/mean_r": 0.9542347008064548,
"aux_brier/n_active_tok": 171.875,
"aux_brier/n_groups": 9.71875,
"aux_brier/n_step_records": 42.96875,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5143207282913165,
"calib/avg_num_step_conf": 5.4296875,
"calib/ece": 0.2421652173913044,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.011857707509881422,
"calib/gap": -0.010842226890756285,
"calib/mean_conf": 0.14684664031620553,
"calib/mu_c": 0.13964705882352943,
"calib/mu_w": 0.15048928571428571,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.02652173913043478,
"calib/std_conf": 0.15776694035490982,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1459.0,
"completions/max_terminated_length": 1459.0,
"completions/mean_length": 286.203125,
"completions/mean_terminated_length": 287.32550048828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 83.0,
"epoch": 0.0512,
"grad_norm": 0.04931079223752022,
"learning_rate": 4.222222222222223e-06,
"loss": 0.0162,
"num_tokens": 10342381.0,
"reward": 0.9951045513153076,
"reward_std": 0.2820585072040558,
"rewards/accuracy_reward_step": 0.33203125,
"rewards/final_brier_reward_step": 0.6991681456565857,
"rewards/format_reward_step_strict": 0.9765625,
"step": 48
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.0588506880182535e-06,
"aux_brier/mean_group_std": 0.056505151484209745,
"aux_brier/mean_r": 0.9558889455944084,
"aux_brier/n_active_tok": 195.125,
"aux_brier/n_groups": 11.6875,
"aux_brier/n_step_records": 48.78125,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5084617501375894,
"calib/avg_num_step_conf": 6.1640625,
"calib/ece": 0.26581460000000007,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.009991455696202509,
"calib/mean_conf": 0.1081854,
"calib/mu_c": 0.11449999999999999,
"calib/mu_w": 0.10450854430379748,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.003,
"calib/std_conf": 0.10756204403431538,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2422.0,
"completions/max_terminated_length": 2422.0,
"completions/mean_length": 357.16015625,
"completions/mean_terminated_length": 357.16015625,
"completions/min_length": 123.0,
"completions/min_terminated_length": 123.0,
"epoch": 0.05226666666666667,
"grad_norm": 0.8869736194610596,
"learning_rate": 4.194444444444445e-06,
"loss": 0.0765,
"num_tokens": 10538350.0,
"reward": 1.0061066150665283,
"reward_std": 0.28344491124153137,
"rewards/accuracy_reward_step": 0.359375,
"rewards/final_brier_reward_step": 0.6728639602661133,
"rewards/format_reward_step_strict": 0.95703125,
"step": 49
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.108546392681765e-06,
"aux_brier/mean_group_std": 0.04090366449781096,
"aux_brier/mean_r": 0.9695145255332592,
"aux_brier/n_active_tok": 197.5,
"aux_brier/n_groups": 13.0625,
"aux_brier/n_step_records": 49.375,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5207922095012502,
"calib/avg_num_step_conf": 6.28125,
"calib/ece": 0.3382629482071713,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.01195219123505976,
"calib/gap": -0.01959244637452298,
"calib/mean_conf": 0.126199203187251,
"calib/mu_c": 0.1145686274509804,
"calib/mu_w": 0.13416107382550338,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.02904382470119522,
"calib/std_conf": 0.14819941932346195,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1389.0,
"completions/max_terminated_length": 1389.0,
"completions/mean_length": 349.48828125,
"completions/mean_terminated_length": 350.8588562011719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 26.0,
"epoch": 0.05333333333333334,
"grad_norm": 0.14290562272071838,
"learning_rate": 4.166666666666667e-06,
"loss": -0.0249,
"num_tokens": 10733179.0,
"reward": 1.0418678522109985,
"reward_std": 0.2925070524215698,
"rewards/accuracy_reward_step": 0.40234375,
"rewards/final_brier_reward_step": 0.6284089088439941,
"rewards/format_reward_step_strict": 0.96484375,
"step": 50
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.3822614484959583e-06,
"aux_brier/mean_group_std": 0.047827093132465026,
"aux_brier/mean_r": 0.9717914343826168,
"aux_brier/n_active_tok": 195.125,
"aux_brier/n_groups": 10.6875,
"aux_brier/n_step_records": 48.78125,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4762771350578448,
"calib/avg_num_step_conf": 6.109375,
"calib/ece": 0.31774901960784313,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.00392156862745098,
"calib/gap": -0.01997738203561679,
"calib/mean_conf": 0.10805490196078434,
"calib/mu_c": 0.09575510204081635,
"calib/mu_w": 0.11573248407643313,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.020745098039215683,
"calib/std_conf": 0.12316498151686303,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1002.0,
"completions/max_terminated_length": 1002.0,
"completions/mean_length": 331.3203125,
"completions/mean_terminated_length": 332.61962890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 89.0,
"epoch": 0.0544,
"grad_norm": 0.16508427262306213,
"learning_rate": 4.138888888888889e-06,
"loss": 0.0142,
"num_tokens": 10927293.0,
"reward": 1.0353152751922607,
"reward_std": 0.2406586855649948,
"rewards/accuracy_reward_step": 0.3828125,
"rewards/final_brier_reward_step": 0.6490738391876221,
"rewards/format_reward_step_strict": 0.98046875,
"step": 51
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 7.570171554061034e-07,
"aux_brier/mean_group_std": 0.07408634926746249,
"aux_brier/mean_r": 0.9532624709956302,
"aux_brier/n_active_tok": 166.0,
"aux_brier/n_groups": 9.15625,
"aux_brier/n_step_records": 41.5,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5591278640059127,
"calib/avg_num_step_conf": 5.265625,
"calib/ece": 0.4136469411764705,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.00392156862745098,
"calib/gap": 0.007451731337767942,
"calib/mean_conf": 0.07654913725490195,
"calib/mu_c": 0.08040650406504068,
"calib/mu_w": 0.07295477272727274,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.00392156862745098,
"calib/std_conf": 0.09322034923034896,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1027.0,
"completions/max_terminated_length": 1027.0,
"completions/mean_length": 310.77734375,
"completions/mean_terminated_length": 311.99609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 74.0,
"epoch": 0.055466666666666664,
"grad_norm": 0.18967147171497345,
"learning_rate": 4.111111111111111e-06,
"loss": -0.0302,
"num_tokens": 11114804.0,
"reward": 1.116457462310791,
"reward_std": 0.31173473596572876,
"rewards/accuracy_reward_step": 0.48046875,
"rewards/final_brier_reward_step": 0.5752047300338745,
"rewards/format_reward_step_strict": 0.984375,
"step": 52
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -3.1322278472023513e-06,
"aux_brier/mean_group_std": 0.05321139141871668,
"aux_brier/mean_r": 0.9492036096971312,
"aux_brier/n_active_tok": 202.5,
"aux_brier/n_groups": 12.8125,
"aux_brier/n_step_records": 50.625,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5176851851851852,
"calib/avg_num_step_conf": 6.3828125,
"calib/ece": 0.3905490196078432,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.001962962962962972,
"calib/mean_conf": 0.08929411764705882,
"calib/mu_c": 0.09033333333333333,
"calib/mu_w": 0.08837037037037036,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.004627450980392158,
"calib/std_conf": 0.09922588492882152,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2000.0,
"completions/max_terminated_length": 2000.0,
"completions/mean_length": 375.09765625,
"completions/mean_terminated_length": 375.09765625,
"completions/min_length": 51.0,
"completions/min_terminated_length": 51.0,
"epoch": 0.05653333333333333,
"grad_norm": 0.22604136168956757,
"learning_rate": 4.083333333333334e-06,
"loss": 0.0584,
"num_tokens": 11316653.0,
"reward": 1.1153674125671387,
"reward_std": 0.2891712188720703,
"rewards/accuracy_reward_step": 0.46875,
"rewards/final_brier_reward_step": 0.5942816734313965,
"rewards/format_reward_step_strict": 0.99609375,
"step": 53
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.1980869404636394e-06,
"aux_brier/mean_group_std": 0.06465943559997471,
"aux_brier/mean_r": 0.9513168879103168,
"aux_brier/n_active_tok": 188.75,
"aux_brier/n_groups": 11.8125,
"aux_brier/n_step_records": 47.1875,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.47375172068577154,
"calib/avg_num_step_conf": 5.97265625,
"calib/ece": 0.4015019762845849,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0025910399199098877,
"calib/mean_conf": 0.08584980237154151,
"calib/mu_c": 0.08450819672131149,
"calib/mu_w": 0.08709923664122138,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.002569169960474308,
"calib/std_conf": 0.09009749424007524,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1119.0,
"completions/max_terminated_length": 1119.0,
"completions/mean_length": 322.02734375,
"completions/mean_terminated_length": 323.29022216796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.0576,
"grad_norm": 0.770313560962677,
"learning_rate": 4.055555555555556e-06,
"loss": 0.0044,
"num_tokens": 11505324.0,
"reward": 1.113966464996338,
"reward_std": 0.2687970697879791,
"rewards/accuracy_reward_step": 0.484375,
"rewards/final_brier_reward_step": 0.5730531215667725,
"rewards/format_reward_step_strict": 0.97265625,
"step": 54
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 3.4401057884478337e-06,
"aux_brier/mean_group_std": 0.07322190067020191,
"aux_brier/mean_r": 0.9466186695298467,
"aux_brier/n_active_tok": 189.375,
"aux_brier/n_groups": 12.28125,
"aux_brier/n_step_records": 47.34375,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4996967859308672,
"calib/avg_num_step_conf": 5.91796875,
"calib/ece": 0.31548,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.008,
"calib/gap": 0.014952496462502546,
"calib/mean_conf": 0.09187999999999999,
"calib/mu_c": 0.10103092783505158,
"calib/mu_w": 0.08607843137254903,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.009680000000000001,
"calib/std_conf": 0.1265703978029618,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2173.0,
"completions/max_terminated_length": 2173.0,
"completions/mean_length": 358.234375,
"completions/mean_terminated_length": 359.6392517089844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 70.0,
"epoch": 0.058666666666666666,
"grad_norm": 0.14670225977897644,
"learning_rate": 4.027777777777779e-06,
"loss": 0.0283,
"num_tokens": 11704856.0,
"reward": 1.027816891670227,
"reward_std": 0.3024364113807678,
"rewards/accuracy_reward_step": 0.37890625,
"rewards/final_brier_reward_step": 0.6503300666809082,
"rewards/format_reward_step_strict": 0.97265625,
"step": 55
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.122842637646393e-06,
"aux_brier/mean_group_std": 0.05879436083996134,
"aux_brier/mean_r": 0.9589549762011267,
"aux_brier/n_active_tok": 187.75,
"aux_brier/n_groups": 11.1875,
"aux_brier/n_step_records": 46.9375,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5174732100197939,
"calib/avg_num_step_conf": 5.87890625,
"calib/ece": 0.2992234126984126,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0008867176301958768,
"calib/mean_conf": 0.07284007936507936,
"calib/mu_c": 0.0734065934065934,
"calib/mu_w": 0.07251987577639753,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0054761904761904765,
"calib/std_conf": 0.08554317217839719,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2558.0,
"completions/max_terminated_length": 2558.0,
"completions/mean_length": 367.98828125,
"completions/mean_terminated_length": 367.98828125,
"completions/min_length": 112.0,
"completions/min_terminated_length": 112.0,
"epoch": 0.05973333333333333,
"grad_norm": 0.16074801981449127,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0882,
"num_tokens": 11905901.0,
"reward": 1.0079951286315918,
"reward_std": 0.2715536057949066,
"rewards/accuracy_reward_step": 0.35546875,
"rewards/final_brier_reward_step": 0.6647930145263672,
"rewards/format_reward_step_strict": 0.97265625,
"step": 56
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.1918806131226987e-07,
"aux_brier/mean_group_std": 0.04542189579562134,
"aux_brier/mean_r": 0.9712381790108774,
"aux_brier/n_active_tok": 177.75,
"aux_brier/n_groups": 9.5,
"aux_brier/n_step_records": 44.4375,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.38972431077694236,
"calib/avg_num_step_conf": 5.5546875,
"calib/ece": 0.46893280632411066,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.02461716791979951,
"calib/mean_conf": 0.06814229249011859,
"calib/mu_c": 0.05646616541353383,
"calib/mu_w": 0.08108333333333334,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0056916996047430835,
"calib/std_conf": 0.07984240597810757,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1121.0,
"completions/max_terminated_length": 1121.0,
"completions/mean_length": 338.27734375,
"completions/mean_terminated_length": 339.60394287109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 61.0,
"epoch": 0.0608,
"grad_norm": 0.06650305539369583,
"learning_rate": 3.972222222222223e-06,
"loss": 0.0277,
"num_tokens": 12099292.0,
"reward": 1.144626259803772,
"reward_std": 0.25971370935440063,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/final_brier_reward_step": 0.5160050988197327,
"rewards/format_reward_step_strict": 0.984375,
"step": 57
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.9477825325964204e-06,
"aux_brier/mean_group_std": 0.037870995762718844,
"aux_brier/mean_r": 0.9764260570047406,
"aux_brier/n_active_tok": 221.875,
"aux_brier/n_groups": 16.0625,
"aux_brier/n_step_records": 55.46875,
"calib/answer_extract_rate": 0.95703125,
"calib/auroc": 0.49853854585312385,
"calib/avg_num_step_conf": 6.93359375,
"calib/ece": 0.2776422764227642,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0005334307636097879,
"calib/mean_conf": 0.06788617886178863,
"calib/mu_c": 0.06823529411764707,
"calib/mu_w": 0.06770186335403729,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.06247929130117005,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2753.0,
"completions/max_terminated_length": 2753.0,
"completions/mean_length": 450.12890625,
"completions/mean_terminated_length": 451.8941345214844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 116.0,
"epoch": 0.06186666666666667,
"grad_norm": 0.2836063802242279,
"learning_rate": 3.944444444444445e-06,
"loss": 0.1286,
"num_tokens": 12320845.0,
"reward": 0.9760801792144775,
"reward_std": 0.32149428129196167,
"rewards/accuracy_reward_step": 0.33203125,
"rewards/final_brier_reward_step": 0.6621332168579102,
"rewards/format_reward_step_strict": 0.95703125,
"step": 58
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -3.1172077150642963e-06,
"aux_brier/mean_group_std": 0.039330208826736476,
"aux_brier/mean_r": 0.9636895815931609,
"aux_brier/n_active_tok": 205.875,
"aux_brier/n_groups": 12.4375,
"aux_brier/n_step_records": 51.46875,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.48166023166023164,
"calib/avg_num_step_conf": 6.44921875,
"calib/ece": 0.37063241106719375,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.008438867438867431,
"calib/mean_conf": 0.057984189723320166,
"calib/mu_c": 0.05304761904761905,
"calib/mu_w": 0.06148648648648648,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.006798418972332016,
"calib/std_conf": 0.054789281457994804,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2499.0,
"completions/max_terminated_length": 2499.0,
"completions/mean_length": 380.55078125,
"completions/mean_terminated_length": 382.04315185546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 89.0,
"epoch": 0.06293333333333333,
"grad_norm": 0.3582397699356079,
"learning_rate": 3.916666666666667e-06,
"loss": -0.0261,
"num_tokens": 12524514.0,
"reward": 1.0533034801483154,
"reward_std": 0.27245858311653137,
"rewards/accuracy_reward_step": 0.41015625,
"rewards/final_brier_reward_step": 0.6116515398025513,
"rewards/format_reward_step_strict": 0.98046875,
"step": 59
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 4.900711513822209e-07,
"aux_brier/mean_group_std": 0.0503927898602,
"aux_brier/mean_r": 0.9692935058392566,
"aux_brier/n_active_tok": 183.5,
"aux_brier/n_groups": 11.21875,
"aux_brier/n_step_records": 45.875,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4329994763026971,
"calib/avg_num_step_conf": 5.92578125,
"calib/ece": 0.41379032258064513,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.017140612725844454,
"calib/mean_conf": 0.057419354838709684,
"calib/mu_c": 0.04815789473684211,
"calib/mu_w": 0.06529850746268656,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.005766129032258064,
"calib/std_conf": 0.059809792160593934,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2440.0,
"completions/max_terminated_length": 2440.0,
"completions/mean_length": 385.80859375,
"completions/mean_terminated_length": 387.32159423828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 135.0,
"epoch": 0.064,
"grad_norm": 0.4637250602245331,
"learning_rate": 3.88888888888889e-06,
"loss": 0.0458,
"num_tokens": 12732137.0,
"reward": 1.0627686977386475,
"reward_std": 0.28386422991752625,
"rewards/accuracy_reward_step": 0.4453125,
"rewards/final_brier_reward_step": 0.5557624697685242,
"rewards/format_reward_step_strict": 0.95703125,
"step": 60
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -5.201558979095466e-06,
"aux_brier/mean_group_std": 0.04909083466379876,
"aux_brier/mean_r": 0.9660164374378446,
"aux_brier/n_active_tok": 188.25,
"aux_brier/n_groups": 13.6875,
"aux_brier/n_step_records": 47.0625,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4598214285714286,
"calib/avg_num_step_conf": 5.91015625,
"calib/ece": 0.5002390438247012,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.00398406374501992,
"calib/gap": -0.009219552929085308,
"calib/mean_conf": 0.06087649402390439,
"calib/mu_c": 0.05676258992805756,
"calib/mu_w": 0.06598214285714286,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0036653386454183266,
"calib/std_conf": 0.0767571627739752,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2202.0,
"completions/max_terminated_length": 2202.0,
"completions/mean_length": 351.453125,
"completions/mean_terminated_length": 352.8313903808594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 90.0,
"epoch": 0.06506666666666666,
"grad_norm": 0.34882640838623047,
"learning_rate": 3.861111111111112e-06,
"loss": 0.0681,
"num_tokens": 12926173.0,
"reward": 1.1527085304260254,
"reward_std": 0.2640078663825989,
"rewards/accuracy_reward_step": 0.54296875,
"rewards/final_brier_reward_step": 0.4858339726924896,
"rewards/format_reward_step_strict": 0.9765625,
"step": 61
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -6.954978353346242e-06,
"aux_brier/mean_group_std": 0.04640384300669598,
"aux_brier/mean_r": 0.9693615416108637,
"aux_brier/n_active_tok": 199.625,
"aux_brier/n_groups": 13.28125,
"aux_brier/n_step_records": 49.90625,
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.4310763888888889,
"calib/avg_num_step_conf": 6.2421875,
"calib/ece": 0.3576639344262295,
"calib/final_conf_rate": 0.953125,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0021555555555555425,
"calib/mean_conf": 0.05217213114754098,
"calib/mu_c": 0.05090000000000001,
"calib/mu_w": 0.05305555555555555,
"calib/nonempty_final_conf_rate": 0.953125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0,
"calib/std_conf": 0.05048732881494173,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2842.0,
"completions/max_terminated_length": 2842.0,
"completions/mean_length": 418.1953125,
"completions/mean_terminated_length": 421.4881896972656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 73.0,
"epoch": 0.06613333333333334,
"grad_norm": 1.137746810913086,
"learning_rate": 3.833333333333334e-06,
"loss": 0.0835,
"num_tokens": 13140311.0,
"reward": 1.0145448446273804,
"reward_std": 0.3309886157512665,
"rewards/accuracy_reward_step": 0.390625,
"rewards/final_brier_reward_step": 0.5972417593002319,
"rewards/format_reward_step_strict": 0.94921875,
"step": 62
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.3098082090046148e-06,
"aux_brier/mean_group_std": 0.05808807354168096,
"aux_brier/mean_r": 0.9589815650320522,
"aux_brier/n_active_tok": 208.25,
"aux_brier/n_groups": 14.375,
"aux_brier/n_step_records": 52.0625,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5473721272645797,
"calib/avg_num_step_conf": 6.55859375,
"calib/ece": 0.45544,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.006080276550796987,
"calib/mean_conf": 0.055040000000000006,
"calib/mu_c": 0.05803149606299211,
"calib/mu_w": 0.051951219512195126,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.00124,
"calib/std_conf": 0.04809364199143168,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2942.0,
"completions/max_terminated_length": 2942.0,
"completions/mean_length": 410.234375,
"completions/mean_terminated_length": 411.8431701660156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.0672,
"grad_norm": 0.4824405610561371,
"learning_rate": 3.8055555555555556e-06,
"loss": 0.0679,
"num_tokens": 13353971.0,
"reward": 1.1175824403762817,
"reward_std": 0.32058730721473694,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/final_brier_reward_step": 0.5328296422958374,
"rewards/format_reward_step_strict": 0.9765625,
"step": 63
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 6.089682363930038e-07,
"aux_brier/mean_group_std": 0.03929437676435583,
"aux_brier/mean_r": 0.966472297348743,
"aux_brier/n_active_tok": 206.75,
"aux_brier/n_groups": 13.53125,
"aux_brier/n_step_records": 51.6875,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5280862603305785,
"calib/avg_num_step_conf": 6.46484375,
"calib/ece": 0.46465060240963857,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0057679493801652895,
"calib/mean_conf": 0.05149397590361446,
"calib/mu_c": 0.05429687500000001,
"calib/mu_w": 0.04852892561983472,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.001044176706827309,
"calib/std_conf": 0.04849329192064222,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2827.0,
"completions/max_terminated_length": 2827.0,
"completions/mean_length": 415.7890625,
"completions/mean_terminated_length": 415.7890625,
"completions/min_length": 141.0,
"completions/min_terminated_length": 141.0,
"epoch": 0.06826666666666667,
"grad_norm": 0.17579436302185059,
"learning_rate": 3.777777777777778e-06,
"loss": 0.0318,
"num_tokens": 13564189.0,
"reward": 1.118802785873413,
"reward_std": 0.2620909810066223,
"rewards/accuracy_reward_step": 0.50390625,
"rewards/final_brier_reward_step": 0.5220866799354553,
"rewards/format_reward_step_strict": 0.96875,
"step": 64
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.4793348144536154e-06,
"aux_brier/mean_group_std": 0.04025712574762179,
"aux_brier/mean_r": 0.9747281687179985,
"aux_brier/n_active_tok": 162.625,
"aux_brier/n_groups": 9.4375,
"aux_brier/n_step_records": 40.65625,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5354064039408868,
"calib/avg_num_step_conf": 5.1875,
"calib/ece": 0.40806250000000005,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0025655172413793156,
"calib/mean_conf": 0.045062500000000005,
"calib/mu_c": 0.04646551724137932,
"calib/mu_w": 0.0439,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.035292029464880595,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 869.0,
"completions/max_terminated_length": 869.0,
"completions/mean_length": 320.703125,
"completions/mean_terminated_length": 321.9608154296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.06933333333333333,
"grad_norm": 0.18231400847434998,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.0034,
"num_tokens": 13751313.0,
"reward": 1.0917396545410156,
"reward_std": 0.20031163096427917,
"rewards/accuracy_reward_step": 0.453125,
"rewards/final_brier_reward_step": 0.5778957009315491,
"rewards/format_reward_step_strict": 0.98828125,
"step": 65
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.3928245592431665e-06,
"aux_brier/mean_group_std": 0.03936907017730624,
"aux_brier/mean_r": 0.9696451228882946,
"aux_brier/n_active_tok": 205.5,
"aux_brier/n_groups": 15.1875,
"aux_brier/n_step_records": 51.375,
"calib/answer_extract_rate": 0.9453125,
"calib/auroc": 0.4852816901408451,
"calib/avg_num_step_conf": 6.65625,
"calib/ece": 0.36332644628099175,
"calib/final_conf_rate": 0.9453125,
"calib/format_rate": 0.9296875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0002499999999999933,
"calib/mean_conf": 0.04989669421487604,
"calib/mu_c": 0.04975,
"calib/mu_w": 0.049999999999999996,
"calib/nonempty_final_conf_rate": 0.9453125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0,
"calib/std_conf": 0.03719628621354623,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2605.0,
"completions/max_terminated_length": 2605.0,
"completions/mean_length": 439.7109375,
"completions/mean_terminated_length": 444.9249267578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 96.0,
"epoch": 0.0704,
"grad_norm": 0.2523045539855957,
"learning_rate": 3.7222222222222225e-06,
"loss": 0.0963,
"num_tokens": 13970231.0,
"reward": 0.9941548109054565,
"reward_std": 0.27787861227989197,
"rewards/accuracy_reward_step": 0.390625,
"rewards/final_brier_reward_step": 0.5781815648078918,
"rewards/format_reward_step_strict": 0.91796875,
"step": 66
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.2032974873464894e-06,
"aux_brier/mean_group_std": 0.04711280885917892,
"aux_brier/mean_r": 0.9700329708544827,
"aux_brier/n_active_tok": 169.625,
"aux_brier/n_groups": 9.53125,
"aux_brier/n_step_records": 42.40625,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4241315136476427,
"calib/avg_num_step_conf": 5.30859375,
"calib/ece": 0.4459448818897637,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.003937007874015748,
"calib/gap": -0.015548387096774197,
"calib/mean_conf": 0.04940944881889764,
"calib/mu_c": 0.041451612903225805,
"calib/mu_w": 0.057,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.0035826771653543307,
"calib/std_conf": 0.06277730347808565,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2163.0,
"completions/max_terminated_length": 2163.0,
"completions/mean_length": 375.67578125,
"completions/mean_terminated_length": 375.67578125,
"completions/min_length": 116.0,
"completions/min_terminated_length": 116.0,
"epoch": 0.07146666666666666,
"grad_norm": 0.12285065650939941,
"learning_rate": 3.694444444444445e-06,
"loss": 0.0227,
"num_tokens": 14171412.0,
"reward": 1.1061146259307861,
"reward_std": 0.22164198756217957,
"rewards/accuracy_reward_step": 0.484375,
"rewards/final_brier_reward_step": 0.5338335633277893,
"rewards/format_reward_step_strict": 0.9765625,
"step": 67
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 5.1232037882043e-06,
"aux_brier/mean_group_std": 0.030176616952614116,
"aux_brier/mean_r": 0.980834637625183,
"aux_brier/n_active_tok": 186.25,
"aux_brier/n_groups": 12.03125,
"aux_brier/n_step_records": 46.5625,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.3720454545454545,
"calib/avg_num_step_conf": 5.82421875,
"calib/ece": 0.3940000000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.015194805194805198,
"calib/mean_conf": 0.0476,
"calib/mu_c": 0.039090909090909086,
"calib/mu_w": 0.054285714285714284,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0008,
"calib/std_conf": 0.03278780261011707,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2373.0,
"completions/max_terminated_length": 2373.0,
"completions/mean_length": 390.41796875,
"completions/mean_terminated_length": 390.41796875,
"completions/min_length": 106.0,
"completions/min_terminated_length": 106.0,
"epoch": 0.07253333333333334,
"grad_norm": 0.41778209805488586,
"learning_rate": 3.6666666666666666e-06,
"loss": 0.0667,
"num_tokens": 14375447.0,
"reward": 1.058363914489746,
"reward_std": 0.2913435101509094,
"rewards/accuracy_reward_step": 0.4296875,
"rewards/final_brier_reward_step": 0.5772062540054321,
"rewards/format_reward_step_strict": 0.96875,
"step": 68
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.1240967232605268e-06,
"aux_brier/mean_group_std": 0.050432826361544364,
"aux_brier/mean_r": 0.9661016659552276,
"aux_brier/n_active_tok": 167.375,
"aux_brier/n_groups": 10.21875,
"aux_brier/n_step_records": 41.84375,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4689986282578875,
"calib/avg_num_step_conf": 5.23046875,
"calib/ece": 0.3199603174603175,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.003968253968253968,
"calib/gap": -0.01727160493827161,
"calib/mean_conf": 0.0521031746031746,
"calib/mu_c": 0.041,
"calib/mu_w": 0.05827160493827161,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.00746031746031746,
"calib/std_conf": 0.08617500920884266,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2448.0,
"completions/max_terminated_length": 2448.0,
"completions/mean_length": 419.3984375,
"completions/mean_terminated_length": 419.3984375,
"completions/min_length": 92.0,
"completions/min_terminated_length": 92.0,
"epoch": 0.0736,
"grad_norm": 0.0447511188685894,
"learning_rate": 3.638888888888889e-06,
"loss": 0.0633,
"num_tokens": 14587309.0,
"reward": 0.9966806173324585,
"reward_std": 0.2783878743648529,
"rewards/accuracy_reward_step": 0.3515625,
"rewards/final_brier_reward_step": 0.6429726481437683,
"rewards/format_reward_step_strict": 0.96875,
"step": 69
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 7.4102203126980015e-06,
"aux_brier/mean_group_std": 0.05047837073747982,
"aux_brier/mean_r": 0.9658483138018061,
"aux_brier/n_active_tok": 195.0,
"aux_brier/n_groups": 14.65625,
"aux_brier/n_step_records": 48.75,
"calib/answer_extract_rate": 0.9453125,
"calib/auroc": 0.5441705002875216,
"calib/avg_num_step_conf": 6.1015625,
"calib/ece": 0.34613636363636363,
"calib/final_conf_rate": 0.9453125,
"calib/format_rate": 0.9375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0027666762507188025,
"calib/mean_conf": 0.043946280991735545,
"calib/mu_c": 0.04563829787234042,
"calib/mu_w": 0.04287162162162162,
"calib/nonempty_final_conf_rate": 0.9453125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.0008264462809917356,
"calib/std_conf": 0.03549277086520069,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2830.0,
"completions/max_terminated_length": 2830.0,
"completions/mean_length": 449.90234375,
"completions/mean_terminated_length": 453.44488525390625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 93.0,
"epoch": 0.07466666666666667,
"grad_norm": 0.40294215083122253,
"learning_rate": 3.6111111111111115e-06,
"loss": 0.1819,
"num_tokens": 14809476.0,
"reward": 0.9851260185241699,
"reward_std": 0.2996982932090759,
"rewards/accuracy_reward_step": 0.3671875,
"rewards/final_brier_reward_step": 0.6045666933059692,
"rewards/format_reward_step_strict": 0.93359375,
"step": 70
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.141000971593975e-06,
"aux_brier/mean_group_std": 0.049329877547884066,
"aux_brier/mean_r": 0.9649408123850951,
"aux_brier/n_active_tok": 196.375,
"aux_brier/n_groups": 11.875,
"aux_brier/n_step_records": 49.09375,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.5205055802070727,
"calib/avg_num_step_conf": 6.140625,
"calib/ece": 0.4036530612244898,
"calib/final_conf_rate": 0.95703125,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0004803684281296447,
"calib/mean_conf": 0.05144897959183674,
"calib/mu_c": 0.05171171171171173,
"calib/mu_w": 0.05123134328358209,
"calib/nonempty_final_conf_rate": 0.95703125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.0010204081632653062,
"calib/std_conf": 0.0350457783561932,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2731.0,
"completions/max_terminated_length": 2731.0,
"completions/mean_length": 437.84765625,
"completions/mean_terminated_length": 441.2952880859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 93.0,
"epoch": 0.07573333333333333,
"grad_norm": 0.34749463200569153,
"learning_rate": 3.5833333333333335e-06,
"loss": 0.0656,
"num_tokens": 15025973.0,
"reward": 1.0552055835723877,
"reward_std": 0.31829553842544556,
"rewards/accuracy_reward_step": 0.4375,
"rewards/final_brier_reward_step": 0.5645725727081299,
"rewards/format_reward_step_strict": 0.953125,
"step": 71
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -3.897831544780184e-06,
"aux_brier/mean_group_std": 0.02546374854744683,
"aux_brier/mean_r": 0.9808279287671078,
"aux_brier/n_active_tok": 195.0,
"aux_brier/n_groups": 12.34375,
"aux_brier/n_step_records": 48.75,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5002283105022831,
"calib/avg_num_step_conf": 6.09375,
"calib/ece": 0.3670916334661355,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.000158512720156552,
"calib/mean_conf": 0.05123505976095617,
"calib/mu_c": 0.05114285714285715,
"calib/mu_w": 0.0513013698630137,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.03471667518287719,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2620.0,
"completions/max_terminated_length": 2620.0,
"completions/mean_length": 417.3359375,
"completions/mean_terminated_length": 418.9725646972656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 121.0,
"epoch": 0.0768,
"grad_norm": 0.2863672971725464,
"learning_rate": 3.555555555555556e-06,
"loss": 0.0628,
"num_tokens": 15237219.0,
"reward": 1.0475611686706543,
"reward_std": 0.2560883164405823,
"rewards/accuracy_reward_step": 0.41015625,
"rewards/final_brier_reward_step": 0.6043074131011963,
"rewards/format_reward_step_strict": 0.97265625,
"step": 72
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 4.610961066298103e-06,
"aux_brier/mean_group_std": 0.04337953391220954,
"aux_brier/mean_r": 0.9738971359935481,
"aux_brier/n_active_tok": 174.5,
"aux_brier/n_groups": 9.625,
"aux_brier/n_step_records": 43.625,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.44381250000000005,
"calib/avg_num_step_conf": 5.58203125,
"calib/ece": 0.44490118577075094,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.004210000000000005,
"calib/mean_conf": 0.04916996047430831,
"calib/mu_c": 0.04704,
"calib/mu_w": 0.051250000000000004,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.034670994113304016,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1412.0,
"completions/max_terminated_length": 1412.0,
"completions/mean_length": 388.73828125,
"completions/mean_terminated_length": 390.26275634765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 114.0,
"epoch": 0.07786666666666667,
"grad_norm": 0.3675096035003662,
"learning_rate": 3.5277777777777784e-06,
"loss": 0.0033,
"num_tokens": 15443768.0,
"reward": 1.1199455261230469,
"reward_std": 0.28744661808013916,
"rewards/accuracy_reward_step": 0.4921875,
"rewards/final_brier_reward_step": 0.5422824621200562,
"rewards/format_reward_step_strict": 0.984375,
"step": 73
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 3.4108433704105323e-06,
"aux_brier/mean_group_std": 0.02549180726143498,
"aux_brier/mean_r": 0.985227529095446,
"aux_brier/n_active_tok": 201.375,
"aux_brier/n_groups": 12.21875,
"aux_brier/n_step_records": 50.34375,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5235201012978791,
"calib/avg_num_step_conf": 6.29296875,
"calib/ece": 0.4140873015873016,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0018518518518518476,
"calib/mean_conf": 0.05321428571428572,
"calib/mu_c": 0.052222222222222225,
"calib/mu_w": 0.05407407407407407,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0015079365079365078,
"calib/std_conf": 0.03993528637064806,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2280.0,
"completions/max_terminated_length": 2280.0,
"completions/mean_length": 409.79296875,
"completions/mean_terminated_length": 411.4000244140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.07893333333333333,
"grad_norm": 0.23363105952739716,
"learning_rate": 3.5e-06,
"loss": 0.0495,
"num_tokens": 15652603.0,
"reward": 1.0918989181518555,
"reward_std": 0.28715503215789795,
"rewards/accuracy_reward_step": 0.45703125,
"rewards/final_brier_reward_step": 0.5707206726074219,
"rewards/format_reward_step_strict": 0.984375,
"step": 74
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.5379526267244046e-06,
"aux_brier/mean_group_std": 0.05564478248335544,
"aux_brier/mean_r": 0.9594891161755524,
"aux_brier/n_active_tok": 180.125,
"aux_brier/n_groups": 9.0625,
"aux_brier/n_step_records": 45.03125,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5691056910569106,
"calib/avg_num_step_conf": 5.64453125,
"calib/ece": 0.5977952755905512,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.002180216802168025,
"calib/mean_conf": 0.05062992125984252,
"calib/mu_c": 0.05140243902439024,
"calib/mu_w": 0.049222222222222216,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0013779527559055118,
"calib/std_conf": 0.03421212271898196,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 874.0,
"completions/max_terminated_length": 874.0,
"completions/mean_length": 366.98828125,
"completions/mean_terminated_length": 368.4274597167969,
"completions/min_length": 0.0,
"completions/min_terminated_length": 82.0,
"epoch": 0.08,
"grad_norm": 0.13181133568286896,
"learning_rate": 3.4722222222222224e-06,
"loss": 0.0055,
"num_tokens": 15851304.0,
"reward": 1.2383170127868652,
"reward_std": 0.26131200790405273,
"rewards/accuracy_reward_step": 0.64453125,
"rewards/final_brier_reward_step": 0.4063929617404938,
"rewards/format_reward_step_strict": 0.984375,
"step": 75
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.3144727633029873e-06,
"aux_brier/mean_group_std": 0.04790952991891247,
"aux_brier/mean_r": 0.9658543030835265,
"aux_brier/n_active_tok": 191.875,
"aux_brier/n_groups": 12.28125,
"aux_brier/n_step_records": 47.96875,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.45326492537313434,
"calib/avg_num_step_conf": 5.99609375,
"calib/ece": 0.4740551181102362,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.010891791044776139,
"calib/mean_conf": 0.053503937007874015,
"calib/mu_c": 0.04835820895522387,
"calib/mu_w": 0.05925000000000001,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.041430213406705566,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2190.0,
"completions/max_terminated_length": 2190.0,
"completions/mean_length": 405.296875,
"completions/mean_terminated_length": 405.296875,
"completions/min_length": 107.0,
"completions/min_terminated_length": 107.0,
"epoch": 0.08106666666666666,
"grad_norm": 0.19137753546237946,
"learning_rate": 3.444444444444445e-06,
"loss": 0.0287,
"num_tokens": 16058116.0,
"reward": 1.1433509588241577,
"reward_std": 0.27575671672821045,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/final_brier_reward_step": 0.5109038949012756,
"rewards/format_reward_step_strict": 0.984375,
"step": 76
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -4.123670227174081e-06,
"aux_brier/mean_group_std": 0.055487238056417,
"aux_brier/mean_r": 0.9620829956995886,
"aux_brier/n_active_tok": 183.625,
"aux_brier/n_groups": 11.375,
"aux_brier/n_step_records": 45.90625,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5336568371282816,
"calib/avg_num_step_conf": 5.77734375,
"calib/ece": 0.5063882352941176,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0015235162374020034,
"calib/mean_conf": 0.046552941176470596,
"calib/mu_c": 0.04723404255319149,
"calib/mu_w": 0.045710526315789486,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.0,
"calib/std_conf": 0.02907266321222467,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1458.0,
"completions/max_terminated_length": 1458.0,
"completions/mean_length": 361.859375,
"completions/mean_terminated_length": 363.2784423828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 94.0,
"epoch": 0.08213333333333334,
"grad_norm": 0.22153005003929138,
"learning_rate": 3.416666666666667e-06,
"loss": -0.006,
"num_tokens": 16255416.0,
"reward": 1.159740686416626,
"reward_std": 0.25410184264183044,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/final_brier_reward_step": 0.48271265625953674,
"rewards/format_reward_step_strict": 0.9765625,
"step": 77
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 7.0395815451318455e-06,
"aux_brier/mean_group_std": 0.05266590438357986,
"aux_brier/mean_r": 0.9663016684179435,
"aux_brier/n_active_tok": 189.875,
"aux_brier/n_groups": 10.96875,
"aux_brier/n_step_records": 47.46875,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5655609695152424,
"calib/avg_num_step_conf": 5.9609375,
"calib/ece": 0.4081141732283464,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.008458020989505238,
"calib/mean_conf": 0.05023228346456693,
"calib/mu_c": 0.054827586206896546,
"calib/mu_w": 0.04636956521739131,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.000826771653543307,
"calib/std_conf": 0.03471885367199788,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2197.0,
"completions/max_terminated_length": 2197.0,
"completions/mean_length": 421.3125,
"completions/mean_terminated_length": 421.3125,
"completions/min_length": 134.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.0832,
"grad_norm": 0.2403065264225006,
"learning_rate": 3.3888888888888893e-06,
"loss": 0.0122,
"num_tokens": 16471296.0,
"reward": 1.082634687423706,
"reward_std": 0.2751431167125702,
"rewards/accuracy_reward_step": 0.453125,
"rewards/final_brier_reward_step": 0.5805391073226929,
"rewards/format_reward_step_strict": 0.96875,
"step": 78
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -6.425094732087633e-07,
"aux_brier/mean_group_std": 0.05248357026304297,
"aux_brier/mean_r": 0.9611150219353268,
"aux_brier/n_active_tok": 208.75,
"aux_brier/n_groups": 12.5625,
"aux_brier/n_step_records": 52.1875,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.48201392882243943,
"calib/avg_num_step_conf": 6.625,
"calib/ece": 0.5057539682539682,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0006382978723404181,
"calib/mean_conf": 0.057023809523809525,
"calib/mu_c": 0.05730496453900708,
"calib/mu_w": 0.056666666666666664,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0016269841269841267,
"calib/std_conf": 0.050992835520065144,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2436.0,
"completions/max_terminated_length": 2436.0,
"completions/mean_length": 439.96875,
"completions/mean_terminated_length": 441.69415283203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 119.0,
"epoch": 0.08426666666666667,
"grad_norm": 0.1738610863685608,
"learning_rate": 3.3611111111111117e-06,
"loss": 0.0231,
"num_tokens": 16690304.0,
"reward": 1.1637357473373413,
"reward_std": 0.21450775861740112,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/final_brier_reward_step": 0.49088048934936523,
"rewards/format_reward_step_strict": 0.98046875,
"step": 79
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -4.0338823488195175e-06,
"aux_brier/mean_group_std": 0.05436176500071805,
"aux_brier/mean_r": 0.9586528520801569,
"aux_brier/n_active_tok": 201.0,
"aux_brier/n_groups": 11.875,
"aux_brier/n_step_records": 50.25,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4549990585577104,
"calib/avg_num_step_conf": 6.2890625,
"calib/ece": 0.5021653543307086,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.005202410092261363,
"calib/mean_conf": 0.052952755905511815,
"calib/mu_c": 0.05063829787234042,
"calib/mu_w": 0.055840707964601784,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0,
"calib/std_conf": 0.03126502366868463,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2954.0,
"completions/max_terminated_length": 2954.0,
"completions/mean_length": 400.28125,
"completions/mean_terminated_length": 400.28125,
"completions/min_length": 152.0,
"completions/min_terminated_length": 152.0,
"epoch": 0.08533333333333333,
"grad_norm": 0.01653141714632511,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.0454,
"num_tokens": 16894936.0,
"reward": 1.1604695320129395,
"reward_std": 0.3131512403488159,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/final_brier_reward_step": 0.4856281280517578,
"rewards/format_reward_step_strict": 0.9765625,
"step": 80
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.933250929093578e-06,
"aux_brier/mean_group_std": 0.05645298643859676,
"aux_brier/mean_r": 0.9614778421297897,
"aux_brier/n_active_tok": 215.5,
"aux_brier/n_groups": 14.375,
"aux_brier/n_step_records": 53.875,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.4622797175103953,
"calib/avg_num_step_conf": 6.8125,
"calib/ece": 0.5088306451612903,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.005889380238928127,
"calib/mean_conf": 0.051653225806451625,
"calib/mu_c": 0.04906474820143885,
"calib/mu_w": 0.05495412844036698,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0,
"calib/std_conf": 0.035685802805208645,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2594.0,
"completions/max_terminated_length": 2594.0,
"completions/mean_length": 450.7734375,
"completions/mean_terminated_length": 452.54119873046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.0864,
"grad_norm": 0.3035520017147064,
"learning_rate": 3.3055555555555558e-06,
"loss": 0.0802,
"num_tokens": 17116582.0,
"reward": 1.139133334159851,
"reward_std": 0.2880188822746277,
"rewards/accuracy_reward_step": 0.54296875,
"rewards/final_brier_reward_step": 0.4705960750579834,
"rewards/format_reward_step_strict": 0.95703125,
"step": 81
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.6320311266037066e-06,
"aux_brier/mean_group_std": 0.0444149556308205,
"aux_brier/mean_r": 0.9713469110688953,
"aux_brier/n_active_tok": 193.5,
"aux_brier/n_groups": 10.40625,
"aux_brier/n_step_records": 48.375,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5341914191419141,
"calib/avg_num_step_conf": 6.1953125,
"calib/ece": 0.548406374501992,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0011518151815181482,
"calib/mean_conf": 0.049203187250996025,
"calib/mu_c": 0.049666666666666665,
"calib/mu_w": 0.048514851485148516,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0,
"calib/std_conf": 0.027078213221623286,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2275.0,
"completions/max_terminated_length": 2275.0,
"completions/mean_length": 382.77734375,
"completions/mean_terminated_length": 384.2784423828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 111.0,
"epoch": 0.08746666666666666,
"grad_norm": 0.4123811721801758,
"learning_rate": 3.277777777777778e-06,
"loss": 0.0258,
"num_tokens": 17320125.0,
"reward": 1.1856541633605957,
"reward_std": 0.23773129284381866,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/final_brier_reward_step": 0.44574177265167236,
"rewards/format_reward_step_strict": 0.9765625,
"step": 82
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -9.098681497815564e-06,
"aux_brier/mean_group_std": 0.06456052063731982,
"aux_brier/mean_r": 0.950112133064767,
"aux_brier/n_active_tok": 222.375,
"aux_brier/n_groups": 13.28125,
"aux_brier/n_step_records": 55.59375,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.4394692665289256,
"calib/avg_num_step_conf": 7.1328125,
"calib/ece": 0.4631726907630522,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0072843491735537155,
"calib/mean_conf": 0.05088353413654619,
"calib/mu_c": 0.04734375000000001,
"calib/mu_w": 0.054628099173553726,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.0,
"calib/std_conf": 0.03790739204226033,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2622.0,
"completions/max_terminated_length": 2622.0,
"completions/mean_length": 478.51953125,
"completions/mean_terminated_length": 482.28741455078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 151.0,
"epoch": 0.08853333333333334,
"grad_norm": 0.2955869436264038,
"learning_rate": 3.2500000000000002e-06,
"loss": 0.0294,
"num_tokens": 17549890.0,
"reward": 1.1055645942687988,
"reward_std": 0.25129184126853943,
"rewards/accuracy_reward_step": 0.5,
"rewards/final_brier_reward_step": 0.508195698261261,
"rewards/format_reward_step_strict": 0.95703125,
"step": 83
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.6381674451680936e-06,
"aux_brier/mean_group_std": 0.057917053717292825,
"aux_brier/mean_r": 0.9582471606331168,
"aux_brier/n_active_tok": 207.625,
"aux_brier/n_groups": 13.59375,
"aux_brier/n_step_records": 51.90625,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5576023391812865,
"calib/avg_num_step_conf": 6.640625,
"calib/ece": 0.4935341365461848,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0065419103313840085,
"calib/mean_conf": 0.048634538152610436,
"calib/mu_c": 0.05162962962962962,
"calib/mu_w": 0.045087719298245614,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.02946209483413582,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2865.0,
"completions/max_terminated_length": 2865.0,
"completions/mean_length": 415.23046875,
"completions/mean_terminated_length": 416.8588562011719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.0896,
"grad_norm": 0.25714656710624695,
"learning_rate": 3.2222222222222227e-06,
"loss": 0.0668,
"num_tokens": 17762109.0,
"reward": 1.1339223384857178,
"reward_std": 0.27987125515937805,
"rewards/accuracy_reward_step": 0.53125,
"rewards/final_brier_reward_step": 0.48881447315216064,
"rewards/format_reward_step_strict": 0.9609375,
"step": 84
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -4.243233125311718e-06,
"aux_brier/mean_group_std": 0.05724537831026044,
"aux_brier/mean_r": 0.9563792340685818,
"aux_brier/n_active_tok": 217.875,
"aux_brier/n_groups": 13.40625,
"aux_brier/n_step_records": 54.46875,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5025252525252525,
"calib/avg_num_step_conf": 6.87890625,
"calib/ece": 0.47930522088353417,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0002839937839937967,
"calib/mean_conf": 0.05081526104417672,
"calib/mu_c": 0.050681818181818175,
"calib/mu_w": 0.05096581196581197,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.030760344654199002,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2615.0,
"completions/max_terminated_length": 2615.0,
"completions/mean_length": 444.10546875,
"completions/mean_terminated_length": 445.8470764160156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 125.0,
"epoch": 0.09066666666666667,
"grad_norm": 0.1907009482383728,
"learning_rate": 3.1944444444444443e-06,
"loss": 0.0192,
"num_tokens": 17983624.0,
"reward": 1.124513030052185,
"reward_std": 0.23452921211719513,
"rewards/accuracy_reward_step": 0.515625,
"rewards/final_brier_reward_step": 0.5058649778366089,
"rewards/format_reward_step_strict": 0.96484375,
"step": 85
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.1181750219635411e-06,
"aux_brier/mean_group_std": 0.0637853497743649,
"aux_brier/mean_r": 0.958083130671536,
"aux_brier/n_active_tok": 224.5,
"aux_brier/n_groups": 15.90625,
"aux_brier/n_step_records": 56.125,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5510873640794901,
"calib/avg_num_step_conf": 7.046875,
"calib/ece": 0.45339525691699606,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.003339332583427078,
"calib/mean_conf": 0.048581027667984195,
"calib/mu_c": 0.050244094488188985,
"calib/mu_w": 0.04690476190476191,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.0,
"calib/std_conf": 0.02960688340891283,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2249.0,
"completions/max_terminated_length": 2249.0,
"completions/mean_length": 447.2890625,
"completions/mean_terminated_length": 447.2890625,
"completions/min_length": 126.0,
"completions/min_terminated_length": 126.0,
"epoch": 0.09173333333333333,
"grad_norm": 0.1536872535943985,
"learning_rate": 3.1666666666666667e-06,
"loss": 0.0038,
"num_tokens": 18203642.0,
"reward": 1.1151789426803589,
"reward_std": 0.28822559118270874,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/final_brier_reward_step": 0.5310282707214355,
"rewards/format_reward_step_strict": 0.97265625,
"step": 86
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -8.944583430592346e-07,
"aux_brier/mean_group_std": 0.07160175411029325,
"aux_brier/mean_r": 0.9462595403132358,
"aux_brier/n_active_tok": 191.5,
"aux_brier/n_groups": 11.46875,
"aux_brier/n_step_records": 47.875,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.5774426091825308,
"calib/avg_num_step_conf": 6.3046875,
"calib/ece": 0.5753252032520326,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0032838745800672017,
"calib/mean_conf": 0.04256097560975611,
"calib/mu_c": 0.04381578947368421,
"calib/mu_w": 0.04053191489361701,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0,
"calib/std_conf": 0.01781642904073745,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 2421.0,
"completions/max_terminated_length": 2421.0,
"completions/mean_length": 360.046875,
"completions/mean_terminated_length": 368.6880187988281,
"completions/min_length": 0.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.0928,
"grad_norm": 0.2116979956626892,
"learning_rate": 3.138888888888889e-06,
"loss": -0.0403,
"num_tokens": 18401310.0,
"reward": 1.1824182271957397,
"reward_std": 0.30382513999938965,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/final_brier_reward_step": 0.41717302799224854,
"rewards/format_reward_step_strict": 0.9609375,
"step": 87
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.5890178639565988e-07,
"aux_brier/mean_group_std": 0.06549508491704442,
"aux_brier/mean_r": 0.9563637966917302,
"aux_brier/n_active_tok": 224.125,
"aux_brier/n_groups": 13.46875,
"aux_brier/n_step_records": 56.03125,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4663870885754226,
"calib/avg_num_step_conf": 7.00390625,
"calib/ece": 0.44258964143426294,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0037139407802770294,
"calib/mean_conf": 0.04346613545816733,
"calib/mu_c": 0.04155737704918033,
"calib/mu_w": 0.04527131782945736,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.02214444271668734,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2583.0,
"completions/max_terminated_length": 2583.0,
"completions/mean_length": 437.359375,
"completions/mean_terminated_length": 437.359375,
"completions/min_length": 133.0,
"completions/min_terminated_length": 133.0,
"epoch": 0.09386666666666667,
"grad_norm": 0.07773371785879135,
"learning_rate": 3.1111111111111116e-06,
"loss": 0.0556,
"num_tokens": 18623122.0,
"reward": 1.1020925045013428,
"reward_std": 0.23363706469535828,
"rewards/accuracy_reward_step": 0.4765625,
"rewards/final_brier_reward_step": 0.5411823987960815,
"rewards/format_reward_step_strict": 0.98046875,
"step": 88
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -7.280286938929348e-07,
"aux_brier/mean_group_std": 0.0518077710895089,
"aux_brier/mean_r": 0.9629384917778879,
"aux_brier/n_active_tok": 242.375,
"aux_brier/n_groups": 16.125,
"aux_brier/n_step_records": 60.59375,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.49404533230887443,
"calib/avg_num_step_conf": 7.578125,
"calib/ece": 0.42131474103585664,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.00398406374501992,
"calib/gap": -0.01051286976565502,
"calib/mean_conf": 0.04442231075697212,
"calib/mu_c": 0.038684210526315786,
"calib/mu_w": 0.049197080291970806,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.005776892430278884,
"calib/std_conf": 0.06732780115972177,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2464.0,
"completions/max_terminated_length": 2464.0,
"completions/mean_length": 459.70703125,
"completions/mean_terminated_length": 459.70703125,
"completions/min_length": 132.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.09493333333333333,
"grad_norm": 0.01032259501516819,
"learning_rate": 3.0833333333333336e-06,
"loss": 0.0385,
"num_tokens": 18849695.0,
"reward": 1.0753793716430664,
"reward_std": 0.2134411334991455,
"rewards/accuracy_reward_step": 0.44921875,
"rewards/final_brier_reward_step": 0.5593301057815552,
"rewards/format_reward_step_strict": 0.97265625,
"step": 89
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 4.459777750764893e-06,
"aux_brier/mean_group_std": 0.09075002981173923,
"aux_brier/mean_r": 0.9308858571250507,
"aux_brier/n_active_tok": 255.875,
"aux_brier/n_groups": 18.0625,
"aux_brier/n_step_records": 63.96875,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.564327485380117,
"calib/avg_num_step_conf": 7.99609375,
"calib/ece": 0.5107936507936508,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.004294431731502672,
"calib/mean_conf": 0.03682539682539683,
"calib/mu_c": 0.03876811594202899,
"calib/mu_w": 0.034473684210526316,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.01571829354054239,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2633.0,
"completions/max_terminated_length": 2633.0,
"completions/mean_length": 447.8984375,
"completions/mean_terminated_length": 447.8984375,
"completions/min_length": 121.0,
"completions/min_terminated_length": 121.0,
"epoch": 0.096,
"grad_norm": 0.07545869052410126,
"learning_rate": 3.055555555555556e-06,
"loss": 0.0133,
"num_tokens": 19067677.0,
"reward": 1.1536109447479248,
"reward_std": 0.18574708700180054,
"rewards/accuracy_reward_step": 0.54296875,
"rewards/final_brier_reward_step": 0.48163124918937683,
"rewards/format_reward_step_strict": 0.98046875,
"step": 90
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -2.4969813386954698e-06,
"aux_brier/mean_group_std": 0.05015517888576402,
"aux_brier/mean_r": 0.970973050927131,
"aux_brier/n_active_tok": 242.625,
"aux_brier/n_groups": 15.28125,
"aux_brier/n_step_records": 60.65625,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5331041507512095,
"calib/avg_num_step_conf": 7.5859375,
"calib/ece": 0.4921513944223108,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0011261777438248063,
"calib/mean_conf": 0.03454183266932271,
"calib/mu_c": 0.03507575757575758,
"calib/mu_w": 0.03394957983193277,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.00039840637450199205,
"calib/std_conf": 0.013658077124723993,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2512.0,
"completions/max_terminated_length": 2512.0,
"completions/mean_length": 460.265625,
"completions/mean_terminated_length": 460.265625,
"completions/min_length": 113.0,
"completions/min_terminated_length": 113.0,
"epoch": 0.09706666666666666,
"grad_norm": 0.2291233241558075,
"learning_rate": 3.0277777777777776e-06,
"loss": 0.0773,
"num_tokens": 19293217.0,
"reward": 1.1307750940322876,
"reward_std": 0.2191537320613861,
"rewards/accuracy_reward_step": 0.515625,
"rewards/final_brier_reward_step": 0.4996629059314728,
"rewards/format_reward_step_strict": 0.98046875,
"step": 91
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.36811778611301e-05,
"aux_brier/mean_group_std": 0.04263066455186372,
"aux_brier/mean_r": 0.9737503482426485,
"aux_brier/n_active_tok": 222.5,
"aux_brier/n_groups": 13.5,
"aux_brier/n_step_records": 55.625,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.50998550998551,
"calib/avg_num_step_conf": 6.953125,
"calib/ece": 0.5282283464566929,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0006205506205506223,
"calib/mean_conf": 0.034763779527559054,
"calib/mu_c": 0.03503496503496503,
"calib/mu_w": 0.03441441441441441,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.013241521841382182,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2014.0,
"completions/max_terminated_length": 2014.0,
"completions/mean_length": 405.9453125,
"completions/mean_terminated_length": 405.9453125,
"completions/min_length": 130.0,
"completions/min_terminated_length": 130.0,
"epoch": 0.09813333333333334,
"grad_norm": 0.16720464825630188,
"learning_rate": 3e-06,
"loss": 0.0335,
"num_tokens": 19503859.0,
"reward": 1.166670322418213,
"reward_std": 0.20974203944206238,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/final_brier_reward_step": 0.4635566473007202,
"rewards/format_reward_step_strict": 0.984375,
"step": 92
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -9.93929180403974e-07,
"aux_brier/mean_group_std": 0.04500498375369191,
"aux_brier/mean_r": 0.9720751749813877,
"aux_brier/n_active_tok": 249.0,
"aux_brier/n_groups": 15.71875,
"aux_brier/n_step_records": 62.25,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4642267124158089,
"calib/avg_num_step_conf": 7.78125,
"calib/ece": 0.44856573705179287,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0011767696022366297,
"calib/mean_conf": 0.03749003984063745,
"calib/mu_c": 0.036885245901639344,
"calib/mu_w": 0.03806201550387597,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.014243013958146587,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2520.0,
"completions/max_terminated_length": 2520.0,
"completions/mean_length": 458.41015625,
"completions/mean_terminated_length": 458.41015625,
"completions/min_length": 95.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.0992,
"grad_norm": 0.20912809669971466,
"learning_rate": 2.9722222222222225e-06,
"loss": 0.0302,
"num_tokens": 19726988.0,
"reward": 1.0982401371002197,
"reward_std": 0.24127297103405,
"rewards/accuracy_reward_step": 0.4765625,
"rewards/final_brier_reward_step": 0.5335855484008789,
"rewards/format_reward_step_strict": 0.9765625,
"step": 93
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -2.1607752745123054e-06,
"aux_brier/mean_group_std": 0.060573612370259655,
"aux_brier/mean_r": 0.9575719422265172,
"aux_brier/n_active_tok": 226.0,
"aux_brier/n_groups": 14.25,
"aux_brier/n_step_records": 56.5,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5247115384615385,
"calib/avg_num_step_conf": 7.0625,
"calib/ece": 0.48696,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0006410256410256526,
"calib/mean_conf": 0.034,
"calib/mu_c": 0.03430769230769231,
"calib/mu_w": 0.03366666666666666,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.00048,
"calib/std_conf": 0.013236313686219437,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2926.0,
"completions/max_terminated_length": 2926.0,
"completions/mean_length": 423.81640625,
"completions/mean_terminated_length": 423.81640625,
"completions/min_length": 124.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.10026666666666667,
"grad_norm": 0.016376424580812454,
"learning_rate": 2.944444444444445e-06,
"loss": 0.0604,
"num_tokens": 19944165.0,
"reward": 1.1226438283920288,
"reward_std": 0.21318775415420532,
"rewards/accuracy_reward_step": 0.51171875,
"rewards/final_brier_reward_step": 0.4983879029750824,
"rewards/format_reward_step_strict": 0.97265625,
"step": 94
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 3.345612032912282e-06,
"aux_brier/mean_group_std": 0.0809558229476364,
"aux_brier/mean_r": 0.9460285799274464,
"aux_brier/n_active_tok": 249.875,
"aux_brier/n_groups": 15.0625,
"aux_brier/n_step_records": 62.46875,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5558622002283974,
"calib/avg_num_step_conf": 7.80859375,
"calib/ece": 0.522292490118577,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0008254028676563871,
"calib/mean_conf": 0.04055335968379447,
"calib/mu_c": 0.04091549295774647,
"calib/mu_w": 0.04009009009009008,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0007905138339920949,
"calib/std_conf": 0.01934935012791396,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2845.0,
"completions/max_terminated_length": 2845.0,
"completions/mean_length": 456.74609375,
"completions/mean_terminated_length": 456.74609375,
"completions/min_length": 88.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.10133333333333333,
"grad_norm": 0.048102181404829025,
"learning_rate": 2.916666666666667e-06,
"loss": 0.0155,
"num_tokens": 20167220.0,
"reward": 1.1631183624267578,
"reward_std": 0.2865407466888428,
"rewards/accuracy_reward_step": 0.5546875,
"rewards/final_brier_reward_step": 0.4727863073348999,
"rewards/format_reward_step_strict": 0.98046875,
"step": 95
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -2.068151539025287e-06,
"aux_brier/mean_group_std": 0.05631804725341152,
"aux_brier/mean_r": 0.9589422327445754,
"aux_brier/n_active_tok": 242.25,
"aux_brier/n_groups": 16.0,
"aux_brier/n_step_records": 60.5625,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4966334500403986,
"calib/avg_num_step_conf": 7.5703125,
"calib/ece": 0.586984126984127,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00016967411796390414,
"calib/mean_conf": 0.04,
"calib/mu_c": 0.03993670886075949,
"calib/mu_w": 0.0401063829787234,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.013540064007726602,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2541.0,
"completions/max_terminated_length": 2541.0,
"completions/mean_length": 412.11328125,
"completions/mean_terminated_length": 412.11328125,
"completions/min_length": 139.0,
"completions/min_terminated_length": 139.0,
"epoch": 0.1024,
"grad_norm": 0.08516115695238113,
"learning_rate": 2.888888888888889e-06,
"loss": 0.0654,
"num_tokens": 20378537.0,
"reward": 1.2169634103775024,
"reward_std": 0.1943175047636032,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/final_brier_reward_step": 0.41472887992858887,
"rewards/format_reward_step_strict": 0.984375,
"step": 96
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.663974127597001e-06,
"aux_brier/mean_group_std": 0.053727748085427046,
"aux_brier/mean_r": 0.9690100896784644,
"aux_brier/n_active_tok": 240.375,
"aux_brier/n_groups": 14.0625,
"aux_brier/n_step_records": 60.09375,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4723899913718723,
"calib/avg_num_step_conf": 7.51171875,
"calib/ece": 0.48066666666666674,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.001413780352520648,
"calib/mean_conf": 0.040901960784313726,
"calib/mu_c": 0.040225563909774435,
"calib/mu_w": 0.04163934426229508,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.014292813087676144,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2349.0,
"completions/max_terminated_length": 2349.0,
"completions/mean_length": 420.0703125,
"completions/mean_terminated_length": 420.0703125,
"completions/min_length": 86.0,
"completions/min_terminated_length": 86.0,
"epoch": 0.10346666666666667,
"grad_norm": 0.1330837458372116,
"learning_rate": 2.861111111111111e-06,
"loss": 0.0271,
"num_tokens": 20591147.0,
"reward": 1.1467005014419556,
"reward_std": 0.2592535614967346,
"rewards/accuracy_reward_step": 0.51953125,
"rewards/final_brier_reward_step": 0.5164894461631775,
"rewards/format_reward_step_strict": 0.99609375,
"step": 97
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 4.722335791129684e-06,
"aux_brier/mean_group_std": 0.08985709111939484,
"aux_brier/mean_r": 0.9369056604417026,
"aux_brier/n_active_tok": 259.75,
"aux_brier/n_groups": 18.0625,
"aux_brier/n_step_records": 64.9375,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.4499672346002621,
"calib/avg_num_step_conf": 8.3125,
"calib/ece": 0.5217269076305221,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0015190039318479717,
"calib/mean_conf": 0.04052208835341366,
"calib/mu_c": 0.039857142857142855,
"calib/mu_w": 0.04137614678899083,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.014118279545522784,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2665.0,
"completions/max_terminated_length": 2665.0,
"completions/mean_length": 461.078125,
"completions/mean_terminated_length": 462.88629150390625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 84.0,
"epoch": 0.10453333333333334,
"grad_norm": 0.23408974707126617,
"learning_rate": 2.8333333333333335e-06,
"loss": 0.0638,
"num_tokens": 20815367.0,
"reward": 1.1500990390777588,
"reward_std": 0.23604661226272583,
"rewards/accuracy_reward_step": 0.546875,
"rewards/final_brier_reward_step": 0.4675839841365814,
"rewards/format_reward_step_strict": 0.97265625,
"step": 98
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.0724527822747465e-06,
"aux_brier/mean_group_std": 0.03908728093991029,
"aux_brier/mean_r": 0.9764337692755751,
"aux_brier/n_active_tok": 263.875,
"aux_brier/n_groups": 16.15625,
"aux_brier/n_step_records": 65.96875,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5371052631578948,
"calib/avg_num_step_conf": 8.24609375,
"calib/ece": 0.26079681274900396,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.001084962406015029,
"calib/mean_conf": 0.043585657370517936,
"calib/mu_c": 0.044342105263157884,
"calib/mu_w": 0.043257142857142855,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0007968127490039841,
"calib/std_conf": 0.018225338996572207,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2575.0,
"completions/max_terminated_length": 2575.0,
"completions/mean_length": 499.046875,
"completions/mean_terminated_length": 499.046875,
"completions/min_length": 88.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.1056,
"grad_norm": 0.011097099632024765,
"learning_rate": 2.805555555555556e-06,
"loss": 0.0774,
"num_tokens": 21048923.0,
"reward": 0.9679489731788635,
"reward_std": 0.22912907600402832,
"rewards/accuracy_reward_step": 0.30078125,
"rewards/final_brier_reward_step": 0.7077336311340332,
"rewards/format_reward_step_strict": 0.98046875,
"step": 99
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -2.6559374108336975e-06,
"aux_brier/mean_group_std": 0.06833585449856798,
"aux_brier/mean_r": 0.9478820524765457,
"aux_brier/n_active_tok": 245.25,
"aux_brier/n_groups": 14.5,
"aux_brier/n_step_records": 61.3125,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5469354838709678,
"calib/avg_num_step_conf": 7.6640625,
"calib/ece": 0.4566666666666666,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.002018709677419357,
"calib/mean_conf": 0.04132530120481928,
"calib/mu_c": 0.042338709677419366,
"calib/mu_w": 0.04032000000000001,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.012684406928435523,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3035.0,
"completions/max_terminated_length": 3035.0,
"completions/mean_length": 462.703125,
"completions/mean_terminated_length": 462.703125,
"completions/min_length": 115.0,
"completions/min_terminated_length": 115.0,
"epoch": 0.10666666666666667,
"grad_norm": 0.026581548154354095,
"learning_rate": 2.7777777777777783e-06,
"loss": 0.0901,
"num_tokens": 21274783.0,
"reward": 1.1005620956420898,
"reward_std": 0.26199617981910706,
"rewards/accuracy_reward_step": 0.484375,
"rewards/final_brier_reward_step": 0.5272483825683594,
"rewards/format_reward_step_strict": 0.96875,
"step": 100
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -8.104555376253764e-07,
"aux_brier/mean_group_std": 0.03822865550783053,
"aux_brier/mean_r": 0.9741097669206261,
"aux_brier/n_active_tok": 295.25,
"aux_brier/n_groups": 19.25,
"aux_brier/n_step_records": 73.8125,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.4828681956341532,
"calib/avg_num_step_conf": 9.2265625,
"calib/ece": 0.33858870967741933,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00020033158331031475,
"calib/mean_conf": 0.04044354838709678,
"calib/mu_c": 0.0403191489361702,
"calib/mu_w": 0.04051948051948052,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.012222930868734072,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2672.0,
"completions/max_terminated_length": 2672.0,
"completions/mean_length": 522.08984375,
"completions/mean_terminated_length": 524.1372680664062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 148.0,
"epoch": 0.10773333333333333,
"grad_norm": 0.0646122470498085,
"learning_rate": 2.7500000000000004e-06,
"loss": 0.0916,
"num_tokens": 21515430.0,
"reward": 1.008923053741455,
"reward_std": 0.24845989048480988,
"rewards/accuracy_reward_step": 0.3671875,
"rewards/final_brier_reward_step": 0.629442572593689,
"rewards/format_reward_step_strict": 0.96875,
"step": 101
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -3.4775850829837918e-06,
"aux_brier/mean_group_std": 0.050068782589601665,
"aux_brier/mean_r": 0.9631128284547863,
"aux_brier/n_active_tok": 236.375,
"aux_brier/n_groups": 13.59375,
"aux_brier/n_step_records": 59.09375,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5248051948051948,
"calib/avg_num_step_conf": 7.4453125,
"calib/ece": 0.5674015748031497,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00016883116883117943,
"calib/mean_conf": 0.03889763779527559,
"calib/mu_c": 0.03883116883116882,
"calib/mu_w": 0.039,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.01036562660394584,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1889.0,
"completions/max_terminated_length": 1889.0,
"completions/mean_length": 399.2734375,
"completions/mean_terminated_length": 400.8392333984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 67.0,
"epoch": 0.1088,
"grad_norm": 0.37528353929519653,
"learning_rate": 2.7222222222222224e-06,
"loss": 0.0008,
"num_tokens": 21724340.0,
"reward": 1.210496425628662,
"reward_std": 0.202207550406456,
"rewards/accuracy_reward_step": 0.60546875,
"rewards/final_brier_reward_step": 0.43573594093322754,
"rewards/format_reward_step_strict": 0.9921875,
"step": 102
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.4638724062931985e-06,
"aux_brier/mean_group_std": 0.048043066482962964,
"aux_brier/mean_r": 0.9690465192308666,
"aux_brier/n_active_tok": 287.125,
"aux_brier/n_groups": 19.71875,
"aux_brier/n_step_records": 71.78125,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.5147196261682244,
"calib/avg_num_step_conf": 9.15625,
"calib/ece": 0.5265991902834009,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0011815754339118828,
"calib/mean_conf": 0.040202429149797575,
"calib/mu_c": 0.04071428571428571,
"calib/mu_w": 0.03953271028037383,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.011919072346336648,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2856.0,
"completions/max_terminated_length": 2856.0,
"completions/mean_length": 548.0,
"completions/mean_terminated_length": 552.31494140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 159.0,
"epoch": 0.10986666666666667,
"grad_norm": 0.5299721956253052,
"learning_rate": 2.6944444444444444e-06,
"loss": 0.0575,
"num_tokens": 21969180.0,
"reward": 1.1415690183639526,
"reward_std": 0.2539811134338379,
"rewards/accuracy_reward_step": 0.546875,
"rewards/final_brier_reward_step": 0.45690077543258667,
"rewards/format_reward_step_strict": 0.9609375,
"step": 103
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -5.285170763702496e-06,
"aux_brier/mean_group_std": 0.04751414936699034,
"aux_brier/mean_r": 0.9696152340986504,
"aux_brier/n_active_tok": 254.125,
"aux_brier/n_groups": 15.28125,
"aux_brier/n_step_records": 63.53125,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5002827698881488,
"calib/avg_num_step_conf": 7.94140625,
"calib/ece": 0.3869019607843137,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 2.5135101168749507e-06,
"calib/mean_conf": 0.040549019607843136,
"calib/mu_c": 0.04055045871559633,
"calib/mu_w": 0.040547945205479455,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.01046434417624111,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2773.0,
"completions/max_terminated_length": 2773.0,
"completions/mean_length": 455.11328125,
"completions/mean_terminated_length": 455.11328125,
"completions/min_length": 120.0,
"completions/min_terminated_length": 120.0,
"epoch": 0.11093333333333333,
"grad_norm": 0.3309130370616913,
"learning_rate": 2.666666666666667e-06,
"loss": 0.028,
"num_tokens": 22192369.0,
"reward": 1.0746023654937744,
"reward_std": 0.20162808895111084,
"rewards/accuracy_reward_step": 0.42578125,
"rewards/final_brier_reward_step": 0.6030968427658081,
"rewards/format_reward_step_strict": 0.99609375,
"step": 104
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.7118542688109315e-06,
"aux_brier/mean_group_std": 0.0437891455200206,
"aux_brier/mean_r": 0.975865268789005,
"aux_brier/n_active_tok": 267.875,
"aux_brier/n_groups": 16.46875,
"aux_brier/n_step_records": 66.96875,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.48979013045944414,
"calib/avg_num_step_conf": 8.37109375,
"calib/ece": 0.44857142857142857,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0014520703346568298,
"calib/mean_conf": 0.03952380952380953,
"calib/mu_c": 0.038780487804878055,
"calib/mu_w": 0.040232558139534885,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.011293848786315641,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2521.0,
"completions/max_terminated_length": 2521.0,
"completions/mean_length": 486.1171875,
"completions/mean_terminated_length": 486.1171875,
"completions/min_length": 153.0,
"completions/min_terminated_length": 153.0,
"epoch": 0.112,
"grad_norm": 1.2331123352050781,
"learning_rate": 2.6388888888888893e-06,
"loss": 0.0586,
"num_tokens": 22422575.0,
"reward": 1.1114397048950195,
"reward_std": 0.26240772008895874,
"rewards/accuracy_reward_step": 0.484375,
"rewards/final_brier_reward_step": 0.539508581161499,
"rewards/format_reward_step_strict": 0.984375,
"step": 105
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.8908124227733367e-07,
"aux_brier/mean_group_std": 0.0741900223851166,
"aux_brier/mean_r": 0.9525063079484763,
"aux_brier/n_active_tok": 244.25,
"aux_brier/n_groups": 14.75,
"aux_brier/n_step_records": 61.0625,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5614988558352404,
"calib/avg_num_step_conf": 7.6328125,
"calib/ece": 0.4142857142857143,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0008848207475209702,
"calib/mean_conf": 0.04206349206349207,
"calib/mu_c": 0.041578947368421056,
"calib/mu_w": 0.042463768115942026,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.001984126984126984,
"calib/std_conf": 0.030765819529106585,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2886.0,
"completions/max_terminated_length": 2886.0,
"completions/mean_length": 457.40625,
"completions/mean_terminated_length": 457.40625,
"completions/min_length": 168.0,
"completions/min_terminated_length": 168.0,
"epoch": 0.11306666666666666,
"grad_norm": 0.730738639831543,
"learning_rate": 2.6111111111111113e-06,
"loss": 0.0633,
"num_tokens": 22644255.0,
"reward": 1.0752407312393188,
"reward_std": 0.24985721707344055,
"rewards/accuracy_reward_step": 0.4453125,
"rewards/final_brier_reward_step": 0.5665878653526306,
"rewards/format_reward_step_strict": 0.9765625,
"step": 106
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -8.50917928452155e-06,
"aux_brier/mean_group_std": 0.057776691252402666,
"aux_brier/mean_r": 0.960953601038585,
"aux_brier/n_active_tok": 244.25,
"aux_brier/n_groups": 14.75,
"aux_brier/n_step_records": 61.0625,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5284919162470183,
"calib/avg_num_step_conf": 7.796875,
"calib/ece": 0.5722222222222223,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0010204081632653114,
"calib/mean_conf": 0.03888888888888889,
"calib/mu_c": 0.039285714285714285,
"calib/mu_w": 0.038265306122448974,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.009148008910107181,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2413.0,
"completions/max_terminated_length": 2413.0,
"completions/mean_length": 437.6015625,
"completions/mean_terminated_length": 439.31768798828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 143.0,
"epoch": 0.11413333333333334,
"grad_norm": 0.26983603835105896,
"learning_rate": 2.5833333333333337e-06,
"loss": 0.0537,
"num_tokens": 22860897.0,
"reward": 1.2008767127990723,
"reward_std": 0.24636337161064148,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/final_brier_reward_step": 0.4285070300102234,
"rewards/format_reward_step_strict": 0.984375,
"step": 107
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -3.711407311035586e-06,
"aux_brier/mean_group_std": 0.06834700084118482,
"aux_brier/mean_r": 0.9478834800573411,
"aux_brier/n_active_tok": 274.25,
"aux_brier/n_groups": 16.40625,
"aux_brier/n_step_records": 68.5625,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5430125692661171,
"calib/avg_num_step_conf": 8.57421875,
"calib/ece": 0.5663453815261044,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00215164211379916,
"calib/mean_conf": 0.040080321285140566,
"calib/mu_c": 0.040927152317880786,
"calib/mu_w": 0.038775510204081626,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.009356481672849375,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2937.0,
"completions/max_terminated_length": 2937.0,
"completions/mean_length": 504.4296875,
"completions/mean_terminated_length": 504.4296875,
"completions/min_length": 132.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.1152,
"grad_norm": 0.26578643918037415,
"learning_rate": 2.5555555555555557e-06,
"loss": 0.0743,
"num_tokens": 23093263.0,
"reward": 1.1815803050994873,
"reward_std": 0.25909295678138733,
"rewards/accuracy_reward_step": 0.58984375,
"rewards/final_brier_reward_step": 0.42944610118865967,
"rewards/format_reward_step_strict": 0.96875,
"step": 108
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -4.3005259867112677e-07,
"aux_brier/mean_group_std": 0.034005848151057484,
"aux_brier/mean_r": 0.9785348621308146,
"aux_brier/n_active_tok": 309.25,
"aux_brier/n_groups": 23.28125,
"aux_brier/n_step_records": 77.3125,
"calib/answer_extract_rate": 0.95703125,
"calib/auroc": 0.5537815126050419,
"calib/avg_num_step_conf": 9.921875,
"calib/ece": 0.44991147540983606,
"calib/final_conf_rate": 0.953125,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0030211764705882424,
"calib/mean_conf": 0.03779344262295081,
"calib/mu_c": 0.039341176470588234,
"calib/mu_w": 0.03631999999999999,
"calib/nonempty_final_conf_rate": 0.953125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.00963398814056824,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2995.0,
"completions/max_terminated_length": 2995.0,
"completions/mean_length": 535.2109375,
"completions/mean_terminated_length": 539.4251708984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.11626666666666667,
"grad_norm": 0.37634411454200745,
"learning_rate": 2.5277777777777778e-06,
"loss": 0.1403,
"num_tokens": 23334877.0,
"reward": 1.0663983821868896,
"reward_std": 0.20794661343097687,
"rewards/accuracy_reward_step": 0.46484375,
"rewards/final_brier_reward_step": 0.5234063863754272,
"rewards/format_reward_step_strict": 0.94140625,
"step": 109
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -3.382355060144171e-06,
"aux_brier/mean_group_std": 0.05514414110685423,
"aux_brier/mean_r": 0.9610729802289838,
"aux_brier/n_active_tok": 238.125,
"aux_brier/n_groups": 14.125,
"aux_brier/n_step_records": 59.53125,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4605263157894737,
"calib/avg_num_step_conf": 7.44140625,
"calib/ece": 0.4095669291338583,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0008709273182957306,
"calib/mean_conf": 0.039251968503937,
"calib/mu_c": 0.0387719298245614,
"calib/mu_w": 0.03964285714285713,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.009952223273896953,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2371.0,
"completions/max_terminated_length": 2371.0,
"completions/mean_length": 466.74609375,
"completions/mean_terminated_length": 466.74609375,
"completions/min_length": 123.0,
"completions/min_terminated_length": 123.0,
"epoch": 0.11733333333333333,
"grad_norm": 0.0723707377910614,
"learning_rate": 2.5e-06,
"loss": 0.0417,
"num_tokens": 23559284.0,
"reward": 1.0834276676177979,
"reward_std": 0.2757888436317444,
"rewards/accuracy_reward_step": 0.4453125,
"rewards/final_brier_reward_step": 0.5758980512619019,
"rewards/format_reward_step_strict": 0.98828125,
"step": 110
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.8873351762815904e-06,
"aux_brier/mean_group_std": 0.06157601710370413,
"aux_brier/mean_r": 0.9618023827397723,
"aux_brier/n_active_tok": 268.375,
"aux_brier/n_groups": 16.75,
"aux_brier/n_step_records": 67.09375,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5699152542372881,
"calib/avg_num_step_conf": 8.5234375,
"calib/ece": 0.43191999999999997,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0017745249101181262,
"calib/mean_conf": 0.04008,
"calib/mu_c": 0.041016949152542365,
"calib/mu_w": 0.03924242424242424,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.0,
"calib/std_conf": 0.01226350683939957,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2849.0,
"completions/max_terminated_length": 2849.0,
"completions/mean_length": 510.5,
"completions/mean_terminated_length": 514.5196533203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 107.0,
"epoch": 0.1184,
"grad_norm": 0.2618515193462372,
"learning_rate": 2.4722222222222226e-06,
"loss": -0.0142,
"num_tokens": 23797380.0,
"reward": 1.0773262977600098,
"reward_std": 0.2668326795101166,
"rewards/accuracy_reward_step": 0.4609375,
"rewards/final_brier_reward_step": 0.5436800718307495,
"rewards/format_reward_step_strict": 0.9609375,
"step": 111
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.8973257561727408e-06,
"aux_brier/mean_group_std": 0.08752773473943101,
"aux_brier/mean_r": 0.94084045562474,
"aux_brier/n_active_tok": 256.625,
"aux_brier/n_groups": 15.6875,
"aux_brier/n_step_records": 64.15625,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5182474629195941,
"calib/avg_num_step_conf": 8.34375,
"calib/ece": 0.46939112903225805,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0018900598490762394,
"calib/mean_conf": 0.03867338709677419,
"calib/mu_c": 0.039603174603174605,
"calib/mu_w": 0.037713114754098366,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.0,
"calib/std_conf": 0.011155886691895637,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2973.0,
"completions/max_terminated_length": 2973.0,
"completions/mean_length": 520.94921875,
"completions/mean_terminated_length": 527.1265258789062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 162.0,
"epoch": 0.11946666666666667,
"grad_norm": 1.2074123620986938,
"learning_rate": 2.4444444444444447e-06,
"loss": 0.0074,
"num_tokens": 24038663.0,
"reward": 1.098163366317749,
"reward_std": 0.25116461515426636,
"rewards/accuracy_reward_step": 0.4921875,
"rewards/final_brier_reward_step": 0.509840726852417,
"rewards/format_reward_step_strict": 0.95703125,
"step": 112
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.4069467026978089e-06,
"aux_brier/mean_group_std": 0.07493280707516929,
"aux_brier/mean_r": 0.9570411421452618,
"aux_brier/n_active_tok": 271.625,
"aux_brier/n_groups": 18.28125,
"aux_brier/n_step_records": 67.90625,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5311181102362205,
"calib/avg_num_step_conf": 8.7578125,
"calib/ece": 0.4559920634920635,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0013499212598425125,
"calib/mean_conf": 0.04003968253968254,
"calib/mu_c": 0.04072,
"calib/mu_w": 0.03937007874015749,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.00940693833232194,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2576.0,
"completions/max_terminated_length": 2576.0,
"completions/mean_length": 474.94921875,
"completions/mean_terminated_length": 476.8117980957031,
"completions/min_length": 0.0,
"completions/min_terminated_length": 111.0,
"epoch": 0.12053333333333334,
"grad_norm": 0.43918195366859436,
"learning_rate": 2.4166666666666667e-06,
"loss": 0.0525,
"num_tokens": 24265450.0,
"reward": 1.1140172481536865,
"reward_std": 0.26322633028030396,
"rewards/accuracy_reward_step": 0.48828125,
"rewards/final_brier_reward_step": 0.5341941118240356,
"rewards/format_reward_step_strict": 0.984375,
"step": 113
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 8.792451297034987e-07,
"aux_brier/mean_group_std": 0.046124216253668957,
"aux_brier/mean_r": 0.9682924531124371,
"aux_brier/n_active_tok": 258.75,
"aux_brier/n_groups": 15.46875,
"aux_brier/n_step_records": 64.6875,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5792217042217042,
"calib/avg_num_step_conf": 8.12109375,
"calib/ece": 0.5732941176470588,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0005011655011655028,
"calib/mean_conf": 0.041215686274509805,
"calib/mu_c": 0.04141025641025641,
"calib/mu_w": 0.04090909090909091,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.001372549019607843,
"calib/std_conf": 0.02223961641428461,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1891.0,
"completions/max_terminated_length": 1891.0,
"completions/mean_length": 453.24609375,
"completions/mean_terminated_length": 455.0235595703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 142.0,
"epoch": 0.1216,
"grad_norm": 0.5156078934669495,
"learning_rate": 2.388888888888889e-06,
"loss": 0.003,
"num_tokens": 24486505.0,
"reward": 1.213362455368042,
"reward_std": 0.21056273579597473,
"rewards/accuracy_reward_step": 0.609375,
"rewards/final_brier_reward_step": 0.4315750002861023,
"rewards/format_reward_step_strict": 0.9921875,
"step": 114
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.0225195213875438e-06,
"aux_brier/mean_group_std": 0.06268015443666367,
"aux_brier/mean_r": 0.9657730870218638,
"aux_brier/n_active_tok": 231.375,
"aux_brier/n_groups": 12.71875,
"aux_brier/n_step_records": 57.84375,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5528420275590551,
"calib/avg_num_step_conf": 7.23046875,
"calib/ece": 0.45827450980392154,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0017236712598425186,
"calib/mean_conf": 0.03976470588235294,
"calib/mu_c": 0.04062992125984252,
"calib/mu_w": 0.03890625,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0,
"calib/std_conf": 0.01277035205388487,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2608.0,
"completions/max_terminated_length": 2608.0,
"completions/mean_length": 440.875,
"completions/mean_terminated_length": 440.875,
"completions/min_length": 111.0,
"completions/min_terminated_length": 111.0,
"epoch": 0.12266666666666666,
"grad_norm": 0.1856922209262848,
"learning_rate": 2.361111111111111e-06,
"loss": 0.0393,
"num_tokens": 24704633.0,
"reward": 1.1238644123077393,
"reward_std": 0.2826547920703888,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/final_brier_reward_step": 0.5345203280448914,
"rewards/format_reward_step_strict": 0.98828125,
"step": 115
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.0355788804261579e-06,
"aux_brier/mean_group_std": 0.02800406959220724,
"aux_brier/mean_r": 0.9824136114310306,
"aux_brier/n_active_tok": 290.375,
"aux_brier/n_groups": 19.4375,
"aux_brier/n_step_records": 72.59375,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.51829740544727,
"calib/avg_num_step_conf": 9.07421875,
"calib/ece": 0.4704417670682731,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0011410868723376877,
"calib/mean_conf": 0.039598393574297196,
"calib/mu_c": 0.040157480314960636,
"calib/mu_w": 0.03901639344262295,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0,
"calib/std_conf": 0.010895605274002284,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2580.0,
"completions/max_terminated_length": 2580.0,
"completions/mean_length": 565.453125,
"completions/mean_terminated_length": 565.453125,
"completions/min_length": 94.0,
"completions/min_terminated_length": 94.0,
"epoch": 0.12373333333333333,
"grad_norm": 0.6097423434257507,
"learning_rate": 2.3333333333333336e-06,
"loss": 0.0612,
"num_tokens": 24953909.0,
"reward": 1.1090649366378784,
"reward_std": 0.26453733444213867,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/final_brier_reward_step": 0.5143847465515137,
"rewards/format_reward_step_strict": 0.96875,
"step": 116
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.478230049034579e-06,
"aux_brier/mean_group_std": 0.04360485184978645,
"aux_brier/mean_r": 0.9724314700344239,
"aux_brier/n_active_tok": 273.875,
"aux_brier/n_groups": 17.1875,
"aux_brier/n_step_records": 68.46875,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5554187192118226,
"calib/avg_num_step_conf": 8.7578125,
"calib/ece": 0.4238955823293173,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.002448794399792592,
"calib/mean_conf": 0.041967871485943775,
"calib/mu_c": 0.04327586206896551,
"calib/mu_w": 0.04082706766917292,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.0,
"calib/std_conf": 0.010439295351569445,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2090.0,
"completions/max_terminated_length": 2090.0,
"completions/mean_length": 486.9375,
"completions/mean_terminated_length": 488.8470764160156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 139.0,
"epoch": 0.1248,
"grad_norm": 0.8644006848335266,
"learning_rate": 2.305555555555556e-06,
"loss": 0.0284,
"num_tokens": 25185165.0,
"reward": 1.0777117013931274,
"reward_std": 0.2392779290676117,
"rewards/accuracy_reward_step": 0.45703125,
"rewards/final_brier_reward_step": 0.5530344247817993,
"rewards/format_reward_step_strict": 0.96484375,
"step": 117
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 3.743756228580253e-06,
"aux_brier/mean_group_std": 0.03310747233374885,
"aux_brier/mean_r": 0.9780488771064113,
"aux_brier/n_active_tok": 292.0,
"aux_brier/n_groups": 20.53125,
"aux_brier/n_step_records": 73.0,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5942113821138211,
"calib/avg_num_step_conf": 9.125,
"calib/ece": 0.45391129032258065,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0025463414634146336,
"calib/mean_conf": 0.04286290322580645,
"calib/mu_c": 0.04414634146341463,
"calib/mu_w": 0.0416,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0004032258064516129,
"calib/std_conf": 0.010408460173540713,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2954.0,
"completions/max_terminated_length": 2954.0,
"completions/mean_length": 559.9375,
"completions/mean_terminated_length": 559.9375,
"completions/min_length": 157.0,
"completions/min_terminated_length": 157.0,
"epoch": 0.12586666666666665,
"grad_norm": 0.6745241284370422,
"learning_rate": 2.277777777777778e-06,
"loss": 0.0691,
"num_tokens": 25432517.0,
"reward": 1.097048282623291,
"reward_std": 0.24959829449653625,
"rewards/accuracy_reward_step": 0.48046875,
"rewards/final_brier_reward_step": 0.5288183689117432,
"rewards/format_reward_step_strict": 0.96875,
"step": 118
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.710695929052486e-06,
"aux_brier/mean_group_std": 0.07189017199223828,
"aux_brier/mean_r": 0.9597806939114144,
"aux_brier/n_active_tok": 293.125,
"aux_brier/n_groups": 20.0,
"aux_brier/n_step_records": 73.28125,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5438347107438016,
"calib/avg_num_step_conf": 9.34375,
"calib/ece": 0.4652439024390244,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0014512396694214974,
"calib/mean_conf": 0.042886178861788624,
"calib/mu_c": 0.043600000000000014,
"calib/mu_w": 0.042148760330578516,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.00980517803475346,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3065.0,
"completions/max_terminated_length": 3065.0,
"completions/mean_length": 577.5390625,
"completions/mean_terminated_length": 579.803955078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.12693333333333334,
"grad_norm": 0.27141526341438293,
"learning_rate": 2.25e-06,
"loss": -0.019,
"num_tokens": 25685431.0,
"reward": 1.0970935821533203,
"reward_std": 0.2445300966501236,
"rewards/accuracy_reward_step": 0.48828125,
"rewards/final_brier_reward_step": 0.5133745670318604,
"rewards/format_reward_step_strict": 0.9609375,
"step": 119
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -3.0665530983409184e-06,
"aux_brier/mean_group_std": 0.06747079615723417,
"aux_brier/mean_r": 0.9619801587336653,
"aux_brier/n_active_tok": 239.875,
"aux_brier/n_groups": 13.8125,
"aux_brier/n_step_records": 59.96875,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.569947014732489,
"calib/avg_num_step_conf": 7.5859375,
"calib/ece": 0.5368650793650794,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0016141121736883007,
"calib/mean_conf": 0.043293650793650794,
"calib/mu_c": 0.04397260273972603,
"calib/mu_w": 0.04235849056603773,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.0003968253968253968,
"calib/std_conf": 0.011049360722955488,
"calib/step_conf_rate": 0.9765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2834.0,
"completions/max_terminated_length": 2834.0,
"completions/mean_length": 481.7421875,
"completions/mean_terminated_length": 483.63140869140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 111.0,
"epoch": 0.128,
"grad_norm": 1.0078459978103638,
"learning_rate": 2.222222222222222e-06,
"loss": 0.0569,
"num_tokens": 25915445.0,
"reward": 1.1690651178359985,
"reward_std": 0.23911015689373016,
"rewards/accuracy_reward_step": 0.5703125,
"rewards/final_brier_reward_step": 0.45751091837882996,
"rewards/format_reward_step_strict": 0.96875,
"step": 120
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 6.75080295375885e-06,
"aux_brier/mean_group_std": 0.040355337648619094,
"aux_brier/mean_r": 0.972082247007521,
"aux_brier/n_active_tok": 272.125,
"aux_brier/n_groups": 16.9375,
"aux_brier/n_step_records": 68.03125,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5502019437081914,
"calib/avg_num_step_conf": 8.50390625,
"calib/ece": 0.5065612648221345,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0014779755143253803,
"calib/mean_conf": 0.043794466403162056,
"calib/mu_c": 0.04446043165467626,
"calib/mu_w": 0.04298245614035088,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.00047430830039525685,
"calib/std_conf": 0.011619343326530134,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2665.0,
"completions/max_terminated_length": 2665.0,
"completions/mean_length": 547.52734375,
"completions/mean_terminated_length": 547.52734375,
"completions/min_length": 172.0,
"completions/min_terminated_length": 172.0,
"epoch": 0.12906666666666666,
"grad_norm": 0.7970133423805237,
"learning_rate": 2.1944444444444445e-06,
"loss": 0.0269,
"num_tokens": 26160668.0,
"reward": 1.1600005626678467,
"reward_std": 0.27700334787368774,
"rewards/accuracy_reward_step": 0.54296875,
"rewards/final_brier_reward_step": 0.4915648102760315,
"rewards/format_reward_step_strict": 0.98828125,
"step": 121
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.735171854006957e-06,
"aux_brier/mean_group_std": 0.05605299531956594,
"aux_brier/mean_r": 0.9637033242442844,
"aux_brier/n_active_tok": 260.875,
"aux_brier/n_groups": 15.96875,
"aux_brier/n_step_records": 65.21875,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.6272407611693326,
"calib/avg_num_step_conf": 8.23046875,
"calib/ece": 0.5545121951219513,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.008432156646442375,
"calib/mean_conf": 0.04711382113821139,
"calib/mu_c": 0.05047297297297298,
"calib/mu_w": 0.0420408163265306,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.0,
"calib/std_conf": 0.040637741929624244,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2685.0,
"completions/max_terminated_length": 2685.0,
"completions/mean_length": 513.359375,
"completions/mean_terminated_length": 517.4015502929688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 151.0,
"epoch": 0.13013333333333332,
"grad_norm": 0.2785504460334778,
"learning_rate": 2.166666666666667e-06,
"loss": 0.0264,
"num_tokens": 26399432.0,
"reward": 1.1661373376846313,
"reward_std": 0.24181430041790009,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/final_brier_reward_step": 0.43017420172691345,
"rewards/format_reward_step_strict": 0.953125,
"step": 122
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.8249857914598433e-06,
"aux_brier/mean_group_std": 0.05829732431195981,
"aux_brier/mean_r": 0.9573845217098387,
"aux_brier/n_active_tok": 268.625,
"aux_brier/n_groups": 16.15625,
"aux_brier/n_step_records": 67.15625,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.4807990189750871,
"calib/avg_num_step_conf": 8.640625,
"calib/ece": 0.4654216867469879,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0017122757196334146,
"calib/mean_conf": 0.044618473895582336,
"calib/mu_c": 0.04377952755905511,
"calib/mu_w": 0.04549180327868853,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.014505054515800577,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2423.0,
"completions/max_terminated_length": 2423.0,
"completions/mean_length": 530.5078125,
"completions/mean_terminated_length": 536.7984619140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 179.0,
"epoch": 0.1312,
"grad_norm": 0.4846332371234894,
"learning_rate": 2.138888888888889e-06,
"loss": -0.0207,
"num_tokens": 26640530.0,
"reward": 1.1089584827423096,
"reward_std": 0.31316617131233215,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/final_brier_reward_step": 0.5139589905738831,
"rewards/format_reward_step_strict": 0.96875,
"step": 123
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.8679218291860487e-07,
"aux_brier/mean_group_std": 0.057472229525174494,
"aux_brier/mean_r": 0.9662159645971783,
"aux_brier/n_active_tok": 260.625,
"aux_brier/n_groups": 15.46875,
"aux_brier/n_step_records": 65.15625,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5304846274101094,
"calib/avg_num_step_conf": 8.453125,
"calib/ece": 0.5553754940711463,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0020915841584158365,
"calib/mean_conf": 0.04541501976284585,
"calib/mu_c": 0.04624999999999999,
"calib/mu_w": 0.044158415841584156,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.014835251113226726,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1870.0,
"completions/max_terminated_length": 1870.0,
"completions/mean_length": 511.12109375,
"completions/mean_terminated_length": 515.1456909179688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 192.0,
"epoch": 0.13226666666666667,
"grad_norm": 0.48033249378204346,
"learning_rate": 2.1111111111111114e-06,
"loss": -0.0299,
"num_tokens": 26878193.0,
"reward": 1.1996898651123047,
"reward_std": 0.19524884223937988,
"rewards/accuracy_reward_step": 0.59375,
"rewards/final_brier_reward_step": 0.4471972584724426,
"rewards/format_reward_step_strict": 0.98828125,
"step": 124
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -9.294555074201849e-07,
"aux_brier/mean_group_std": 0.06517470467191165,
"aux_brier/mean_r": 0.9624438149609646,
"aux_brier/n_active_tok": 281.375,
"aux_brier/n_groups": 19.8125,
"aux_brier/n_step_records": 70.34375,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.5046576618537495,
"calib/avg_num_step_conf": 9.18359375,
"calib/ece": 0.49658536585365853,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0004537893406081836,
"calib/mean_conf": 0.04577235772357724,
"calib/mu_c": 0.045563909774436084,
"calib/mu_w": 0.04601769911504427,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0008536585365853661,
"calib/std_conf": 0.01133439474331281,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2869.0,
"completions/max_terminated_length": 2869.0,
"completions/mean_length": 545.97265625,
"completions/mean_terminated_length": 550.2716674804688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 146.0,
"epoch": 0.13333333333333333,
"grad_norm": 0.3349872827529907,
"learning_rate": 2.0833333333333334e-06,
"loss": 0.054,
"num_tokens": 27122770.0,
"reward": 1.1255595684051514,
"reward_std": 0.27273088693618774,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/final_brier_reward_step": 0.4866132438182831,
"rewards/format_reward_step_strict": 0.9609375,
"step": 125
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -6.586428731011895e-06,
"aux_brier/mean_group_std": 0.07592454433965073,
"aux_brier/mean_r": 0.9427247285566948,
"aux_brier/n_active_tok": 298.0,
"aux_brier/n_groups": 19.84375,
"aux_brier/n_step_records": 74.5,
"calib/answer_extract_rate": 0.953125,
"calib/auroc": 0.6292750117583819,
"calib/avg_num_step_conf": 9.43359375,
"calib/ece": 0.4579918032786885,
"calib/final_conf_rate": 0.953125,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.004736276288382718,
"calib/mean_conf": 0.04610655737704919,
"calib/mu_c": 0.04845528455284553,
"calib/mu_w": 0.04371900826446281,
"calib/nonempty_final_conf_rate": 0.953125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.012642828186042466,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2808.0,
"completions/max_terminated_length": 2808.0,
"completions/mean_length": 587.05078125,
"completions/mean_terminated_length": 591.6732177734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.1344,
"grad_norm": 0.5688815712928772,
"learning_rate": 2.0555555555555555e-06,
"loss": 0.0485,
"num_tokens": 27378519.0,
"reward": 1.0862913131713867,
"reward_std": 0.21779096126556396,
"rewards/accuracy_reward_step": 0.48046875,
"rewards/final_brier_reward_step": 0.5170402526855469,
"rewards/format_reward_step_strict": 0.953125,
"step": 126
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -2.528456068962015e-07,
"aux_brier/mean_group_std": 0.04799782578420207,
"aux_brier/mean_r": 0.968363675771192,
"aux_brier/n_active_tok": 301.125,
"aux_brier/n_groups": 20.1875,
"aux_brier/n_step_records": 75.28125,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5944516129032258,
"calib/avg_num_step_conf": 9.52734375,
"calib/ece": 0.4515261044176706,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.003987741935483874,
"calib/mean_conf": 0.046465863453815266,
"calib/mu_c": 0.04846774193548388,
"calib/mu_w": 0.044480000000000006,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.014629282380530732,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2840.0,
"completions/max_terminated_length": 2840.0,
"completions/mean_length": 565.55078125,
"completions/mean_terminated_length": 567.7686767578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 168.0,
"epoch": 0.13546666666666668,
"grad_norm": 0.1792018562555313,
"learning_rate": 2.027777777777778e-06,
"loss": 0.0547,
"num_tokens": 27626972.0,
"reward": 1.101007342338562,
"reward_std": 0.21284964680671692,
"rewards/accuracy_reward_step": 0.484375,
"rewards/final_brier_reward_step": 0.5290297269821167,
"rewards/format_reward_step_strict": 0.96875,
"step": 127
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.730827152268688e-06,
"aux_brier/mean_group_std": 0.0448499201654236,
"aux_brier/mean_r": 0.9670285229414458,
"aux_brier/n_active_tok": 243.125,
"aux_brier/n_groups": 14.4375,
"aux_brier/n_step_records": 60.78125,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5500327653997379,
"calib/avg_num_step_conf": 7.8515625,
"calib/ece": 0.5187349397590362,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0011120576671035387,
"calib/mean_conf": 0.04544176706827309,
"calib/mu_c": 0.04592857142857143,
"calib/mu_w": 0.04481651376146789,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0009638554216867469,
"calib/std_conf": 0.012491186551156098,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2706.0,
"completions/max_terminated_length": 2706.0,
"completions/mean_length": 522.80859375,
"completions/mean_terminated_length": 531.107177734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.13653333333333334,
"grad_norm": 0.23012739419937134,
"learning_rate": 2.0000000000000003e-06,
"loss": -0.0033,
"num_tokens": 27867475.0,
"reward": 1.146785020828247,
"reward_std": 0.24630600214004517,
"rewards/accuracy_reward_step": 0.546875,
"rewards/final_brier_reward_step": 0.46995264291763306,
"rewards/format_reward_step_strict": 0.96484375,
"step": 128
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -5.2225584035725525e-06,
"aux_brier/mean_group_std": 0.07485034799952374,
"aux_brier/mean_r": 0.9532100457506557,
"aux_brier/n_active_tok": 255.125,
"aux_brier/n_groups": 15.46875,
"aux_brier/n_step_records": 63.78125,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5524647887323944,
"calib/avg_num_step_conf": 8.0546875,
"calib/ece": 0.5166269841269842,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0014711907810499372,
"calib/mean_conf": 0.04773809523809523,
"calib/mu_c": 0.04838028169014084,
"calib/mu_w": 0.0469090909090909,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0004365079365079365,
"calib/std_conf": 0.01185405869240545,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2399.0,
"completions/max_terminated_length": 2399.0,
"completions/mean_length": 496.1953125,
"completions/mean_terminated_length": 498.1412048339844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.1376,
"grad_norm": 0.1529451161623001,
"learning_rate": 1.9722222222222224e-06,
"loss": 0.015,
"num_tokens": 28096885.0,
"reward": 1.1651663780212402,
"reward_std": 0.17689114809036255,
"rewards/accuracy_reward_step": 0.5546875,
"rewards/final_brier_reward_step": 0.48097774386405945,
"rewards/format_reward_step_strict": 0.98046875,
"step": 129
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.2395922233121581e-06,
"aux_brier/mean_group_std": 0.05960076269232059,
"aux_brier/mean_r": 0.9625159202485318,
"aux_brier/n_active_tok": 248.5,
"aux_brier/n_groups": 13.78125,
"aux_brier/n_step_records": 62.125,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.6492900213966154,
"calib/avg_num_step_conf": 7.8125,
"calib/ece": 0.5746484374999999,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.005729754263113521,
"calib/mean_conf": 0.04644531250000001,
"calib/mu_c": 0.04861635220125785,
"calib/mu_w": 0.04288659793814433,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.97265625,
"calib/pce": 0.0,
"calib/std_conf": 0.013208844452765114,
"calib/step_conf_rate": 0.97265625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1577.0,
"completions/max_terminated_length": 1577.0,
"completions/mean_length": 479.11328125,
"completions/mean_terminated_length": 480.9921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 158.0,
"epoch": 0.13866666666666666,
"grad_norm": 0.16696175932884216,
"learning_rate": 1.944444444444445e-06,
"loss": 0.012,
"num_tokens": 28324826.0,
"reward": 1.210479974746704,
"reward_std": 0.17544493079185486,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/final_brier_reward_step": 0.4200449287891388,
"rewards/format_reward_step_strict": 0.96875,
"step": 130
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -2.270774924761909e-06,
"aux_brier/mean_group_std": 0.05245029687610163,
"aux_brier/mean_r": 0.9671859307003272,
"aux_brier/n_active_tok": 243.375,
"aux_brier/n_groups": 13.90625,
"aux_brier/n_step_records": 60.84375,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.6170370848946892,
"calib/avg_num_step_conf": 7.60546875,
"calib/ece": 0.3822310756972111,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.01720764956712753,
"calib/mean_conf": 0.055219123505976096,
"calib/mu_c": 0.06495412844036696,
"calib/mu_w": 0.047746478873239434,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0015936254980079682,
"calib/std_conf": 0.06435902002821915,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2900.0,
"completions/max_terminated_length": 2900.0,
"completions/mean_length": 486.2265625,
"completions/mean_terminated_length": 486.2265625,
"completions/min_length": 125.0,
"completions/min_terminated_length": 125.0,
"epoch": 0.13973333333333332,
"grad_norm": 0.13326288759708405,
"learning_rate": 1.916666666666667e-06,
"loss": -0.0098,
"num_tokens": 28555508.0,
"reward": 1.0638514757156372,
"reward_std": 0.1941002905368805,
"rewards/accuracy_reward_step": 0.42578125,
"rewards/final_brier_reward_step": 0.599155843257904,
"rewards/format_reward_step_strict": 0.9765625,
"step": 131
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 3.596242803549643e-08,
"aux_brier/mean_group_std": 0.09452182542658515,
"aux_brier/mean_r": 0.9286314229311331,
"aux_brier/n_active_tok": 267.875,
"aux_brier/n_groups": 16.375,
"aux_brier/n_step_records": 66.96875,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5734089688506981,
"calib/avg_num_step_conf": 8.375,
"calib/ece": 0.5588799999999999,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.002553705692803436,
"calib/mean_conf": 0.04992,
"calib/mu_c": 0.05092105263157895,
"calib/mu_w": 0.048367346938775514,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0004,
"calib/std_conf": 0.013798318738165168,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2848.0,
"completions/max_terminated_length": 2848.0,
"completions/mean_length": 511.48046875,
"completions/mean_terminated_length": 511.48046875,
"completions/min_length": 147.0,
"completions/min_terminated_length": 147.0,
"epoch": 0.1408,
"grad_norm": 0.3829323649406433,
"learning_rate": 1.888888888888889e-06,
"loss": 0.0553,
"num_tokens": 28792039.0,
"reward": 1.1843125820159912,
"reward_std": 0.2582811713218689,
"rewards/accuracy_reward_step": 0.59375,
"rewards/final_brier_reward_step": 0.4325625002384186,
"rewards/format_reward_step_strict": 0.96484375,
"step": 132
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -2.8079169965833373e-06,
"aux_brier/mean_group_std": 0.05679022952356221,
"aux_brier/mean_r": 0.9633026394767802,
"aux_brier/n_active_tok": 277.625,
"aux_brier/n_groups": 15.1875,
"aux_brier/n_step_records": 69.40625,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5233465608465608,
"calib/avg_num_step_conf": 8.875,
"calib/ece": 0.37421686746987953,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0008829365079365062,
"calib/mean_conf": 0.04955823293172691,
"calib/mu_c": 0.04904761904761905,
"calib/mu_w": 0.049930555555555554,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.001044176706827309,
"calib/std_conf": 0.019599545690014854,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2434.0,
"completions/max_terminated_length": 2434.0,
"completions/mean_length": 613.3515625,
"completions/mean_terminated_length": 620.62451171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 115.0,
"epoch": 0.14186666666666667,
"grad_norm": 0.20712287724018097,
"learning_rate": 1.8611111111111113e-06,
"loss": 0.0001,
"num_tokens": 29055401.0,
"reward": 1.039604902267456,
"reward_std": 0.2921742796897888,
"rewards/accuracy_reward_step": 0.41015625,
"rewards/final_brier_reward_step": 0.5959199070930481,
"rewards/format_reward_step_strict": 0.9609375,
"step": 133
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 3.1055995453055374e-07,
"aux_brier/mean_group_std": 0.05259560300703106,
"aux_brier/mean_r": 0.9646003977140253,
"aux_brier/n_active_tok": 301.125,
"aux_brier/n_groups": 20.84375,
"aux_brier/n_step_records": 75.28125,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.5557312252964427,
"calib/avg_num_step_conf": 9.48046875,
"calib/ece": 0.48465587044534414,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0034505928853754836,
"calib/mean_conf": 0.04975708502024292,
"calib/mu_c": 0.05136363636363636,
"calib/mu_w": 0.04791304347826088,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.0,
"calib/std_conf": 0.015082153582465577,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3006.0,
"completions/max_terminated_length": 3006.0,
"completions/mean_length": 612.5078125,
"completions/mean_terminated_length": 614.9098510742188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 212.0,
"epoch": 0.14293333333333333,
"grad_norm": 0.30655884742736816,
"learning_rate": 1.8333333333333333e-06,
"loss": 0.0458,
"num_tokens": 29321155.0,
"reward": 1.1200120449066162,
"reward_std": 0.29521992802619934,
"rewards/accuracy_reward_step": 0.515625,
"rewards/final_brier_reward_step": 0.49567344784736633,
"rewards/format_reward_step_strict": 0.9609375,
"step": 134
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -8.457031528685821e-07,
"aux_brier/mean_group_std": 0.03996141459596204,
"aux_brier/mean_r": 0.9808545793400902,
"aux_brier/n_active_tok": 279.625,
"aux_brier/n_groups": 18.34375,
"aux_brier/n_step_records": 69.90625,
"calib/answer_extract_rate": 0.95703125,
"calib/auroc": 0.5963361777055398,
"calib/avg_num_step_conf": 8.73828125,
"calib/ece": 0.511265306122449,
"calib/final_conf_rate": 0.95703125,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.005707706894216444,
"calib/mean_conf": 0.05200000000000001,
"calib/mu_c": 0.0544927536231884,
"calib/mu_w": 0.04878504672897196,
"calib/nonempty_final_conf_rate": 0.95703125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.014613384974336483,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2560.0,
"completions/max_terminated_length": 2560.0,
"completions/mean_length": 586.828125,
"completions/mean_terminated_length": 589.1294555664062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 177.0,
"epoch": 0.144,
"grad_norm": 0.18846307694911957,
"learning_rate": 1.8055555555555557e-06,
"loss": 0.0848,
"num_tokens": 29577263.0,
"reward": 1.130202293395996,
"reward_std": 0.3218243718147278,
"rewards/accuracy_reward_step": 0.5390625,
"rewards/final_brier_reward_step": 0.4661218523979187,
"rewards/format_reward_step_strict": 0.94921875,
"step": 135
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.1063661412014092e-07,
"aux_brier/mean_group_std": 0.05412816794943832,
"aux_brier/mean_r": 0.9704970737134514,
"aux_brier/n_active_tok": 280.25,
"aux_brier/n_groups": 17.65625,
"aux_brier/n_step_records": 70.0625,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5938271604938271,
"calib/avg_num_step_conf": 8.76171875,
"calib/ece": 0.40879518072289156,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0034697855750487477,
"calib/mean_conf": 0.05232931726907631,
"calib/mu_c": 0.05421052631578949,
"calib/mu_w": 0.05074074074074074,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0016465863453815261,
"calib/std_conf": 0.013775158944692768,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2988.0,
"completions/max_terminated_length": 2988.0,
"completions/mean_length": 544.7734375,
"completions/mean_terminated_length": 544.7734375,
"completions/min_length": 134.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.14506666666666668,
"grad_norm": 0.1553274393081665,
"learning_rate": 1.777777777777778e-06,
"loss": 0.0518,
"num_tokens": 29825213.0,
"reward": 1.0728051662445068,
"reward_std": 0.20379653573036194,
"rewards/accuracy_reward_step": 0.4453125,
"rewards/final_brier_reward_step": 0.5724707245826721,
"rewards/format_reward_step_strict": 0.96875,
"step": 136
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -4.474809657928347e-06,
"aux_brier/mean_group_std": 0.06528294052165005,
"aux_brier/mean_r": 0.9520557914546299,
"aux_brier/n_active_tok": 299.375,
"aux_brier/n_groups": 20.78125,
"aux_brier/n_step_records": 74.84375,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.4745806451612903,
"calib/avg_num_step_conf": 9.421875,
"calib/ece": 0.44831325301204816,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.004016064257028112,
"calib/gap": -0.008133548387096783,
"calib/mean_conf": 0.05738955823293173,
"calib/mu_c": 0.05330645161290322,
"calib/mu_w": 0.06144,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0038554216867469873,
"calib/std_conf": 0.05891261014368137,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2979.0,
"completions/max_terminated_length": 2979.0,
"completions/mean_length": 559.30078125,
"completions/mean_terminated_length": 561.494140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 189.0,
"epoch": 0.14613333333333334,
"grad_norm": 0.039768896996974945,
"learning_rate": 1.75e-06,
"loss": 0.0633,
"num_tokens": 30075378.0,
"reward": 1.0999794006347656,
"reward_std": 0.27602440118789673,
"rewards/accuracy_reward_step": 0.484375,
"rewards/final_brier_reward_step": 0.5327301025390625,
"rewards/format_reward_step_strict": 0.96484375,
"step": 137
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 3.3754860414569876e-06,
"aux_brier/mean_group_std": 0.04512002295921746,
"aux_brier/mean_r": 0.9701418410569603,
"aux_brier/n_active_tok": 267.875,
"aux_brier/n_groups": 16.46875,
"aux_brier/n_step_records": 66.96875,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5121019108280255,
"calib/avg_num_step_conf": 8.3984375,
"calib/ece": 0.5724603174603174,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0015742541066040908,
"calib/mean_conf": 0.05365079365079366,
"calib/mu_c": 0.05305732484076432,
"calib/mu_w": 0.05463157894736841,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0015476190476190477,
"calib/std_conf": 0.02545157788488635,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2772.0,
"completions/max_terminated_length": 2772.0,
"completions/mean_length": 529.83984375,
"completions/mean_terminated_length": 531.9176635742188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 131.0,
"epoch": 0.1472,
"grad_norm": 0.17647089064121246,
"learning_rate": 1.7222222222222224e-06,
"loss": 0.0473,
"num_tokens": 30315353.0,
"reward": 1.203694224357605,
"reward_std": 0.3053821325302124,
"rewards/accuracy_reward_step": 0.61328125,
"rewards/final_brier_reward_step": 0.4241519570350647,
"rewards/format_reward_step_strict": 0.96875,
"step": 138
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.288317745319235e-06,
"aux_brier/mean_group_std": 0.06964590410507535,
"aux_brier/mean_r": 0.9577885741251095,
"aux_brier/n_active_tok": 241.125,
"aux_brier/n_groups": 12.59375,
"aux_brier/n_step_records": 60.28125,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5904228222109017,
"calib/avg_num_step_conf": 7.64453125,
"calib/ece": 0.5367058823529411,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0035220326031584273,
"calib/mean_conf": 0.055450980392156866,
"calib/mu_c": 0.05688741721854305,
"calib/mu_w": 0.05336538461538462,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.01390769814405972,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2001.0,
"completions/max_terminated_length": 2001.0,
"completions/mean_length": 467.2734375,
"completions/mean_terminated_length": 469.10589599609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 157.0,
"epoch": 0.14826666666666666,
"grad_norm": 0.0982087180018425,
"learning_rate": 1.6944444444444446e-06,
"loss": 0.0032,
"num_tokens": 30538071.0,
"reward": 1.2004034519195557,
"reward_std": 0.2416152060031891,
"rewards/accuracy_reward_step": 0.58984375,
"rewards/final_brier_reward_step": 0.4656761884689331,
"rewards/format_reward_step_strict": 0.98828125,
"step": 139
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -3.4861101533278926e-06,
"aux_brier/mean_group_std": 0.03429095544848432,
"aux_brier/mean_r": 0.9768792414671389,
"aux_brier/n_active_tok": 259.75,
"aux_brier/n_groups": 15.5625,
"aux_brier/n_step_records": 64.9375,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.6691056910569105,
"calib/avg_num_step_conf": 8.1171875,
"calib/ece": 0.594015748031496,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.003937007874015748,
"calib/gap": -0.0006815718157181655,
"calib/mean_conf": 0.0594488188976378,
"calib/mu_c": 0.05920731707317073,
"calib/mu_w": 0.059888888888888894,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0038976377952755904,
"calib/std_conf": 0.06054616364127345,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1981.0,
"completions/max_terminated_length": 1981.0,
"completions/mean_length": 517.515625,
"completions/mean_terminated_length": 517.515625,
"completions/min_length": 150.0,
"completions/min_terminated_length": 150.0,
"epoch": 0.14933333333333335,
"grad_norm": 0.028828710317611694,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.0107,
"num_tokens": 30775571.0,
"reward": 1.236849308013916,
"reward_std": 0.20067928731441498,
"rewards/accuracy_reward_step": 0.640625,
"rewards/final_brier_reward_step": 0.41614726185798645,
"rewards/format_reward_step_strict": 0.984375,
"step": 140
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -8.505575863626547e-07,
"aux_brier/mean_group_std": 0.055646158079814545,
"aux_brier/mean_r": 0.9621782349581267,
"aux_brier/n_active_tok": 249.875,
"aux_brier/n_groups": 14.53125,
"aux_brier/n_step_records": 62.46875,
"calib/answer_extract_rate": 0.95703125,
"calib/auroc": 0.6119267337807606,
"calib/avg_num_step_conf": 7.90234375,
"calib/ece": 0.5520816326530612,
"calib/final_conf_rate": 0.95703125,
"calib/format_rate": 0.94140625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.005032857941834432,
"calib/mean_conf": 0.05608163265306122,
"calib/mu_c": 0.058053691275167775,
"calib/mu_w": 0.05302083333333334,
"calib/nonempty_final_conf_rate": 0.95703125,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.0,
"calib/std_conf": 0.01552330273964561,
"calib/step_conf_rate": 0.9765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2753.0,
"completions/max_terminated_length": 2753.0,
"completions/mean_length": 557.76953125,
"completions/mean_terminated_length": 559.9569091796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 194.0,
"epoch": 0.1504,
"grad_norm": 0.35210710763931274,
"learning_rate": 1.638888888888889e-06,
"loss": 0.024,
"num_tokens": 31025456.0,
"reward": 1.158670425415039,
"reward_std": 0.312752902507782,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/final_brier_reward_step": 0.4237445294857025,
"rewards/format_reward_step_strict": 0.94140625,
"step": 141
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 5.714073346929638e-06,
"aux_brier/mean_group_std": 0.058085549976393896,
"aux_brier/mean_r": 0.9624802664772752,
"aux_brier/n_active_tok": 273.625,
"aux_brier/n_groups": 15.4375,
"aux_brier/n_step_records": 68.40625,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.6088179519595448,
"calib/avg_num_step_conf": 8.55078125,
"calib/ece": 0.49596837944664024,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.004401390644753464,
"calib/mean_conf": 0.05739130434782609,
"calib/mu_c": 0.05935714285714285,
"calib/mu_w": 0.05495575221238939,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0156389931326737,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1721.0,
"completions/max_terminated_length": 1721.0,
"completions/mean_length": 552.61328125,
"completions/mean_terminated_length": 556.9645385742188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.15146666666666667,
"grad_norm": 0.5921297669410706,
"learning_rate": 1.6111111111111113e-06,
"loss": -0.0131,
"num_tokens": 31272085.0,
"reward": 1.1667234897613525,
"reward_std": 0.18465447425842285,
"rewards/accuracy_reward_step": 0.546875,
"rewards/final_brier_reward_step": 0.5028312802314758,
"rewards/format_reward_step_strict": 0.98828125,
"step": 142
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -6.382482328870509e-06,
"aux_brier/mean_group_std": 0.06155460154384764,
"aux_brier/mean_r": 0.9625660744821987,
"aux_brier/n_active_tok": 258.875,
"aux_brier/n_groups": 14.78125,
"aux_brier/n_step_records": 64.71875,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5420883016435707,
"calib/avg_num_step_conf": 8.08984375,
"calib/ece": 0.5163888888888888,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0013619078311311733,
"calib/mean_conf": 0.05900793650793651,
"calib/mu_c": 0.05958620689655173,
"calib/mu_w": 0.05822429906542056,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.0,
"calib/std_conf": 0.015942852322794843,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2866.0,
"completions/max_terminated_length": 2866.0,
"completions/mean_length": 524.44921875,
"completions/mean_terminated_length": 524.44921875,
"completions/min_length": 203.0,
"completions/min_terminated_length": 203.0,
"epoch": 0.15253333333333333,
"grad_norm": 0.48159152269363403,
"learning_rate": 1.5833333333333333e-06,
"loss": 0.0431,
"num_tokens": 31513680.0,
"reward": 1.1731898784637451,
"reward_std": 0.1981586366891861,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/final_brier_reward_step": 0.47400975227355957,
"rewards/format_reward_step_strict": 0.9765625,
"step": 143
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.5024501462956863e-06,
"aux_brier/mean_group_std": 0.02612270270458595,
"aux_brier/mean_r": 0.9838132009374825,
"aux_brier/n_active_tok": 268.125,
"aux_brier/n_groups": 15.1875,
"aux_brier/n_step_records": 67.03125,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5332445036642238,
"calib/avg_num_step_conf": 8.37890625,
"calib/ece": 0.5641106719367588,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0008014656895403011,
"calib/mean_conf": 0.06039525691699605,
"calib/mu_c": 0.06069620253164556,
"calib/mu_w": 0.05989473684210526,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.016389584361016573,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2603.0,
"completions/max_terminated_length": 2603.0,
"completions/mean_length": 541.16015625,
"completions/mean_terminated_length": 541.16015625,
"completions/min_length": 123.0,
"completions/min_terminated_length": 123.0,
"epoch": 0.1536,
"grad_norm": 0.38825613260269165,
"learning_rate": 1.5555555555555558e-06,
"loss": -0.004,
"num_tokens": 31756345.0,
"reward": 1.2218644618988037,
"reward_std": 0.2094503939151764,
"rewards/accuracy_reward_step": 0.6171875,
"rewards/final_brier_reward_step": 0.44214531779289246,
"rewards/format_reward_step_strict": 0.98828125,
"step": 144
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -2.3559944934969934e-06,
"aux_brier/mean_group_std": 0.06517902483204491,
"aux_brier/mean_r": 0.9474553158431747,
"aux_brier/n_active_tok": 280.125,
"aux_brier/n_groups": 16.25,
"aux_brier/n_step_records": 70.03125,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4458034038697987,
"calib/avg_num_step_conf": 8.75390625,
"calib/ece": 0.5351574803149606,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.005481783472464891,
"calib/mean_conf": 0.0691732283464567,
"calib/mu_c": 0.06699346405228758,
"calib/mu_w": 0.07247524752475247,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.000984251968503937,
"calib/std_conf": 0.027011690233356714,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2537.0,
"completions/max_terminated_length": 2537.0,
"completions/mean_length": 515.55078125,
"completions/mean_terminated_length": 515.55078125,
"completions/min_length": 142.0,
"completions/min_terminated_length": 142.0,
"epoch": 0.15466666666666667,
"grad_norm": 0.9937516450881958,
"learning_rate": 1.527777777777778e-06,
"loss": 0.0377,
"num_tokens": 31991030.0,
"reward": 1.2110344171524048,
"reward_std": 0.2681943476200104,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/final_brier_reward_step": 0.46913787722587585,
"rewards/format_reward_step_strict": 0.9921875,
"step": 145
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.8197305471367997e-06,
"aux_brier/mean_group_std": 0.03062187333883319,
"aux_brier/mean_r": 0.983849446933646,
"aux_brier/n_active_tok": 271.375,
"aux_brier/n_groups": 16.96875,
"aux_brier/n_step_records": 67.84375,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5542513316235944,
"calib/avg_num_step_conf": 8.6171875,
"calib/ece": 0.3800806451612903,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.007624120470835785,
"calib/mean_conf": 0.06749999999999999,
"calib/mu_c": 0.0717117117117117,
"calib/mu_w": 0.06408759124087592,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.0394544449435859,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2625.0,
"completions/max_terminated_length": 2625.0,
"completions/mean_length": 536.5390625,
"completions/mean_terminated_length": 538.6431884765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 178.0,
"epoch": 0.15573333333333333,
"grad_norm": 0.22396814823150635,
"learning_rate": 1.5e-06,
"loss": 0.0678,
"num_tokens": 32235600.0,
"reward": 1.0630507469177246,
"reward_std": 0.23441281914710999,
"rewards/accuracy_reward_step": 0.43359375,
"rewards/final_brier_reward_step": 0.588140606880188,
"rewards/format_reward_step_strict": 0.96484375,
"step": 146
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -2.0655819099602013e-07,
"aux_brier/mean_group_std": 0.04439007321310594,
"aux_brier/mean_r": 0.967196735743911,
"aux_brier/n_active_tok": 261.5,
"aux_brier/n_groups": 14.28125,
"aux_brier/n_step_records": 65.375,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5725832215193917,
"calib/avg_num_step_conf": 8.171875,
"calib/ece": 0.37253968253968256,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.004976039869656904,
"calib/mean_conf": 0.06793650793650792,
"calib/mu_c": 0.07072072072072072,
"calib/mu_w": 0.06574468085106382,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.028488857758374963,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2768.0,
"completions/max_terminated_length": 2768.0,
"completions/mean_length": 534.3359375,
"completions/mean_terminated_length": 534.3359375,
"completions/min_length": 140.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.1568,
"grad_norm": 0.7205764651298523,
"learning_rate": 1.4722222222222225e-06,
"loss": 0.0173,
"num_tokens": 32476070.0,
"reward": 1.0745468139648438,
"reward_std": 0.20038121938705444,
"rewards/accuracy_reward_step": 0.43359375,
"rewards/final_brier_reward_step": 0.6028749942779541,
"rewards/format_reward_step_strict": 0.98046875,
"step": 147
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.0828740335949716e-06,
"aux_brier/mean_group_std": 0.06532441346014504,
"aux_brier/mean_r": 0.9624720179970169,
"aux_brier/n_active_tok": 264.25,
"aux_brier/n_groups": 14.34375,
"aux_brier/n_step_records": 66.0625,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.566489723696747,
"calib/avg_num_step_conf": 8.265625,
"calib/ece": 0.5632270916334662,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.003103988022322021,
"calib/mean_conf": 0.06625498007968127,
"calib/mu_c": 0.06740506329113923,
"calib/mu_w": 0.0643010752688172,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0176146988234553,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 3000.0,
"completions/max_terminated_length": 3000.0,
"completions/mean_length": 521.12109375,
"completions/mean_terminated_length": 525.2244262695312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 118.0,
"epoch": 0.15786666666666666,
"grad_norm": 0.35711437463760376,
"learning_rate": 1.4444444444444445e-06,
"loss": 0.0267,
"num_tokens": 32714589.0,
"reward": 1.209111213684082,
"reward_std": 0.2663658559322357,
"rewards/accuracy_reward_step": 0.6171875,
"rewards/final_brier_reward_step": 0.43019530177116394,
"rewards/format_reward_step_strict": 0.96875,
"step": 148
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -2.378460294938378e-06,
"aux_brier/mean_group_std": 0.04840870256089264,
"aux_brier/mean_r": 0.9751864015836805,
"aux_brier/n_active_tok": 283.875,
"aux_brier/n_groups": 16.8125,
"aux_brier/n_step_records": 70.96875,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5727946916471508,
"calib/avg_num_step_conf": 9.10546875,
"calib/ece": 0.42709677419354836,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 0.004032258064516129,
"calib/gap": -0.000554254488680711,
"calib/mean_conf": 0.0728225806451613,
"calib/mu_c": 0.07254098360655739,
"calib/mu_w": 0.0730952380952381,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.003991935483870968,
"calib/std_conf": 0.06446505034451548,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2578.0,
"completions/max_terminated_length": 2578.0,
"completions/mean_length": 580.8515625,
"completions/mean_terminated_length": 587.7391357421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 89.0,
"epoch": 0.15893333333333334,
"grad_norm": 0.6782702803611755,
"learning_rate": 1.4166666666666667e-06,
"loss": 0.0128,
"num_tokens": 32967743.0,
"reward": 1.0911545753479004,
"reward_std": 0.23866769671440125,
"rewards/accuracy_reward_step": 0.4765625,
"rewards/final_brier_reward_step": 0.5443058609962463,
"rewards/format_reward_step_strict": 0.95703125,
"step": 149
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 5.712223138637507e-06,
"aux_brier/mean_group_std": 0.0703186177078078,
"aux_brier/mean_r": 0.9530558812833309,
"aux_brier/n_active_tok": 263.5,
"aux_brier/n_groups": 16.5,
"aux_brier/n_step_records": 65.875,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.6715841584158416,
"calib/avg_num_step_conf": 8.234375,
"calib/ece": 0.5269322709163347,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.009469306930693075,
"calib/mean_conf": 0.07258964143426296,
"calib/mu_c": 0.07640000000000001,
"calib/mu_w": 0.06693069306930693,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0009561752988047808,
"calib/std_conf": 0.02238887859478425,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2796.0,
"completions/max_terminated_length": 2796.0,
"completions/mean_length": 496.140625,
"completions/mean_terminated_length": 496.140625,
"completions/min_length": 159.0,
"completions/min_terminated_length": 159.0,
"epoch": 0.16,
"grad_norm": 0.04868630692362785,
"learning_rate": 1.3888888888888892e-06,
"loss": 0.0303,
"num_tokens": 33199715.0,
"reward": 1.1928496360778809,
"reward_std": 0.20058949291706085,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/final_brier_reward_step": 0.4745234251022339,
"rewards/format_reward_step_strict": 0.9765625,
"step": 150
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -8.773511487314423e-07,
"aux_brier/mean_group_std": 0.06375056078987522,
"aux_brier/mean_r": 0.9548843774785912,
"aux_brier/n_active_tok": 259.75,
"aux_brier/n_groups": 15.28125,
"aux_brier/n_step_records": 64.9375,
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.5365328563857975,
"calib/avg_num_step_conf": 8.328125,
"calib/ece": 0.3739271255060729,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0021813725490195945,
"calib/mean_conf": 0.07546558704453442,
"calib/mu_c": 0.07666666666666666,
"calib/mu_w": 0.07448529411764707,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0,
"calib/std_conf": 0.028506797079224936,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2929.0,
"completions/max_terminated_length": 2929.0,
"completions/mean_length": 526.30859375,
"completions/mean_terminated_length": 530.4527587890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 176.0,
"epoch": 0.16106666666666666,
"grad_norm": 1.04205322265625,
"learning_rate": 1.3611111111111112e-06,
"loss": 0.0793,
"num_tokens": 33441474.0,
"reward": 1.0522384643554688,
"reward_std": 0.2746904194355011,
"rewards/accuracy_reward_step": 0.43359375,
"rewards/final_brier_reward_step": 0.5761418342590332,
"rewards/format_reward_step_strict": 0.94921875,
"step": 151
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.2677447153164323e-06,
"aux_brier/mean_group_std": 0.028382429319569653,
"aux_brier/mean_r": 0.9833729546309443,
"aux_brier/n_active_tok": 272.75,
"aux_brier/n_groups": 16.03125,
"aux_brier/n_step_records": 68.1875,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5209770475354321,
"calib/avg_num_step_conf": 8.5703125,
"calib/ece": 0.3976679841897233,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0003148124921610457,
"calib/mean_conf": 0.07268774703557312,
"calib/mu_c": 0.07252100840336136,
"calib/mu_w": 0.0728358208955224,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.02199284217153069,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2459.0,
"completions/max_terminated_length": 2459.0,
"completions/mean_length": 508.5859375,
"completions/mean_terminated_length": 512.590576171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 208.0,
"epoch": 0.16213333333333332,
"grad_norm": 1.1266095638275146,
"learning_rate": 1.3333333333333334e-06,
"loss": 0.0088,
"num_tokens": 33677064.0,
"reward": 1.1023454666137695,
"reward_std": 0.2495948076248169,
"rewards/accuracy_reward_step": 0.46484375,
"rewards/final_brier_reward_step": 0.5812570452690125,
"rewards/format_reward_step_strict": 0.984375,
"step": 152
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 3.792688601622629e-06,
"aux_brier/mean_group_std": 0.07092418107172463,
"aux_brier/mean_r": 0.9471419582640936,
"aux_brier/n_active_tok": 263.125,
"aux_brier/n_groups": 15.15625,
"aux_brier/n_step_records": 65.78125,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5624302134646962,
"calib/avg_num_step_conf": 8.39453125,
"calib/ece": 0.50472,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.004013136288998367,
"calib/mean_conf": 0.07528,
"calib/mu_c": 0.07696551724137932,
"calib/mu_w": 0.07295238095238095,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.023700666657290467,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3059.0,
"completions/max_terminated_length": 3059.0,
"completions/mean_length": 529.859375,
"completions/mean_terminated_length": 531.937255859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 202.0,
"epoch": 0.1632,
"grad_norm": 0.46357324719429016,
"learning_rate": 1.3055555555555556e-06,
"loss": 0.0795,
"num_tokens": 33920028.0,
"reward": 1.1745765209197998,
"reward_std": 0.18833814561367035,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/final_brier_reward_step": 0.48736873269081116,
"rewards/format_reward_step_strict": 0.97265625,
"step": 153
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 7.3004426348055595e-06,
"aux_brier/mean_group_std": 0.06479431974825353,
"aux_brier/mean_r": 0.9635108167995876,
"aux_brier/n_active_tok": 267.625,
"aux_brier/n_groups": 16.09375,
"aux_brier/n_step_records": 66.90625,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5803173187205303,
"calib/avg_num_step_conf": 8.36328125,
"calib/ece": 0.39593625498007967,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.008738371352109087,
"calib/mean_conf": 0.07418326693227092,
"calib/mu_c": 0.0788135593220339,
"calib/mu_w": 0.07007518796992482,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.027650894938876375,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2674.0,
"completions/max_terminated_length": 2674.0,
"completions/mean_length": 508.1953125,
"completions/mean_terminated_length": 508.1953125,
"completions/min_length": 161.0,
"completions/min_terminated_length": 161.0,
"epoch": 0.16426666666666667,
"grad_norm": 0.3397197127342224,
"learning_rate": 1.2777777777777779e-06,
"loss": 0.1079,
"num_tokens": 34154566.0,
"reward": 1.0976823568344116,
"reward_std": 0.2694038152694702,
"rewards/accuracy_reward_step": 0.4609375,
"rewards/final_brier_reward_step": 0.5860421657562256,
"rewards/format_reward_step_strict": 0.98046875,
"step": 154
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -3.0855995707401718e-06,
"aux_brier/mean_group_std": 0.06309926270947872,
"aux_brier/mean_r": 0.9577261976476592,
"aux_brier/n_active_tok": 265.25,
"aux_brier/n_groups": 15.3125,
"aux_brier/n_step_records": 66.3125,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5155589704187475,
"calib/avg_num_step_conf": 8.56640625,
"calib/ece": 0.37792828685258967,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0036470738891023202,
"calib/mean_conf": 0.07625498007968128,
"calib/mu_c": 0.07824561403508772,
"calib/mu_w": 0.0745985401459854,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.036208761748919856,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2853.0,
"completions/max_terminated_length": 2853.0,
"completions/mean_length": 499.01171875,
"completions/mean_terminated_length": 502.9409484863281,
"completions/min_length": 0.0,
"completions/min_terminated_length": 174.0,
"epoch": 0.16533333333333333,
"grad_norm": 0.38062500953674316,
"learning_rate": 1.25e-06,
"loss": 0.0251,
"num_tokens": 34389529.0,
"reward": 1.0800366401672363,
"reward_std": 0.2779013216495514,
"rewards/accuracy_reward_step": 0.4453125,
"rewards/final_brier_reward_step": 0.5935840010643005,
"rewards/format_reward_step_strict": 0.97265625,
"step": 155
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.697006269203328e-06,
"aux_brier/mean_group_std": 0.049255267466241745,
"aux_brier/mean_r": 0.9655943838545354,
"aux_brier/n_active_tok": 295.0,
"aux_brier/n_groups": 19.09375,
"aux_brier/n_step_records": 73.75,
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.5478144746270602,
"calib/avg_num_step_conf": 9.70703125,
"calib/ece": 0.3997983870967742,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0063846003517686045,
"calib/mean_conf": 0.08004032258064517,
"calib/mu_c": 0.08336134453781513,
"calib/mu_w": 0.07697674418604653,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.03213502239632129,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2590.0,
"completions/max_terminated_length": 2590.0,
"completions/mean_length": 532.84765625,
"completions/mean_terminated_length": 539.166015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 153.0,
"epoch": 0.1664,
"grad_norm": 0.3053121864795685,
"learning_rate": 1.2222222222222223e-06,
"loss": 0.0197,
"num_tokens": 34630698.0,
"reward": 1.086916446685791,
"reward_std": 0.24515119194984436,
"rewards/accuracy_reward_step": 0.46484375,
"rewards/final_brier_reward_step": 0.5664156675338745,
"rewards/format_reward_step_strict": 0.9609375,
"step": 156
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -2.0148835825906097e-06,
"aux_brier/mean_group_std": 0.07082002987039873,
"aux_brier/mean_r": 0.9552137337020143,
"aux_brier/n_active_tok": 282.375,
"aux_brier/n_groups": 18.0,
"aux_brier/n_step_records": 70.59375,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.6178282633808241,
"calib/avg_num_step_conf": 8.82421875,
"calib/ece": 0.4973517786561264,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.009898600949813902,
"calib/mean_conf": 0.08367588932806325,
"calib/mu_c": 0.08782312925170069,
"calib/mu_w": 0.07792452830188679,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.032829745437239044,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2592.0,
"completions/max_terminated_length": 2592.0,
"completions/mean_length": 491.51171875,
"completions/mean_terminated_length": 493.4392395019531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 135.0,
"epoch": 0.16746666666666668,
"grad_norm": 0.22145549952983856,
"learning_rate": 1.1944444444444446e-06,
"loss": 0.0098,
"num_tokens": 34860253.0,
"reward": 1.1950936317443848,
"reward_std": 0.20999005436897278,
"rewards/accuracy_reward_step": 0.57421875,
"rewards/final_brier_reward_step": 0.5069370865821838,
"rewards/format_reward_step_strict": 0.98828125,
"step": 157
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 3.561560416240339e-06,
"aux_brier/mean_group_std": 0.05701335252611965,
"aux_brier/mean_r": 0.9607430173743206,
"aux_brier/n_active_tok": 269.5,
"aux_brier/n_groups": 16.09375,
"aux_brier/n_step_records": 67.375,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5089937267955447,
"calib/avg_num_step_conf": 8.4296875,
"calib/ece": 0.4960474308300396,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00031366022276275995,
"calib/mean_conf": 0.08355731225296444,
"calib/mu_c": 0.08342465753424658,
"calib/mu_w": 0.08373831775700934,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0012648221343873518,
"calib/std_conf": 0.03333850928688426,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2655.0,
"completions/max_terminated_length": 2655.0,
"completions/mean_length": 489.6640625,
"completions/mean_terminated_length": 491.5843505859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 161.0,
"epoch": 0.16853333333333334,
"grad_norm": 0.2728252112865448,
"learning_rate": 1.1666666666666668e-06,
"loss": 0.0528,
"num_tokens": 35090847.0,
"reward": 1.1907347440719604,
"reward_std": 0.25884896516799927,
"rewards/accuracy_reward_step": 0.5703125,
"rewards/final_brier_reward_step": 0.5051265954971313,
"rewards/format_reward_step_strict": 0.98828125,
"step": 158
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 4.549838194228828e-06,
"aux_brier/mean_group_std": 0.045389470776766,
"aux_brier/mean_r": 0.9715923945016651,
"aux_brier/n_active_tok": 252.75,
"aux_brier/n_groups": 14.625,
"aux_brier/n_step_records": 63.1875,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5049873737373738,
"calib/avg_num_step_conf": 7.9609375,
"calib/ece": 0.44936507936507936,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0019621212121212067,
"calib/mean_conf": 0.07444444444444444,
"calib/mu_c": 0.07537878787878788,
"calib/mu_w": 0.07341666666666667,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.026172673121829793,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2500.0,
"completions/max_terminated_length": 2500.0,
"completions/mean_length": 465.7109375,
"completions/mean_terminated_length": 467.53729248046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 170.0,
"epoch": 0.1696,
"grad_norm": 0.16256766021251678,
"learning_rate": 1.138888888888889e-06,
"loss": 0.0475,
"num_tokens": 35314853.0,
"reward": 1.142901062965393,
"reward_std": 0.2668300271034241,
"rewards/accuracy_reward_step": 0.515625,
"rewards/final_brier_reward_step": 0.5403547286987305,
"rewards/format_reward_step_strict": 0.984375,
"step": 159
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.4189522139496091e-06,
"aux_brier/mean_group_std": 0.048682114241358466,
"aux_brier/mean_r": 0.9764424962548897,
"aux_brier/n_active_tok": 283.0,
"aux_brier/n_groups": 19.0625,
"aux_brier/n_step_records": 70.75,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.5760825982357658,
"calib/avg_num_step_conf": 8.87109375,
"calib/ece": 0.43897959183673474,
"calib/final_conf_rate": 0.95703125,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.010247928361400677,
"calib/mean_conf": 0.08755102040816327,
"calib/mu_c": 0.0924031007751938,
"calib/mu_w": 0.08215517241379312,
"calib/nonempty_final_conf_rate": 0.95703125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.034601557841825795,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2636.0,
"completions/max_terminated_length": 2636.0,
"completions/mean_length": 516.75,
"completions/mean_terminated_length": 518.7764892578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 171.0,
"epoch": 0.17066666666666666,
"grad_norm": 0.3571442663669586,
"learning_rate": 1.111111111111111e-06,
"loss": 0.0678,
"num_tokens": 35551981.0,
"reward": 1.1207700967788696,
"reward_std": 0.27606478333473206,
"rewards/accuracy_reward_step": 0.5078125,
"rewards/final_brier_reward_step": 0.5377683639526367,
"rewards/format_reward_step_strict": 0.95703125,
"step": 160
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -2.788575513085201e-06,
"aux_brier/mean_group_std": 0.07286658959147208,
"aux_brier/mean_r": 0.9612494504089659,
"aux_brier/n_active_tok": 236.625,
"aux_brier/n_groups": 13.28125,
"aux_brier/n_step_records": 59.15625,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5555515370705244,
"calib/avg_num_step_conf": 7.48828125,
"calib/ece": 0.6029921259842521,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0062972875226039865,
"calib/mean_conf": 0.08598425196850396,
"calib/mu_c": 0.08794285714285714,
"calib/mu_w": 0.08164556962025316,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.035221679135631255,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2552.0,
"completions/max_terminated_length": 2552.0,
"completions/mean_length": 453.97265625,
"completions/mean_terminated_length": 455.7529602050781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.17173333333333332,
"grad_norm": 0.2653784155845642,
"learning_rate": 1.0833333333333335e-06,
"loss": 0.0191,
"num_tokens": 35772118.0,
"reward": 1.2818372249603271,
"reward_std": 0.21012239158153534,
"rewards/accuracy_reward_step": 0.68359375,
"rewards/final_brier_reward_step": 0.41641169786453247,
"rewards/format_reward_step_strict": 0.98828125,
"step": 161
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 4.81067918833844e-07,
"aux_brier/mean_group_std": 0.05413243927366356,
"aux_brier/mean_r": 0.9664557423505251,
"aux_brier/n_active_tok": 245.375,
"aux_brier/n_groups": 15.4375,
"aux_brier/n_step_records": 61.34375,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.6345530650823656,
"calib/avg_num_step_conf": 7.66796875,
"calib/ece": 0.5496442687747036,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.01363354037267081,
"calib/mean_conf": 0.08671936758893282,
"calib/mu_c": 0.09167701863354039,
"calib/mu_w": 0.07804347826086958,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.041644888516131295,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2862.0,
"completions/max_terminated_length": 2862.0,
"completions/mean_length": 465.25390625,
"completions/mean_terminated_length": 465.25390625,
"completions/min_length": 151.0,
"completions/min_terminated_length": 151.0,
"epoch": 0.1728,
"grad_norm": 0.7708801031112671,
"learning_rate": 1.0555555555555557e-06,
"loss": 0.0651,
"num_tokens": 35995367.0,
"reward": 1.2394322156906128,
"reward_std": 0.21498660743236542,
"rewards/accuracy_reward_step": 0.62890625,
"rewards/final_brier_reward_step": 0.46554142236709595,
"rewards/format_reward_step_strict": 0.98828125,
"step": 162
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -8.302051902955854e-06,
"aux_brier/mean_group_std": 0.0638352361480643,
"aux_brier/mean_r": 0.9578971509819684,
"aux_brier/n_active_tok": 275.875,
"aux_brier/n_groups": 16.375,
"aux_brier/n_step_records": 68.96875,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.6277521761392729,
"calib/avg_num_step_conf": 8.921875,
"calib/ece": 0.41291999999999995,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.01506144393241167,
"calib/mean_conf": 0.09372000000000001,
"calib/mu_c": 0.10119047619047618,
"calib/mu_w": 0.08612903225806451,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.00132,
"calib/std_conf": 0.04174400076657722,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2546.0,
"completions/max_terminated_length": 2546.0,
"completions/mean_length": 516.421875,
"completions/mean_terminated_length": 520.4881591796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 173.0,
"epoch": 0.17386666666666667,
"grad_norm": 0.6164954900741577,
"learning_rate": 1.0277777777777777e-06,
"loss": 0.0461,
"num_tokens": 36232403.0,
"reward": 1.1238950490951538,
"reward_std": 0.21191027760505676,
"rewards/accuracy_reward_step": 0.4921875,
"rewards/final_brier_reward_step": 0.5737050771713257,
"rewards/format_reward_step_strict": 0.9765625,
"step": 163
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.3525187265717165e-06,
"aux_brier/mean_group_std": 0.03834141689759305,
"aux_brier/mean_r": 0.9800117110780533,
"aux_brier/n_active_tok": 265.75,
"aux_brier/n_groups": 14.625,
"aux_brier/n_step_records": 66.4375,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.527529761904762,
"calib/avg_num_step_conf": 8.421875,
"calib/ece": 0.419484251968504,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0005918898809523926,
"calib/mean_conf": 0.08791732283464566,
"calib/mu_c": 0.08821093750000002,
"calib/mu_w": 0.08761904761904762,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.001732283464566929,
"calib/std_conf": 0.03821114483607079,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1960.0,
"completions/max_terminated_length": 1960.0,
"completions/mean_length": 515.90234375,
"completions/mean_terminated_length": 517.925537109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 158.0,
"epoch": 0.17493333333333333,
"grad_norm": 0.14638356864452362,
"learning_rate": 1.0000000000000002e-06,
"loss": -0.0287,
"num_tokens": 36470610.0,
"reward": 1.131101369857788,
"reward_std": 0.24305739998817444,
"rewards/accuracy_reward_step": 0.5,
"rewards/final_brier_reward_step": 0.5712807178497314,
"rewards/format_reward_step_strict": 0.9765625,
"step": 164
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 5.987938269669257e-07,
"aux_brier/mean_group_std": 0.04195193778587433,
"aux_brier/mean_r": 0.9751945249083633,
"aux_brier/n_active_tok": 265.25,
"aux_brier/n_groups": 15.0,
"aux_brier/n_step_records": 66.3125,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5782344844844846,
"calib/avg_num_step_conf": 8.2890625,
"calib/ece": 0.3345490196078431,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.010964714714714718,
"calib/mean_conf": 0.10074509803921569,
"calib/mu_c": 0.10693693693693694,
"calib/mu_w": 0.09597222222222222,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.040678482599022986,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2131.0,
"completions/max_terminated_length": 2131.0,
"completions/mean_length": 504.55078125,
"completions/mean_terminated_length": 504.55078125,
"completions/min_length": 145.0,
"completions/min_terminated_length": 145.0,
"epoch": 0.176,
"grad_norm": 0.04463425651192665,
"learning_rate": 9.722222222222224e-07,
"loss": 0.0131,
"num_tokens": 36705351.0,
"reward": 1.0925097465515137,
"reward_std": 0.21812310814857483,
"rewards/accuracy_reward_step": 0.43359375,
"rewards/final_brier_reward_step": 0.6434761881828308,
"rewards/format_reward_step_strict": 0.99609375,
"step": 165
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.738217014768395e-06,
"aux_brier/mean_group_std": 0.06286699413879694,
"aux_brier/mean_r": 0.9591385617465502,
"aux_brier/n_active_tok": 280.5,
"aux_brier/n_groups": 16.46875,
"aux_brier/n_step_records": 70.125,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.6489734927234927,
"calib/avg_num_step_conf": 8.765625,
"calib/ece": 0.4890873015873015,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.02069906444906444,
"calib/mean_conf": 0.09821428571428571,
"calib/mu_c": 0.10675675675675676,
"calib/mu_w": 0.08605769230769232,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.0,
"calib/std_conf": 0.042515003354158185,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2663.0,
"completions/max_terminated_length": 2663.0,
"completions/mean_length": 564.296875,
"completions/mean_terminated_length": 564.296875,
"completions/min_length": 179.0,
"completions/min_terminated_length": 179.0,
"epoch": 0.17706666666666668,
"grad_norm": 0.013891350477933884,
"learning_rate": 9.444444444444445e-07,
"loss": 0.0391,
"num_tokens": 36955995.0,
"reward": 1.1921110153198242,
"reward_std": 0.23113755881786346,
"rewards/accuracy_reward_step": 0.578125,
"rewards/final_brier_reward_step": 0.5106316208839417,
"rewards/format_reward_step_strict": 0.97265625,
"step": 166
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -5.111495625265139e-06,
"aux_brier/mean_group_std": 0.05852985912014213,
"aux_brier/mean_r": 0.9574375097612414,
"aux_brier/n_active_tok": 266.125,
"aux_brier/n_groups": 16.3125,
"aux_brier/n_step_records": 66.53125,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.6824582991523106,
"calib/avg_num_step_conf": 8.31640625,
"calib/ece": 0.5209561752988048,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.029357396773311464,
"calib/mean_conf": 0.11250996015936256,
"calib/mu_c": 0.12327044025157234,
"calib/mu_w": 0.09391304347826088,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0,
"calib/std_conf": 0.04700253129996288,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2644.0,
"completions/max_terminated_length": 2644.0,
"completions/mean_length": 498.57421875,
"completions/mean_terminated_length": 498.57421875,
"completions/min_length": 1.0,
"completions/min_terminated_length": 1.0,
"epoch": 0.17813333333333334,
"grad_norm": 0.16410934925079346,
"learning_rate": 9.166666666666666e-07,
"loss": 0.0268,
"num_tokens": 37189238.0,
"reward": 1.2358087301254272,
"reward_std": 0.25065454840660095,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/final_brier_reward_step": 0.4979226589202881,
"rewards/format_reward_step_strict": 0.98046875,
"step": 167
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.6832584311954335e-06,
"aux_brier/mean_group_std": 0.08202811903094603,
"aux_brier/mean_r": 0.9427979547787165,
"aux_brier/n_active_tok": 278.625,
"aux_brier/n_groups": 16.375,
"aux_brier/n_step_records": 69.65625,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4515483265561464,
"calib/avg_num_step_conf": 8.7109375,
"calib/ece": 0.42940944881889764,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.011261182358461075,
"calib/mean_conf": 0.12114173228346456,
"calib/mu_c": 0.11604316546762589,
"calib/mu_w": 0.12730434782608696,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.001653543307086614,
"calib/std_conf": 0.045444822372452516,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2457.0,
"completions/max_terminated_length": 2457.0,
"completions/mean_length": 537.90625,
"completions/mean_terminated_length": 537.90625,
"completions/min_length": 124.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.1792,
"grad_norm": 0.3969951570034027,
"learning_rate": 8.88888888888889e-07,
"loss": 0.021,
"num_tokens": 37431614.0,
"reward": 1.1758363246917725,
"reward_std": 0.2561456263065338,
"rewards/accuracy_reward_step": 0.54296875,
"rewards/final_brier_reward_step": 0.5549074411392212,
"rewards/format_reward_step_strict": 0.98828125,
"step": 168
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -5.90437492831164e-06,
"aux_brier/mean_group_std": 0.048136441288030074,
"aux_brier/mean_r": 0.9676011564396277,
"aux_brier/n_active_tok": 256.125,
"aux_brier/n_groups": 14.34375,
"aux_brier/n_step_records": 64.03125,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5763746145940392,
"calib/avg_num_step_conf": 8.27734375,
"calib/ece": 0.44027888446215147,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.011812692702980451,
"calib/mean_conf": 0.11350597609561754,
"calib/mu_c": 0.11877697841726617,
"calib/mu_w": 0.10696428571428572,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.04619871943499832,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2677.0,
"completions/max_terminated_length": 2677.0,
"completions/mean_length": 500.6953125,
"completions/mean_terminated_length": 506.6324462890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 126.0,
"epoch": 0.18026666666666666,
"grad_norm": 0.18512670695781708,
"learning_rate": 8.611111111111112e-07,
"loss": -0.0053,
"num_tokens": 37663976.0,
"reward": 1.1750493049621582,
"reward_std": 0.21191146969795227,
"rewards/accuracy_reward_step": 0.546875,
"rewards/final_brier_reward_step": 0.5517597794532776,
"rewards/format_reward_step_strict": 0.98046875,
"step": 169
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -2.415873636585353e-06,
"aux_brier/mean_group_std": 0.05310493198191623,
"aux_brier/mean_r": 0.9692374323947502,
"aux_brier/n_active_tok": 278.625,
"aux_brier/n_groups": 14.25,
"aux_brier/n_step_records": 69.65625,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5742113862008296,
"calib/avg_num_step_conf": 8.75,
"calib/ece": 0.45572549019607844,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.012558753298982026,
"calib/mean_conf": 0.1168235294117647,
"calib/mu_c": 0.12219178082191778,
"calib/mu_w": 0.10963302752293576,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.04897886934633356,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1378.0,
"completions/max_terminated_length": 1378.0,
"completions/mean_length": 494.00390625,
"completions/mean_terminated_length": 495.9411926269531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 186.0,
"epoch": 0.18133333333333335,
"grad_norm": 0.13570146262645721,
"learning_rate": 8.333333333333333e-07,
"loss": 0.0058,
"num_tokens": 37894593.0,
"reward": 1.2027275562286377,
"reward_std": 0.20788493752479553,
"rewards/accuracy_reward_step": 0.5703125,
"rewards/final_brier_reward_step": 0.5452852249145508,
"rewards/format_reward_step_strict": 0.9921875,
"step": 170
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 5.163024858464982e-06,
"aux_brier/mean_group_std": 0.06589841047410823,
"aux_brier/mean_r": 0.9508623940262273,
"aux_brier/n_active_tok": 260.125,
"aux_brier/n_groups": 13.25,
"aux_brier/n_step_records": 65.03125,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5535850495804729,
"calib/avg_num_step_conf": 8.2265625,
"calib/ece": 0.33789682539682536,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00531273836765829,
"calib/mean_conf": 0.11972222222222223,
"calib/mu_c": 0.12263157894736842,
"calib/mu_w": 0.11731884057971013,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0026190476190476185,
"calib/std_conf": 0.04683136564056339,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1329.0,
"completions/max_terminated_length": 1329.0,
"completions/mean_length": 480.30078125,
"completions/mean_terminated_length": 484.0826721191406,
"completions/min_length": 0.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.1824,
"grad_norm": 0.20341052114963531,
"learning_rate": 8.055555555555557e-07,
"loss": 0.0151,
"num_tokens": 38124446.0,
"reward": 1.0994094610214233,
"reward_std": 0.2636204957962036,
"rewards/accuracy_reward_step": 0.44921875,
"rewards/final_brier_reward_step": 0.6320128440856934,
"rewards/format_reward_step_strict": 0.984375,
"step": 171
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -5.624893337774495e-06,
"aux_brier/mean_group_std": 0.05225646806897729,
"aux_brier/mean_r": 0.971827501106451,
"aux_brier/n_active_tok": 247.25,
"aux_brier/n_groups": 13.8125,
"aux_brier/n_step_records": 61.8125,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.569299808166621,
"calib/avg_num_step_conf": 7.81640625,
"calib/ece": 0.5239525691699605,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00609619073718827,
"calib/mean_conf": 0.1282213438735178,
"calib/mu_c": 0.1303658536585366,
"calib/mu_w": 0.12426966292134832,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.001976284584980237,
"calib/std_conf": 0.047477875219391966,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1934.0,
"completions/max_terminated_length": 1934.0,
"completions/mean_length": 460.02734375,
"completions/mean_terminated_length": 461.8313903808594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 166.0,
"epoch": 0.18346666666666667,
"grad_norm": 0.2539040446281433,
"learning_rate": 7.777777777777779e-07,
"loss": -0.0053,
"num_tokens": 38345565.0,
"reward": 1.260039210319519,
"reward_std": 0.26275694370269775,
"rewards/accuracy_reward_step": 0.64453125,
"rewards/final_brier_reward_step": 0.4932820200920105,
"rewards/format_reward_step_strict": 0.984375,
"step": 172
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -2.9148429181447e-07,
"aux_brier/mean_group_std": 0.07979615823018552,
"aux_brier/mean_r": 0.9540008730685733,
"aux_brier/n_active_tok": 269.125,
"aux_brier/n_groups": 17.71875,
"aux_brier/n_step_records": 67.28125,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5855387523629489,
"calib/avg_num_step_conf": 8.41015625,
"calib/ece": 0.5124110671936759,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.012872670807453385,
"calib/mean_conf": 0.12395256916996049,
"calib/mu_c": 0.1286335403726708,
"calib/mu_w": 0.11576086956521742,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.04886245237698008,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2956.0,
"completions/max_terminated_length": 2956.0,
"completions/mean_length": 510.25390625,
"completions/mean_terminated_length": 510.25390625,
"completions/min_length": 113.0,
"completions/min_terminated_length": 113.0,
"epoch": 0.18453333333333333,
"grad_norm": 0.09980254620313644,
"learning_rate": 7.5e-07,
"loss": 0.0408,
"num_tokens": 38579350.0,
"reward": 1.2439329624176025,
"reward_std": 0.27457743883132935,
"rewards/accuracy_reward_step": 0.62890625,
"rewards/final_brier_reward_step": 0.49916914105415344,
"rewards/format_reward_step_strict": 0.98046875,
"step": 173
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.077650419567533e-06,
"aux_brier/mean_group_std": 0.06339299942105313,
"aux_brier/mean_r": 0.9601915076758774,
"aux_brier/n_active_tok": 260.625,
"aux_brier/n_groups": 14.6875,
"aux_brier/n_step_records": 65.15625,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.6350469066937119,
"calib/avg_num_step_conf": 8.1484375,
"calib/ece": 0.3348015873015873,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.02428245436105478,
"calib/mean_conf": 0.12551587301587303,
"calib/mu_c": 0.13862068965517244,
"calib/mu_w": 0.11433823529411766,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0,
"calib/std_conf": 0.04648062134638241,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2485.0,
"completions/max_terminated_length": 2485.0,
"completions/mean_length": 526.45703125,
"completions/mean_terminated_length": 526.45703125,
"completions/min_length": 86.0,
"completions/min_terminated_length": 86.0,
"epoch": 0.1856,
"grad_norm": 0.0846116691827774,
"learning_rate": 7.222222222222222e-07,
"loss": 0.0003,
"num_tokens": 38818355.0,
"reward": 1.1021990776062012,
"reward_std": 0.2749863564968109,
"rewards/accuracy_reward_step": 0.453125,
"rewards/final_brier_reward_step": 0.635358989238739,
"rewards/format_reward_step_strict": 0.98046875,
"step": 174
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 8.318068470680728e-07,
"aux_brier/mean_group_std": 0.05940279317404167,
"aux_brier/mean_r": 0.9610518133940902,
"aux_brier/n_active_tok": 256.25,
"aux_brier/n_groups": 15.59375,
"aux_brier/n_step_records": 64.0625,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5991245791245792,
"calib/avg_num_step_conf": 8.53125,
"calib/ece": 0.28347791164658626,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.004016064257028112,
"calib/gap": 0.012502626262626257,
"calib/mean_conf": 0.12206425702811245,
"calib/mu_c": 0.12959595959595957,
"calib/mu_w": 0.11709333333333331,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.003975903614457831,
"calib/std_conf": 0.07521788890146662,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2937.0,
"completions/max_terminated_length": 2937.0,
"completions/mean_length": 480.47265625,
"completions/mean_terminated_length": 490.0438537597656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 130.0,
"epoch": 0.18666666666666668,
"grad_norm": 0.49778616428375244,
"learning_rate": 6.944444444444446e-07,
"loss": -0.0333,
"num_tokens": 39047180.0,
"reward": 1.0327746868133545,
"reward_std": 0.2279902696609497,
"rewards/accuracy_reward_step": 0.38671875,
"rewards/final_brier_reward_step": 0.6623488664627075,
"rewards/format_reward_step_strict": 0.9609375,
"step": 175
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.015040904661646e-07,
"aux_brier/mean_group_std": 0.07720815796335934,
"aux_brier/mean_r": 0.9545459530610562,
"aux_brier/n_active_tok": 264.0,
"aux_brier/n_groups": 15.6875,
"aux_brier/n_step_records": 66.0,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.680995043842928,
"calib/avg_num_step_conf": 8.25,
"calib/ece": 0.3602390438247012,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.028872791968483927,
"calib/mean_conf": 0.1258167330677291,
"calib/mu_c": 0.14065573770491804,
"calib/mu_w": 0.11178294573643412,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.045512030269889224,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2823.0,
"completions/max_terminated_length": 2823.0,
"completions/mean_length": 475.484375,
"completions/mean_terminated_length": 477.34906005859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 141.0,
"epoch": 0.18773333333333334,
"grad_norm": 0.08645545691251755,
"learning_rate": 6.666666666666667e-07,
"loss": 0.0035,
"num_tokens": 39272968.0,
"reward": 1.1219011545181274,
"reward_std": 0.2135622501373291,
"rewards/accuracy_reward_step": 0.4765625,
"rewards/final_brier_reward_step": 0.6204172372817993,
"rewards/format_reward_step_strict": 0.98046875,
"step": 176
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -5.432011142558757e-07,
"aux_brier/mean_group_std": 0.05504437234161081,
"aux_brier/mean_r": 0.9677546085845754,
"aux_brier/n_active_tok": 276.125,
"aux_brier/n_groups": 18.28125,
"aux_brier/n_step_records": 69.03125,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.4958063286313381,
"calib/avg_num_step_conf": 8.7578125,
"calib/ece": 0.36645418326693224,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00045622061253018653,
"calib/mean_conf": 0.11960159362549802,
"calib/mu_c": 0.11983606557377051,
"calib/mu_w": 0.11937984496124032,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.04477300692219342,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3016.0,
"completions/max_terminated_length": 3016.0,
"completions/mean_length": 542.4453125,
"completions/mean_terminated_length": 544.5725708007812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 127.0,
"epoch": 0.1888,
"grad_norm": 0.0409964844584465,
"learning_rate": 6.388888888888889e-07,
"loss": 0.0733,
"num_tokens": 39515666.0,
"reward": 1.112450122833252,
"reward_std": 0.2231827825307846,
"rewards/accuracy_reward_step": 0.4765625,
"rewards/final_brier_reward_step": 0.598237931728363,
"rewards/format_reward_step_strict": 0.97265625,
"step": 177
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -4.233472673836891e-06,
"aux_brier/mean_group_std": 0.07240147272896671,
"aux_brier/mean_r": 0.957393248241993,
"aux_brier/n_active_tok": 249.0,
"aux_brier/n_groups": 12.5625,
"aux_brier/n_step_records": 62.25,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5090874668686104,
"calib/avg_num_step_conf": 7.921875,
"calib/ece": 0.4302766798418972,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0027899785434810087,
"calib/mean_conf": 0.12837944664031622,
"calib/mu_c": 0.1271223021582734,
"calib/mu_w": 0.1299122807017544,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.004624505928853755,
"calib/std_conf": 0.05458559558659771,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2501.0,
"completions/max_terminated_length": 2501.0,
"completions/mean_length": 476.640625,
"completions/mean_terminated_length": 478.50982666015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.18986666666666666,
"grad_norm": 0.28055137395858765,
"learning_rate": 6.111111111111112e-07,
"loss": 0.029,
"num_tokens": 39743758.0,
"reward": 1.1723394393920898,
"reward_std": 0.29497209191322327,
"rewards/accuracy_reward_step": 0.54296875,
"rewards/final_brier_reward_step": 0.5565453171730042,
"rewards/format_reward_step_strict": 0.98046875,
"step": 178
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -5.454093463308496e-06,
"aux_brier/mean_group_std": 0.042397692019474235,
"aux_brier/mean_r": 0.9736497694609709,
"aux_brier/n_active_tok": 266.75,
"aux_brier/n_groups": 15.9375,
"aux_brier/n_step_records": 66.6875,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5346933386149072,
"calib/avg_num_step_conf": 8.3359375,
"calib/ece": 0.4836111111111111,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.006316102198455145,
"calib/mean_conf": 0.12353174603174603,
"calib/mu_c": 0.12601307189542485,
"calib/mu_w": 0.1196969696969697,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.0,
"calib/std_conf": 0.043450242753799106,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3018.0,
"completions/max_terminated_length": 3018.0,
"completions/mean_length": 503.9609375,
"completions/mean_terminated_length": 505.9372863769531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 189.0,
"epoch": 0.19093333333333334,
"grad_norm": 0.162491112947464,
"learning_rate": 5.833333333333334e-07,
"loss": 0.027,
"num_tokens": 39979036.0,
"reward": 1.2147315740585327,
"reward_std": 0.2703174948692322,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/final_brier_reward_step": 0.5151761770248413,
"rewards/format_reward_step_strict": 0.9765625,
"step": 179
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.7154782352979403e-07,
"aux_brier/mean_group_std": 0.07559057480052574,
"aux_brier/mean_r": 0.9453568509669251,
"aux_brier/n_active_tok": 293.75,
"aux_brier/n_groups": 17.09375,
"aux_brier/n_step_records": 73.4375,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5771254500138466,
"calib/avg_num_step_conf": 9.3125,
"calib/ece": 0.500281124497992,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.012794239822763773,
"calib/mean_conf": 0.1302409638554217,
"calib/mu_c": 0.13496815286624206,
"calib/mu_w": 0.12217391304347829,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.046671781511234696,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2580.0,
"completions/max_terminated_length": 2580.0,
"completions/mean_length": 592.5859375,
"completions/mean_terminated_length": 599.6126708984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 177.0,
"epoch": 0.192,
"grad_norm": 0.15977561473846436,
"learning_rate": 5.555555555555555e-07,
"loss": 0.0119,
"num_tokens": 40234594.0,
"reward": 1.2240822315216064,
"reward_std": 0.2369544804096222,
"rewards/accuracy_reward_step": 0.61328125,
"rewards/final_brier_reward_step": 0.5057042837142944,
"rewards/format_reward_step_strict": 0.96875,
"step": 180
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -3.2233169919015925e-06,
"aux_brier/mean_group_std": 0.04418487868754871,
"aux_brier/mean_r": 0.9695073453059229,
"aux_brier/n_active_tok": 246.375,
"aux_brier/n_groups": 14.25,
"aux_brier/n_step_records": 61.59375,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.595789124668435,
"calib/avg_num_step_conf": 7.76171875,
"calib/ece": 0.2915261044176707,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.01340782493368703,
"calib/mean_conf": 0.12863453815261044,
"calib/mu_c": 0.1364423076923077,
"calib/mu_w": 0.12303448275862067,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0012449799196787147,
"calib/std_conf": 0.045440094416825214,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2936.0,
"completions/max_terminated_length": 2936.0,
"completions/mean_length": 456.140625,
"completions/mean_terminated_length": 459.7322692871094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 174.0,
"epoch": 0.19306666666666666,
"grad_norm": 0.12891103327274323,
"learning_rate": 5.277777777777779e-07,
"loss": 0.0454,
"num_tokens": 40457630.0,
"reward": 1.0583572387695312,
"reward_std": 0.26521506905555725,
"rewards/accuracy_reward_step": 0.41015625,
"rewards/final_brier_reward_step": 0.655303955078125,
"rewards/format_reward_step_strict": 0.96875,
"step": 181
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.926116698969249e-06,
"aux_brier/mean_group_std": 0.06110215558689792,
"aux_brier/mean_r": 0.9663731998498992,
"aux_brier/n_active_tok": 283.0,
"aux_brier/n_groups": 17.0625,
"aux_brier/n_step_records": 70.75,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.47959572845156373,
"calib/avg_num_step_conf": 8.84375,
"calib/ece": 0.41738095238095235,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.002128146453089258,
"calib/mean_conf": 0.13023809523809524,
"calib/mu_c": 0.1292753623188406,
"calib/mu_w": 0.13140350877192986,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.0,
"calib/std_conf": 0.04473846904225599,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2840.0,
"completions/max_terminated_length": 2840.0,
"completions/mean_length": 531.60546875,
"completions/mean_terminated_length": 531.60546875,
"completions/min_length": 211.0,
"completions/min_terminated_length": 211.0,
"epoch": 0.19413333333333332,
"grad_norm": 0.29006049036979675,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0404,
"num_tokens": 40699881.0,
"reward": 1.1646442413330078,
"reward_std": 0.24191956222057343,
"rewards/accuracy_reward_step": 0.5390625,
"rewards/final_brier_reward_step": 0.5570140480995178,
"rewards/format_reward_step_strict": 0.97265625,
"step": 182
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -2.3976111822587853e-06,
"aux_brier/mean_group_std": 0.04886749718470261,
"aux_brier/mean_r": 0.9676434180165906,
"aux_brier/n_active_tok": 283.625,
"aux_brier/n_groups": 18.5625,
"aux_brier/n_step_records": 70.90625,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5388257575757576,
"calib/avg_num_step_conf": 8.984375,
"calib/ece": 0.35023809523809524,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.005977272727272748,
"calib/mean_conf": 0.12595238095238095,
"calib/mu_c": 0.12908333333333336,
"calib/mu_w": 0.12310606060606061,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0481429513446396,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2446.0,
"completions/max_terminated_length": 2446.0,
"completions/mean_length": 542.4921875,
"completions/mean_terminated_length": 544.61962890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 167.0,
"epoch": 0.1952,
"grad_norm": 0.07587724924087524,
"learning_rate": 4.7222222222222226e-07,
"loss": 0.016,
"num_tokens": 40945439.0,
"reward": 1.1098310947418213,
"reward_std": 0.287031352519989,
"rewards/accuracy_reward_step": 0.46875,
"rewards/final_brier_reward_step": 0.6111996173858643,
"rewards/format_reward_step_strict": 0.9765625,
"step": 183
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.3190019493292127e-06,
"aux_brier/mean_group_std": 0.06916217819761528,
"aux_brier/mean_r": 0.9544377919865814,
"aux_brier/n_active_tok": 269.625,
"aux_brier/n_groups": 15.40625,
"aux_brier/n_step_records": 67.40625,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5821722399150744,
"calib/avg_num_step_conf": 8.734375,
"calib/ece": 0.49090909090909085,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.013527070063694233,
"calib/mean_conf": 0.12964426877470356,
"calib/mu_c": 0.13477707006369424,
"calib/mu_w": 0.12125000000000001,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.050891964320219044,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2136.0,
"completions/max_terminated_length": 2136.0,
"completions/mean_length": 486.44140625,
"completions/mean_terminated_length": 490.2716369628906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 155.0,
"epoch": 0.19626666666666667,
"grad_norm": 0.2684814929962158,
"learning_rate": 4.444444444444445e-07,
"loss": -0.0159,
"num_tokens": 41175248.0,
"reward": 1.237707495689392,
"reward_std": 0.23681330680847168,
"rewards/accuracy_reward_step": 0.61328125,
"rewards/final_brier_reward_step": 0.5211422443389893,
"rewards/format_reward_step_strict": 0.98828125,
"step": 184
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -6.602204194461336e-07,
"aux_brier/mean_group_std": 0.08506167947464682,
"aux_brier/mean_r": 0.9477614360696893,
"aux_brier/n_active_tok": 311.75,
"aux_brier/n_groups": 22.6875,
"aux_brier/n_step_records": 77.9375,
"calib/answer_extract_rate": 0.94921875,
"calib/auroc": 0.5431345353675451,
"calib/avg_num_step_conf": 10.2578125,
"calib/ece": 0.4515637860082305,
"calib/final_conf_rate": 0.94921875,
"calib/format_rate": 0.9453125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.002054785020804445,
"calib/mean_conf": 0.1320576131687243,
"calib/mu_c": 0.13292857142857142,
"calib/mu_w": 0.13087378640776698,
"calib/nonempty_final_conf_rate": 0.94921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0037448559670781893,
"calib/std_conf": 0.05286722219655959,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2793.0,
"completions/max_terminated_length": 2793.0,
"completions/mean_length": 557.3046875,
"completions/mean_terminated_length": 563.9130859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 103.0,
"epoch": 0.19733333333333333,
"grad_norm": 0.10312332212924957,
"learning_rate": 4.1666666666666667e-07,
"loss": 0.0405,
"num_tokens": 41424838.0,
"reward": 1.1510381698608398,
"reward_std": 0.24840062856674194,
"rewards/accuracy_reward_step": 0.546875,
"rewards/final_brier_reward_step": 0.5260277390480042,
"rewards/format_reward_step_strict": 0.9453125,
"step": 185
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 7.78480908625312e-08,
"aux_brier/mean_group_std": 0.06988757628812112,
"aux_brier/mean_r": 0.9571196268251531,
"aux_brier/n_active_tok": 277.375,
"aux_brier/n_groups": 15.875,
"aux_brier/n_step_records": 69.34375,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5519071837253656,
"calib/avg_num_step_conf": 8.69921875,
"calib/ece": 0.43569169960474313,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.006237762237762234,
"calib/mean_conf": 0.12952569169960476,
"calib/mu_c": 0.13223776223776224,
"calib/mu_w": 0.126,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.04043943810398236,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2679.0,
"completions/max_terminated_length": 2679.0,
"completions/mean_length": 528.5546875,
"completions/mean_terminated_length": 528.5546875,
"completions/min_length": 173.0,
"completions/min_terminated_length": 173.0,
"epoch": 0.1984,
"grad_norm": 0.06944001466035843,
"learning_rate": 3.8888888888888895e-07,
"loss": 0.053,
"num_tokens": 41665188.0,
"reward": 1.1886343955993652,
"reward_std": 0.26504650712013245,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/final_brier_reward_step": 0.5592253804206848,
"rewards/format_reward_step_strict": 0.98046875,
"step": 186
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -9.031887712174225e-07,
"aux_brier/mean_group_std": 0.052017654428443166,
"aux_brier/mean_r": 0.9697135044805943,
"aux_brier/n_active_tok": 345.0,
"aux_brier/n_groups": 28.90625,
"aux_brier/n_step_records": 86.25,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.4750064499484004,
"calib/avg_num_step_conf": 10.78125,
"calib/ece": 0.4188,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.004,
"calib/gap": -0.011123581011351857,
"calib/mean_conf": 0.13544,
"calib/mu_c": 0.13036764705882356,
"calib/mu_w": 0.14149122807017542,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0051199999999999996,
"calib/std_conf": 0.06947810014673689,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2546.0,
"completions/max_terminated_length": 2546.0,
"completions/mean_length": 608.14453125,
"completions/mean_terminated_length": 608.14453125,
"completions/min_length": 182.0,
"completions/min_terminated_length": 182.0,
"epoch": 0.19946666666666665,
"grad_norm": 0.06862202286720276,
"learning_rate": 3.611111111111111e-07,
"loss": 0.0999,
"num_tokens": 41922417.0,
"reward": 1.1598312854766846,
"reward_std": 0.27732712030410767,
"rewards/accuracy_reward_step": 0.53125,
"rewards/final_brier_reward_step": 0.5612000226974487,
"rewards/format_reward_step_strict": 0.9765625,
"step": 187
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -4.082003190797323e-06,
"aux_brier/mean_group_std": 0.08251930780726537,
"aux_brier/mean_r": 0.9502170981486534,
"aux_brier/n_active_tok": 281.5,
"aux_brier/n_groups": 17.09375,
"aux_brier/n_step_records": 70.375,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4928007135575942,
"calib/avg_num_step_conf": 8.796875,
"calib/ece": 0.4426877470355732,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.006285677879714596,
"calib/mean_conf": 0.13256916996047433,
"calib/mu_c": 0.1298611111111111,
"calib/mu_w": 0.1361467889908257,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.003043478260869565,
"calib/std_conf": 0.06225124534924887,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3032.0,
"completions/max_terminated_length": 3032.0,
"completions/mean_length": 556.95703125,
"completions/mean_terminated_length": 556.95703125,
"completions/min_length": 181.0,
"completions/min_terminated_length": 181.0,
"epoch": 0.20053333333333334,
"grad_norm": 0.05265188589692116,
"learning_rate": 3.3333333333333335e-07,
"loss": 0.0109,
"num_tokens": 42169070.0,
"reward": 1.1943097114562988,
"reward_std": 0.2656799554824829,
"rewards/accuracy_reward_step": 0.5625,
"rewards/final_brier_reward_step": 0.5506765842437744,
"rewards/format_reward_step_strict": 0.98828125,
"step": 188
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 9.26180673568866e-06,
"aux_brier/mean_group_std": 0.07424568267242235,
"aux_brier/mean_r": 0.9418080546756995,
"aux_brier/n_active_tok": 265.5,
"aux_brier/n_groups": 17.1875,
"aux_brier/n_step_records": 66.375,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.639369975818574,
"calib/avg_num_step_conf": 8.296875,
"calib/ece": 0.4448399999999999,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.018399451016273424,
"calib/mean_conf": 0.12716000000000002,
"calib/mu_c": 0.13503496503496504,
"calib/mu_w": 0.11663551401869161,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.03911437587383953,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2932.0,
"completions/max_terminated_length": 2932.0,
"completions/mean_length": 487.89453125,
"completions/mean_terminated_length": 489.807861328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 179.0,
"epoch": 0.2016,
"grad_norm": 0.0532534196972847,
"learning_rate": 3.055555555555556e-07,
"loss": 0.029,
"num_tokens": 42401739.0,
"reward": 1.1886670589447021,
"reward_std": 0.23333409428596497,
"rewards/accuracy_reward_step": 0.5625,
"rewards/final_brier_reward_step": 0.5515433549880981,
"rewards/format_reward_step_strict": 0.9765625,
"step": 189
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.796264378934765e-06,
"aux_brier/mean_group_std": 0.06688086772080293,
"aux_brier/mean_r": 0.962069712552071,
"aux_brier/n_active_tok": 300.625,
"aux_brier/n_groups": 19.21875,
"aux_brier/n_step_records": 75.15625,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.7096249521622655,
"calib/avg_num_step_conf": 9.39453125,
"calib/ece": 0.4048605577689243,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.030473912488837906,
"calib/mean_conf": 0.129003984063745,
"calib/mu_c": 0.14320895522388064,
"calib/mu_w": 0.11273504273504273,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.041565529122325265,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2844.0,
"completions/max_terminated_length": 2844.0,
"completions/mean_length": 574.0078125,
"completions/mean_terminated_length": 574.0078125,
"completions/min_length": 191.0,
"completions/min_terminated_length": 191.0,
"epoch": 0.20266666666666666,
"grad_norm": 0.09842872619628906,
"learning_rate": 2.7777777777777776e-07,
"loss": 0.0628,
"num_tokens": 42654293.0,
"reward": 1.157977819442749,
"reward_std": 0.2416093796491623,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/final_brier_reward_step": 0.5850362777709961,
"rewards/format_reward_step_strict": 0.9765625,
"step": 190
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 3.1005596038968797e-06,
"aux_brier/mean_group_std": 0.05125306216489163,
"aux_brier/mean_r": 0.9721076662297099,
"aux_brier/n_active_tok": 279.125,
"aux_brier/n_groups": 16.03125,
"aux_brier/n_step_records": 69.78125,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5840792838874681,
"calib/avg_num_step_conf": 8.734375,
"calib/ece": 0.32641434262948205,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.012413682864450132,
"calib/mean_conf": 0.13414342629482073,
"calib/mu_c": 0.1408695652173913,
"calib/mu_w": 0.12845588235294117,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.001195219123505976,
"calib/std_conf": 0.04556379165711359,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2560.0,
"completions/max_terminated_length": 2560.0,
"completions/mean_length": 505.16015625,
"completions/mean_terminated_length": 507.1412048339844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 194.0,
"epoch": 0.20373333333333332,
"grad_norm": 0.05959112197160721,
"learning_rate": 2.5000000000000004e-07,
"loss": 0.0554,
"num_tokens": 42887782.0,
"reward": 1.0989865064620972,
"reward_std": 0.23148463666439056,
"rewards/accuracy_reward_step": 0.44921875,
"rewards/final_brier_reward_step": 0.6381340026855469,
"rewards/format_reward_step_strict": 0.98046875,
"step": 191
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 4.396978059928447e-06,
"aux_brier/mean_group_std": 0.06359662057148917,
"aux_brier/mean_r": 0.9631366202545875,
"aux_brier/n_active_tok": 254.0,
"aux_brier/n_groups": 15.625,
"aux_brier/n_step_records": 63.5,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5367821390937829,
"calib/avg_num_step_conf": 8.18359375,
"calib/ece": 0.46372,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0038751317175974853,
"calib/mean_conf": 0.12284,
"calib/mu_c": 0.12445205479452055,
"calib/mu_w": 0.12057692307692307,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.00128,
"calib/std_conf": 0.044221424671758366,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2674.0,
"completions/max_terminated_length": 2674.0,
"completions/mean_length": 521.80078125,
"completions/mean_terminated_length": 525.909423828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 143.0,
"epoch": 0.2048,
"grad_norm": 0.13064619898796082,
"learning_rate": 2.2222222222222224e-07,
"loss": 0.0396,
"num_tokens": 43126339.0,
"reward": 1.1853406429290771,
"reward_std": 0.2816307544708252,
"rewards/accuracy_reward_step": 0.5703125,
"rewards/final_brier_reward_step": 0.530424952507019,
"rewards/format_reward_step_strict": 0.96484375,
"step": 192
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.011821052138151e-06,
"aux_brier/mean_group_std": 0.036070646126805155,
"aux_brier/mean_r": 0.9748769477094339,
"aux_brier/n_active_tok": 291.75,
"aux_brier/n_groups": 19.34375,
"aux_brier/n_step_records": 72.9375,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5001913509376196,
"calib/avg_num_step_conf": 9.39453125,
"calib/ece": 0.4004780876494024,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.004742314070672299,
"calib/mean_conf": 0.13338645418326692,
"calib/mu_c": 0.13559701492537315,
"calib/mu_w": 0.13085470085470086,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.0513491879135335,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2482.0,
"completions/max_terminated_length": 2482.0,
"completions/mean_length": 527.77734375,
"completions/mean_terminated_length": 531.9330444335938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 178.0,
"epoch": 0.20586666666666667,
"grad_norm": 0.05980648100376129,
"learning_rate": 1.9444444444444447e-07,
"loss": 0.0526,
"num_tokens": 43367162.0,
"reward": 1.153533935546875,
"reward_std": 0.3433189392089844,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/final_brier_reward_step": 0.5750734806060791,
"rewards/format_reward_step_strict": 0.97265625,
"step": 193
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": -1.1731222970823296e-06,
"aux_brier/mean_group_std": 0.07410394084552661,
"aux_brier/mean_r": 0.9527812351996452,
"aux_brier/n_active_tok": 257.75,
"aux_brier/n_groups": 13.96875,
"aux_brier/n_step_records": 64.4375,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5557051282051282,
"calib/avg_num_step_conf": 8.28515625,
"calib/ece": 0.39520000000000005,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.009551282051282042,
"calib/mean_conf": 0.1248,
"calib/mu_c": 0.12938461538461538,
"calib/mu_w": 0.11983333333333333,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.048192945541852904,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2199.0,
"completions/max_terminated_length": 2199.0,
"completions/mean_length": 472.33203125,
"completions/mean_terminated_length": 476.0511779785156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 160.0,
"epoch": 0.20693333333333333,
"grad_norm": 0.1151818186044693,
"learning_rate": 1.6666666666666668e-07,
"loss": -0.006,
"num_tokens": 43594023.0,
"reward": 1.138839840888977,
"reward_std": 0.3054915964603424,
"rewards/accuracy_reward_step": 0.5078125,
"rewards/final_brier_reward_step": 0.5787968635559082,
"rewards/format_reward_step_strict": 0.97265625,
"step": 194
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 2.9951615692080935e-06,
"aux_brier/mean_group_std": 0.06479303479010276,
"aux_brier/mean_r": 0.9559623480273672,
"aux_brier/n_active_tok": 267.125,
"aux_brier/n_groups": 15.5625,
"aux_brier/n_step_records": 66.78125,
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.4836433188233742,
"calib/avg_num_step_conf": 8.4296875,
"calib/ece": 0.4182591093117408,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.004160401002506281,
"calib/mean_conf": 0.12530364372469638,
"calib/mu_c": 0.12338345864661654,
"calib/mu_w": 0.12754385964912282,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0025506072874493927,
"calib/std_conf": 0.04489088104836181,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2844.0,
"completions/max_terminated_length": 2844.0,
"completions/mean_length": 514.15234375,
"completions/mean_terminated_length": 520.2490234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 159.0,
"epoch": 0.208,
"grad_norm": 0.34284332394599915,
"learning_rate": 1.3888888888888888e-07,
"loss": -0.0164,
"num_tokens": 43831630.0,
"reward": 1.1381409168243408,
"reward_std": 0.2621338367462158,
"rewards/accuracy_reward_step": 0.51953125,
"rewards/final_brier_reward_step": 0.5525633096694946,
"rewards/format_reward_step_strict": 0.9609375,
"step": 195
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 1.7311345216208807e-06,
"aux_brier/mean_group_std": 0.05536466923083148,
"aux_brier/mean_r": 0.9579037857900614,
"aux_brier/n_active_tok": 244.25,
"aux_brier/n_groups": 12.875,
"aux_brier/n_step_records": 61.0625,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4931811072880825,
"calib/avg_num_step_conf": 7.63671875,
"calib/ece": 0.42236220472440944,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.002194557397560165,
"calib/mean_conf": 0.12984251968503938,
"calib/mu_c": 0.12884892086330937,
"calib/mu_w": 0.13104347826086954,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.002480314960629921,
"calib/std_conf": 0.046201957417859894,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3002.0,
"completions/max_terminated_length": 3002.0,
"completions/mean_length": 431.05859375,
"completions/mean_terminated_length": 432.7490539550781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 166.0,
"epoch": 0.20906666666666668,
"grad_norm": 0.03306657820940018,
"learning_rate": 1.1111111111111112e-07,
"loss": 0.0166,
"num_tokens": 44044525.0,
"reward": 1.1816363334655762,
"reward_std": 0.16497869789600372,
"rewards/accuracy_reward_step": 0.54296875,
"rewards/final_brier_reward_step": 0.5702953338623047,
"rewards/format_reward_step_strict": 0.9921875,
"step": 196
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 3.614795514028657e-06,
"aux_brier/mean_group_std": 0.10405568824929422,
"aux_brier/mean_r": 0.9350196740294592,
"aux_brier/n_active_tok": 285.875,
"aux_brier/n_groups": 18.09375,
"aux_brier/n_step_records": 71.46875,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5083735909822866,
"calib/avg_num_step_conf": 9.14453125,
"calib/ece": 0.32848000000000005,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0004959742351046681,
"calib/mean_conf": 0.13392,
"calib/mu_c": 0.1336521739130435,
"calib/mu_w": 0.13414814814814816,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0012,
"calib/std_conf": 0.048454448712166774,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 3021.0,
"completions/max_terminated_length": 3021.0,
"completions/mean_length": 522.1875,
"completions/mean_terminated_length": 526.2991943359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 151.0,
"epoch": 0.21013333333333334,
"grad_norm": 0.103756844997406,
"learning_rate": 8.333333333333334e-08,
"loss": 0.0285,
"num_tokens": 44283261.0,
"reward": 1.0892724990844727,
"reward_std": 0.2750256061553955,
"rewards/accuracy_reward_step": 0.44921875,
"rewards/final_brier_reward_step": 0.6227148771286011,
"rewards/format_reward_step_strict": 0.96875,
"step": 197
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 3.3878599466352632e-06,
"aux_brier/mean_group_std": 0.054694135177370894,
"aux_brier/mean_r": 0.968603385805794,
"aux_brier/n_active_tok": 265.0,
"aux_brier/n_groups": 15.53125,
"aux_brier/n_step_records": 66.25,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.54859477124183,
"calib/avg_num_step_conf": 8.28125,
"calib/ece": 0.47019841269841267,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.009125490196078431,
"calib/mean_conf": 0.12503968253968253,
"calib/mu_c": 0.12873333333333334,
"calib/mu_w": 0.11960784313725491,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.048415349263881836,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3042.0,
"completions/max_terminated_length": 3042.0,
"completions/mean_length": 498.203125,
"completions/mean_terminated_length": 498.203125,
"completions/min_length": 132.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.2112,
"grad_norm": 0.13945958018302917,
"learning_rate": 5.555555555555556e-08,
"loss": 0.1007,
"num_tokens": 44516185.0,
"reward": 1.2089036703109741,
"reward_std": 0.22615382075309753,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/final_brier_reward_step": 0.530927300453186,
"rewards/format_reward_step_strict": 0.98046875,
"step": 198
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 5.530694379529688e-06,
"aux_brier/mean_group_std": 0.06728849122525137,
"aux_brier/mean_r": 0.9580130139995875,
"aux_brier/n_active_tok": 299.5,
"aux_brier/n_groups": 20.0,
"aux_brier/n_step_records": 74.875,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5274936061381074,
"calib/avg_num_step_conf": 9.359375,
"calib/ece": 0.41394422310756973,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.005650255754475725,
"calib/mean_conf": 0.1302788844621514,
"calib/mu_c": 0.13286764705882353,
"calib/mu_w": 0.1272173913043478,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.001195219123505976,
"calib/std_conf": 0.04652380096387325,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2985.0,
"completions/max_terminated_length": 2985.0,
"completions/mean_length": 574.96875,
"completions/mean_terminated_length": 577.2235717773438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 161.0,
"epoch": 0.21226666666666666,
"grad_norm": 0.10852678120136261,
"learning_rate": 2.777777777777778e-08,
"loss": 0.0178,
"num_tokens": 44767577.0,
"reward": 1.1592490673065186,
"reward_std": 0.2842707633972168,
"rewards/accuracy_reward_step": 0.53125,
"rewards/final_brier_reward_step": 0.5666840076446533,
"rewards/format_reward_step_strict": 0.97265625,
"step": 199
},
{
"aux_brier/lambda": 0.25,
"aux_brier/loss": 6.376076377989648e-07,
"aux_brier/mean_group_std": 0.055945407588163985,
"aux_brier/mean_r": 0.9684017162047385,
"aux_brier/n_active_tok": 275.625,
"aux_brier/n_groups": 18.78125,
"aux_brier/n_step_records": 68.90625,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5670890410958905,
"calib/avg_num_step_conf": 8.61328125,
"calib/ece": 0.4642682926829268,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.010998630136986307,
"calib/mean_conf": 0.12922764227642275,
"calib/mu_c": 0.1336986301369863,
"calib/mu_w": 0.12269999999999999,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0,
"calib/std_conf": 0.04538692068468501,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2933.0,
"completions/max_terminated_length": 2933.0,
"completions/mean_length": 561.66015625,
"completions/mean_terminated_length": 561.66015625,
"completions/min_length": 121.0,
"completions/min_terminated_length": 121.0,
"epoch": 0.21333333333333335,
"grad_norm": 0.1206001341342926,
"learning_rate": 0.0,
"loss": 0.0931,
"num_tokens": 45019410.0,
"reward": 1.1839895248413086,
"reward_std": 0.2561890482902527,
"rewards/accuracy_reward_step": 0.57421875,
"rewards/final_brier_reward_step": 0.5250203013420105,
"rewards/format_reward_step_strict": 0.95703125,
"step": 200
},
{
"epoch": 0.21333333333333335,
"step": 200,
"total_flos": 0.0,
"train_loss": 0.03789105351956096,
"train_runtime": 16013.4832,
"train_samples_per_second": 3.197,
"train_steps_per_second": 0.012
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 45019410,
"num_train_epochs": 1,
"save_steps": 40,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}