Files
GRPO-7B-ls-v1-fullepoch-hotpot/trainer_state.json
ModelHub XC 38b7a8e4a1 初始化项目,由ModelHub XC社区提供模型
Model: zhaohq/GRPO-7B-ls-v1-fullepoch-hotpot
Source: Original Platform
2026-05-28 05:46:18 +08:00

21347 lines
807 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 625,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1640625,
"completions/max_length": 1000.0,
"completions/max_terminated_length": 1000.0,
"completions/mean_length": 341.4609375,
"completions/mean_terminated_length": 408.47662353515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.0016,
"grad_norm": 0.17134852707386017,
"learning_rate": 1.5873015873015872e-08,
"loss": -0.116,
"num_tokens": 486582.0,
"reward": 0.41310209035873413,
"reward_std": 0.4805126190185547,
"rewards/accuracy_reward_long_step": 0.2265625,
"rewards/final_brier_reward_long_step": 0.11814829707145691,
"rewards/format_reward_long_step": 0.23046875,
"rewards/stepwise_brier_reward_long_step": 0.1670725792646408,
"step": 1
},
{
"calib/answer_extract_rate": 0.00390625,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.00390625,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.19140625,
"completions/max_length": 1019.0,
"completions/max_terminated_length": 1019.0,
"completions/mean_length": 303.75,
"completions/mean_terminated_length": 375.65216064453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0032,
"grad_norm": 0.6887706518173218,
"learning_rate": 3.1746031746031744e-08,
"loss": -0.1486,
"num_tokens": 985630.0,
"reward": 0.4098304212093353,
"reward_std": 0.5015645623207092,
"rewards/accuracy_reward_long_step": 0.1875,
"rewards/final_brier_reward_long_step": 0.1355031430721283,
"rewards/format_reward_long_step": 0.27734375,
"rewards/stepwise_brier_reward_long_step": 0.1991310715675354,
"step": 2
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.15234375,
"completions/max_length": 1022.0,
"completions/max_terminated_length": 1022.0,
"completions/mean_length": 353.46484375,
"completions/mean_terminated_length": 416.99078369140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0048,
"grad_norm": 0.411814421415329,
"learning_rate": 4.7619047619047613e-08,
"loss": -0.1027,
"num_tokens": 1490821.0,
"reward": 0.41081345081329346,
"reward_std": 0.5538315773010254,
"rewards/accuracy_reward_long_step": 0.19921875,
"rewards/final_brier_reward_long_step": 0.13149982690811157,
"rewards/format_reward_long_step": 0.25390625,
"rewards/stepwise_brier_reward_long_step": 0.20706646144390106,
"step": 3
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.15625,
"completions/max_length": 944.0,
"completions/max_terminated_length": 944.0,
"completions/mean_length": 340.40625,
"completions/mean_terminated_length": 403.4444580078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0064,
"grad_norm": 0.40814000368118286,
"learning_rate": 6.349206349206349e-08,
"loss": -0.0835,
"num_tokens": 2004965.0,
"reward": 0.3751431107521057,
"reward_std": 0.48189181089401245,
"rewards/accuracy_reward_long_step": 0.1875,
"rewards/final_brier_reward_long_step": 0.11413241922855377,
"rewards/format_reward_long_step": 0.23046875,
"rewards/stepwise_brier_reward_long_step": 0.17550255358219147,
"step": 4
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.15234375,
"completions/max_length": 1001.0,
"completions/max_terminated_length": 1001.0,
"completions/mean_length": 343.70703125,
"completions/mean_terminated_length": 405.479248046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.008,
"grad_norm": 0.37365081906318665,
"learning_rate": 7.936507936507936e-08,
"loss": -0.0435,
"num_tokens": 2528514.0,
"reward": 0.34497031569480896,
"reward_std": 0.45299145579338074,
"rewards/accuracy_reward_long_step": 0.14453125,
"rewards/final_brier_reward_long_step": 0.10058828443288803,
"rewards/format_reward_long_step": 0.26953125,
"rewards/stepwise_brier_reward_long_step": 0.1621055006980896,
"step": 5
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1640625,
"completions/max_length": 996.0,
"completions/max_terminated_length": 996.0,
"completions/mean_length": 310.33203125,
"completions/mean_terminated_length": 371.2383117675781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0096,
"grad_norm": 2.2686784267425537,
"learning_rate": 9.523809523809523e-08,
"loss": -0.1196,
"num_tokens": 3035623.0,
"reward": 0.39475879073143005,
"reward_std": 0.5006267428398132,
"rewards/accuracy_reward_long_step": 0.1953125,
"rewards/final_brier_reward_long_step": 0.11408085376024246,
"rewards/format_reward_long_step": 0.2421875,
"rewards/stepwise_brier_reward_long_step": 0.19932931661605835,
"step": 6
},
{
"calib/answer_extract_rate": 0.00390625,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.00390625,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1640625,
"completions/max_length": 1011.0,
"completions/max_terminated_length": 1011.0,
"completions/mean_length": 349.5703125,
"completions/mean_terminated_length": 418.17755126953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0112,
"grad_norm": 0.5209683179855347,
"learning_rate": 1.111111111111111e-07,
"loss": -0.1158,
"num_tokens": 3544593.0,
"reward": 0.36087095737457275,
"reward_std": 0.5047852993011475,
"rewards/accuracy_reward_long_step": 0.16015625,
"rewards/final_brier_reward_long_step": 0.11222599446773529,
"rewards/format_reward_long_step": 0.25390625,
"rewards/stepwise_brier_reward_long_step": 0.18282026052474976,
"step": 7
},
{
"calib/answer_extract_rate": 0.0078125,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0078125,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.19140625,
"completions/max_length": 1020.0,
"completions/max_terminated_length": 1020.0,
"completions/mean_length": 334.12109375,
"completions/mean_terminated_length": 413.2125549316406,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0128,
"grad_norm": 0.5285189151763916,
"learning_rate": 1.2698412698412698e-07,
"loss": -0.0977,
"num_tokens": 4034728.0,
"reward": 0.4276258945465088,
"reward_std": 0.5124700665473938,
"rewards/accuracy_reward_long_step": 0.1953125,
"rewards/final_brier_reward_long_step": 0.13241875171661377,
"rewards/format_reward_long_step": 0.2890625,
"rewards/stepwise_brier_reward_long_step": 0.2187097817659378,
"step": 8
},
{
"calib/answer_extract_rate": 0.00390625,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.00390625,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.14453125,
"completions/max_length": 976.0,
"completions/max_terminated_length": 976.0,
"completions/mean_length": 361.22265625,
"completions/mean_terminated_length": 422.2511291503906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0144,
"grad_norm": 2.121281862258911,
"learning_rate": 1.4285714285714285e-07,
"loss": -0.1102,
"num_tokens": 4560393.0,
"reward": 0.2677909731864929,
"reward_std": 0.41135305166244507,
"rewards/accuracy_reward_long_step": 0.07421875,
"rewards/final_brier_reward_long_step": 0.11064782738685608,
"rewards/format_reward_long_step": 0.25,
"rewards/stepwise_brier_reward_long_step": 0.163641095161438,
"step": 9
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.00390625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/mean_conf": 0.2,
"calib/nonempty_final_conf_rate": 0.00390625,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.17578125,
"completions/max_length": 1022.0,
"completions/max_terminated_length": 1022.0,
"completions/mean_length": 344.53515625,
"completions/mean_terminated_length": 418.01422119140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.016,
"grad_norm": 0.2448827624320984,
"learning_rate": 1.5873015873015872e-07,
"loss": -0.1562,
"num_tokens": 5068466.0,
"reward": 0.3565204441547394,
"reward_std": 0.4487614333629608,
"rewards/accuracy_reward_long_step": 0.15234375,
"rewards/final_brier_reward_long_step": 0.11602266132831573,
"rewards/format_reward_long_step": 0.26171875,
"rewards/stepwise_brier_reward_long_step": 0.17724668979644775,
"step": 10
},
{
"calib/answer_extract_rate": 0.00390625,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.00390625,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1328125,
"completions/max_length": 1023.0,
"completions/max_terminated_length": 1023.0,
"completions/mean_length": 349.59765625,
"completions/mean_terminated_length": 403.1396484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0176,
"grad_norm": 0.8030067086219788,
"learning_rate": 1.7460317460317458e-07,
"loss": -0.0554,
"num_tokens": 5589579.0,
"reward": 0.2793608009815216,
"reward_std": 0.404774010181427,
"rewards/accuracy_reward_long_step": 0.10546875,
"rewards/final_brier_reward_long_step": 0.08069999516010284,
"rewards/format_reward_long_step": 0.234375,
"rewards/stepwise_brier_reward_long_step": 0.1461181938648224,
"step": 11
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.00390625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/mean_conf": 0.9,
"calib/nonempty_final_conf_rate": 0.00390625,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.18359375,
"completions/max_length": 1011.0,
"completions/max_terminated_length": 1011.0,
"completions/mean_length": 322.5234375,
"completions/mean_terminated_length": 395.0526123046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0192,
"grad_norm": 0.28563177585601807,
"learning_rate": 1.9047619047619045e-07,
"loss": -0.119,
"num_tokens": 6094689.0,
"reward": 0.35700535774230957,
"reward_std": 0.45222049951553345,
"rewards/accuracy_reward_long_step": 0.19140625,
"rewards/final_brier_reward_long_step": 0.09916991740465164,
"rewards/format_reward_long_step": 0.19921875,
"rewards/stepwise_brier_reward_long_step": 0.16478905081748962,
"step": 12
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.16015625,
"completions/max_length": 980.0,
"completions/max_terminated_length": 980.0,
"completions/mean_length": 322.3203125,
"completions/mean_terminated_length": 383.7860412597656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0208,
"grad_norm": 0.1121608093380928,
"learning_rate": 2.0634920634920632e-07,
"loss": -0.0936,
"num_tokens": 6610131.0,
"reward": 0.410109281539917,
"reward_std": 0.4506571292877197,
"rewards/accuracy_reward_long_step": 0.1796875,
"rewards/final_brier_reward_long_step": 0.12741835415363312,
"rewards/format_reward_long_step": 0.296875,
"rewards/stepwise_brier_reward_long_step": 0.2005188763141632,
"step": 13
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.171875,
"completions/max_length": 1013.0,
"completions/max_terminated_length": 1013.0,
"completions/mean_length": 317.0234375,
"completions/mean_terminated_length": 382.8207702636719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0224,
"grad_norm": 0.11094717681407928,
"learning_rate": 2.222222222222222e-07,
"loss": -0.1125,
"num_tokens": 7122753.0,
"reward": 0.45112496614456177,
"reward_std": 0.50334632396698,
"rewards/accuracy_reward_long_step": 0.21484375,
"rewards/final_brier_reward_long_step": 0.14254721999168396,
"rewards/format_reward_long_step": 0.296875,
"rewards/stepwise_brier_reward_long_step": 0.20882770419120789,
"step": 14
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1796875,
"completions/max_length": 966.0,
"completions/max_terminated_length": 966.0,
"completions/mean_length": 311.2265625,
"completions/mean_terminated_length": 379.4000244140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.024,
"grad_norm": 0.13925662636756897,
"learning_rate": 2.3809523809523806e-07,
"loss": -0.1164,
"num_tokens": 7630235.0,
"reward": 0.31538814306259155,
"reward_std": 0.4458809494972229,
"rewards/accuracy_reward_long_step": 0.13671875,
"rewards/final_brier_reward_long_step": 0.10075005888938904,
"rewards/format_reward_long_step": 0.21875,
"rewards/stepwise_brier_reward_long_step": 0.1764274686574936,
"step": 15
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.00390625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/mean_conf": 1.0,
"calib/nonempty_final_conf_rate": 0.00390625,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1484375,
"completions/max_length": 1018.0,
"completions/max_terminated_length": 1018.0,
"completions/mean_length": 326.4453125,
"completions/mean_terminated_length": 383.3486022949219,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0256,
"grad_norm": 0.7503873109817505,
"learning_rate": 2.5396825396825396e-07,
"loss": -0.126,
"num_tokens": 8156781.0,
"reward": 0.4038216471672058,
"reward_std": 0.5121759176254272,
"rewards/accuracy_reward_long_step": 0.1875,
"rewards/final_brier_reward_long_step": 0.13070036470890045,
"rewards/format_reward_long_step": 0.26953125,
"rewards/stepwise_brier_reward_long_step": 0.19552379846572876,
"step": 16
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1328125,
"completions/max_length": 1018.0,
"completions/max_terminated_length": 1018.0,
"completions/mean_length": 338.77734375,
"completions/mean_terminated_length": 390.66217041015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0272,
"grad_norm": 0.06501276046037674,
"learning_rate": 2.698412698412698e-07,
"loss": -0.1059,
"num_tokens": 8640828.0,
"reward": 0.3153911828994751,
"reward_std": 0.4668186902999878,
"rewards/accuracy_reward_long_step": 0.12890625,
"rewards/final_brier_reward_long_step": 0.10800625383853912,
"rewards/format_reward_long_step": 0.234375,
"rewards/stepwise_brier_reward_long_step": 0.16918347775936127,
"step": 17
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.17578125,
"completions/max_length": 979.0,
"completions/max_terminated_length": 979.0,
"completions/mean_length": 322.75390625,
"completions/mean_terminated_length": 391.58770751953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0288,
"grad_norm": 0.1249445378780365,
"learning_rate": 2.857142857142857e-07,
"loss": -0.1224,
"num_tokens": 9147797.0,
"reward": 0.3471192717552185,
"reward_std": 0.4712105989456177,
"rewards/accuracy_reward_long_step": 0.14453125,
"rewards/final_brier_reward_long_step": 0.11726874858140945,
"rewards/format_reward_long_step": 0.25390625,
"rewards/stepwise_brier_reward_long_step": 0.18527084589004517,
"step": 18
},
{
"calib/answer_extract_rate": 0.00390625,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.00390625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/mean_conf": 0.95,
"calib/nonempty_final_conf_rate": 0.00390625,
"calib/nonempty_reasoning_rate": 0.00390625,
"calib/nonempty_step_conf_rate": 0.0,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.13671875,
"completions/max_length": 1004.0,
"completions/max_terminated_length": 1004.0,
"completions/mean_length": 343.99609375,
"completions/mean_terminated_length": 398.4751281738281,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0304,
"grad_norm": 0.07186749577522278,
"learning_rate": 3.0158730158730156e-07,
"loss": -0.0533,
"num_tokens": 9663764.0,
"reward": 0.379126638174057,
"reward_std": 0.49106040596961975,
"rewards/accuracy_reward_long_step": 0.1796875,
"rewards/final_brier_reward_long_step": 0.11015625298023224,
"rewards/format_reward_long_step": 0.25,
"rewards/stepwise_brier_reward_long_step": 0.18760032951831818,
"step": 19
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.140625,
"completions/max_length": 980.0,
"completions/max_terminated_length": 980.0,
"completions/mean_length": 344.03125,
"completions/mean_terminated_length": 400.3272705078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.032,
"grad_norm": 0.1281915009021759,
"learning_rate": 3.1746031746031743e-07,
"loss": -0.0944,
"num_tokens": 10179540.0,
"reward": 0.3329862952232361,
"reward_std": 0.4402102828025818,
"rewards/accuracy_reward_long_step": 0.1328125,
"rewards/final_brier_reward_long_step": 0.11334909498691559,
"rewards/format_reward_long_step": 0.2578125,
"rewards/stepwise_brier_reward_long_step": 0.17172113060951233,
"step": 20
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.17578125,
"completions/max_length": 927.0,
"completions/max_terminated_length": 927.0,
"completions/mean_length": 325.28515625,
"completions/mean_terminated_length": 394.6587829589844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0336,
"grad_norm": 0.15362648665905,
"learning_rate": 3.333333333333333e-07,
"loss": -0.0859,
"num_tokens": 10689989.0,
"reward": 0.35474836826324463,
"reward_std": 0.48964905738830566,
"rewards/accuracy_reward_long_step": 0.15625,
"rewards/final_brier_reward_long_step": 0.11406318843364716,
"rewards/format_reward_long_step": 0.25,
"rewards/stepwise_brier_reward_long_step": 0.17993025481700897,
"step": 21
},
{
"calib/answer_extract_rate": 0.01171875,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.00390625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/mean_conf": 1.0,
"calib/nonempty_final_conf_rate": 0.00390625,
"calib/nonempty_reasoning_rate": 0.01171875,
"calib/nonempty_step_conf_rate": 0.0,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1953125,
"completions/max_length": 997.0,
"completions/max_terminated_length": 997.0,
"completions/mean_length": 311.5625,
"completions/mean_terminated_length": 387.1844787597656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0352,
"grad_norm": 0.21162962913513184,
"learning_rate": 3.4920634920634917e-07,
"loss": -0.1431,
"num_tokens": 11193597.0,
"reward": 0.32876265048980713,
"reward_std": 0.4388379454612732,
"rewards/accuracy_reward_long_step": 0.1171875,
"rewards/final_brier_reward_long_step": 0.10059726983308792,
"rewards/format_reward_long_step": 0.26953125,
"rewards/stepwise_brier_reward_long_step": 0.20664086937904358,
"step": 22
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.12890625,
"completions/max_length": 1010.0,
"completions/max_terminated_length": 1010.0,
"completions/mean_length": 347.2265625,
"completions/mean_terminated_length": 398.6098937988281,
"completions/min_length": 0.0,
"completions/min_terminated_length": 26.0,
"epoch": 0.0368,
"grad_norm": 0.08162181079387665,
"learning_rate": 3.6507936507936504e-07,
"loss": -0.0443,
"num_tokens": 11712407.0,
"reward": 0.3704327940940857,
"reward_std": 0.4654346704483032,
"rewards/accuracy_reward_long_step": 0.171875,
"rewards/final_brier_reward_long_step": 0.0984906256198883,
"rewards/format_reward_long_step": 0.26171875,
"rewards/stepwise_brier_reward_long_step": 0.17230293154716492,
"step": 23
},
{
"calib/answer_extract_rate": 0.00390625,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.00390625,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.125,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 1024.0,
"completions/mean_length": 358.25,
"completions/mean_terminated_length": 409.4285888671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.0384,
"grad_norm": 0.22265669703483582,
"learning_rate": 3.809523809523809e-07,
"loss": -0.0882,
"num_tokens": 12222919.0,
"reward": 0.39699164032936096,
"reward_std": 0.49701642990112305,
"rewards/accuracy_reward_long_step": 0.17578125,
"rewards/final_brier_reward_long_step": 0.11312989890575409,
"rewards/format_reward_long_step": 0.28125,
"rewards/stepwise_brier_reward_long_step": 0.20921160280704498,
"step": 24
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.00390625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/mean_conf": 0.8,
"calib/nonempty_final_conf_rate": 0.00390625,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09375,
"completions/max_length": 1015.0,
"completions/max_terminated_length": 1015.0,
"completions/mean_length": 361.26171875,
"completions/mean_terminated_length": 398.63360595703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.04,
"grad_norm": 0.07138670980930328,
"learning_rate": 3.968253968253968e-07,
"loss": -0.0724,
"num_tokens": 12748058.0,
"reward": 0.39820796251296997,
"reward_std": 0.4787534177303314,
"rewards/accuracy_reward_long_step": 0.14453125,
"rewards/final_brier_reward_long_step": 0.1328800767660141,
"rewards/format_reward_long_step": 0.32421875,
"rewards/stepwise_brier_reward_long_step": 0.23338933289051056,
"step": 25
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.17578125,
"completions/max_length": 1008.0,
"completions/max_terminated_length": 1008.0,
"completions/mean_length": 326.24609375,
"completions/mean_terminated_length": 395.82464599609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0416,
"grad_norm": 0.06991428881883621,
"learning_rate": 4.1269841269841265e-07,
"loss": -0.1661,
"num_tokens": 13269377.0,
"reward": 0.3767819404602051,
"reward_std": 0.46116840839385986,
"rewards/accuracy_reward_long_step": 0.15625,
"rewards/final_brier_reward_long_step": 0.11419257521629333,
"rewards/format_reward_long_step": 0.2890625,
"rewards/stepwise_brier_reward_long_step": 0.1898101270198822,
"step": 26
},
{
"calib/answer_extract_rate": 0.00390625,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.00390625,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.14453125,
"completions/max_length": 989.0,
"completions/max_terminated_length": 989.0,
"completions/mean_length": 352.62109375,
"completions/mean_terminated_length": 412.1963195800781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.0432,
"grad_norm": 0.18298697471618652,
"learning_rate": 4.285714285714285e-07,
"loss": -0.0767,
"num_tokens": 13776568.0,
"reward": 0.3685033917427063,
"reward_std": 0.4383317232131958,
"rewards/accuracy_reward_long_step": 0.13671875,
"rewards/final_brier_reward_long_step": 0.11230936646461487,
"rewards/format_reward_long_step": 0.3046875,
"rewards/stepwise_brier_reward_long_step": 0.20545418560504913,
"step": 27
},
{
"calib/answer_extract_rate": 0.00390625,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.00390625,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.15625,
"completions/max_length": 1013.0,
"completions/max_terminated_length": 1013.0,
"completions/mean_length": 360.98828125,
"completions/mean_terminated_length": 427.83795166015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 10.0,
"epoch": 0.0448,
"grad_norm": 0.07104546576738358,
"learning_rate": 4.444444444444444e-07,
"loss": -0.134,
"num_tokens": 14287893.0,
"reward": 0.41739368438720703,
"reward_std": 0.5269143581390381,
"rewards/accuracy_reward_long_step": 0.16796875,
"rewards/final_brier_reward_long_step": 0.14333046972751617,
"rewards/format_reward_long_step": 0.31640625,
"rewards/stepwise_brier_reward_long_step": 0.22155673801898956,
"step": 28
},
{
"calib/answer_extract_rate": 0.00390625,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.00390625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/mean_conf": 0.75,
"calib/nonempty_final_conf_rate": 0.00390625,
"calib/nonempty_reasoning_rate": 0.00390625,
"calib/nonempty_step_conf_rate": 0.0,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.140625,
"completions/max_length": 966.0,
"completions/max_terminated_length": 966.0,
"completions/mean_length": 338.6015625,
"completions/mean_terminated_length": 394.0090637207031,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0464,
"grad_norm": 0.1085757464170456,
"learning_rate": 4.6031746031746025e-07,
"loss": -0.0861,
"num_tokens": 14807951.0,
"reward": 0.44378989934921265,
"reward_std": 0.4950755834579468,
"rewards/accuracy_reward_long_step": 0.16796875,
"rewards/final_brier_reward_long_step": 0.13291756808757782,
"rewards/format_reward_long_step": 0.359375,
"rewards/stepwise_brier_reward_long_step": 0.2516169548034668,
"step": 29
},
{
"calib/answer_extract_rate": 0.00390625,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.00390625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/mean_conf": 0.95,
"calib/nonempty_final_conf_rate": 0.00390625,
"calib/nonempty_reasoning_rate": 0.00390625,
"calib/nonempty_step_conf_rate": 0.0,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.12109375,
"completions/max_length": 980.0,
"completions/max_terminated_length": 980.0,
"completions/mean_length": 353.80078125,
"completions/mean_terminated_length": 402.5466613769531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 10.0,
"epoch": 0.048,
"grad_norm": 0.08588235080242157,
"learning_rate": 4.761904761904761e-07,
"loss": -0.0824,
"num_tokens": 15312820.0,
"reward": 0.5131794214248657,
"reward_std": 0.5309146046638489,
"rewards/accuracy_reward_long_step": 0.20703125,
"rewards/final_brier_reward_long_step": 0.17949271202087402,
"rewards/format_reward_long_step": 0.390625,
"rewards/stepwise_brier_reward_long_step": 0.2638500928878784,
"step": 30
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.00390625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/mean_conf": 0.98,
"calib/nonempty_final_conf_rate": 0.00390625,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09375,
"completions/max_length": 992.0,
"completions/max_terminated_length": 992.0,
"completions/mean_length": 355.95703125,
"completions/mean_terminated_length": 392.7801818847656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.0496,
"grad_norm": 0.0898517370223999,
"learning_rate": 4.92063492063492e-07,
"loss": -0.0724,
"num_tokens": 15823481.0,
"reward": 0.5299139618873596,
"reward_std": 0.57805997133255,
"rewards/accuracy_reward_long_step": 0.2421875,
"rewards/final_brier_reward_long_step": 0.17374873161315918,
"rewards/format_reward_long_step": 0.35546875,
"rewards/stepwise_brier_reward_long_step": 0.26621949672698975,
"step": 31
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.00390625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/mean_conf": 0.9,
"calib/nonempty_final_conf_rate": 0.00390625,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.125,
"completions/max_length": 1013.0,
"completions/max_terminated_length": 1013.0,
"completions/mean_length": 358.1640625,
"completions/mean_terminated_length": 409.33038330078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0512,
"grad_norm": 1.044785737991333,
"learning_rate": 5.079365079365079e-07,
"loss": -0.1237,
"num_tokens": 16325931.0,
"reward": 0.44143152236938477,
"reward_std": 0.4683123230934143,
"rewards/accuracy_reward_long_step": 0.16796875,
"rewards/final_brier_reward_long_step": 0.14440733194351196,
"rewards/format_reward_long_step": 0.35546875,
"rewards/stepwise_brier_reward_long_step": 0.2385062575340271,
"step": 32
},
{
"calib/answer_extract_rate": 0.00390625,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.00390625,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.125,
"completions/max_length": 1016.0,
"completions/max_terminated_length": 1016.0,
"completions/mean_length": 359.73046875,
"completions/mean_terminated_length": 411.12054443359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0528,
"grad_norm": 0.04461158439517021,
"learning_rate": 5.238095238095238e-07,
"loss": -0.0966,
"num_tokens": 16843398.0,
"reward": 0.44179078936576843,
"reward_std": 0.507757306098938,
"rewards/accuracy_reward_long_step": 0.16796875,
"rewards/final_brier_reward_long_step": 0.15976552665233612,
"rewards/format_reward_long_step": 0.36328125,
"rewards/stepwise_brier_reward_long_step": 0.20896016061306,
"step": 33
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0078125,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/mean_conf": 0.96,
"calib/nonempty_final_conf_rate": 0.0078125,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/std_conf": 0.010000000000000009,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09765625,
"completions/max_length": 995.0,
"completions/max_terminated_length": 995.0,
"completions/mean_length": 345.3828125,
"completions/mean_terminated_length": 382.76190185546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0544,
"grad_norm": 0.14507651329040527,
"learning_rate": 5.396825396825396e-07,
"loss": -0.1239,
"num_tokens": 17357632.0,
"reward": 0.4228193163871765,
"reward_std": 0.46190011501312256,
"rewards/accuracy_reward_long_step": 0.125,
"rewards/final_brier_reward_long_step": 0.15529990196228027,
"rewards/format_reward_long_step": 0.3828125,
"rewards/stepwise_brier_reward_long_step": 0.2703523635864258,
"step": 34
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.109375,
"completions/max_length": 980.0,
"completions/max_terminated_length": 980.0,
"completions/mean_length": 342.94140625,
"completions/mean_terminated_length": 385.0570068359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 96.0,
"epoch": 0.056,
"grad_norm": 0.13200579583644867,
"learning_rate": 5.555555555555555e-07,
"loss": -0.0371,
"num_tokens": 17874505.0,
"reward": 0.6404584646224976,
"reward_std": 0.5706257820129395,
"rewards/accuracy_reward_long_step": 0.29296875,
"rewards/final_brier_reward_long_step": 0.22589921951293945,
"rewards/format_reward_long_step": 0.42578125,
"rewards/stepwise_brier_reward_long_step": 0.31249701976776123,
"step": 35
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.00390625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/mean_conf": 0.97,
"calib/nonempty_final_conf_rate": 0.00390625,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.109375,
"completions/max_length": 955.0,
"completions/max_terminated_length": 955.0,
"completions/mean_length": 314.12109375,
"completions/mean_terminated_length": 352.6973571777344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.0576,
"grad_norm": 0.15383663773536682,
"learning_rate": 5.714285714285714e-07,
"loss": -0.1074,
"num_tokens": 18373048.0,
"reward": 0.6416888236999512,
"reward_std": 0.582075834274292,
"rewards/accuracy_reward_long_step": 0.234375,
"rewards/final_brier_reward_long_step": 0.23546718060970306,
"rewards/format_reward_long_step": 0.5078125,
"rewards/stepwise_brier_reward_long_step": 0.37816306948661804,
"step": 36
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.00390625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/mean_conf": 0.94,
"calib/nonempty_final_conf_rate": 0.00390625,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.11328125,
"completions/max_length": 987.0,
"completions/max_terminated_length": 987.0,
"completions/mean_length": 334.453125,
"completions/mean_terminated_length": 377.18060302734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.0592,
"grad_norm": 0.16420908272266388,
"learning_rate": 5.873015873015873e-07,
"loss": -0.0809,
"num_tokens": 18885148.0,
"reward": 0.6179122924804688,
"reward_std": 0.5788693428039551,
"rewards/accuracy_reward_long_step": 0.22265625,
"rewards/final_brier_reward_long_step": 0.20363515615463257,
"rewards/format_reward_long_step": 0.49609375,
"rewards/stepwise_brier_reward_long_step": 0.385201632976532,
"step": 37
},
{
"calib/answer_extract_rate": 0.00390625,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.00390625,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.078125,
"completions/max_length": 996.0,
"completions/max_terminated_length": 996.0,
"completions/mean_length": 345.3828125,
"completions/mean_terminated_length": 374.6525573730469,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.0608,
"grad_norm": 0.20274563133716583,
"learning_rate": 6.031746031746031e-07,
"loss": -0.0143,
"num_tokens": 19403174.0,
"reward": 0.5915793180465698,
"reward_std": 0.5329806804656982,
"rewards/accuracy_reward_long_step": 0.16015625,
"rewards/final_brier_reward_long_step": 0.2439812570810318,
"rewards/format_reward_long_step": 0.5390625,
"rewards/stepwise_brier_reward_long_step": 0.40358591079711914,
"step": 38
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0859375,
"completions/max_length": 918.0,
"completions/max_terminated_length": 918.0,
"completions/mean_length": 326.77734375,
"completions/mean_terminated_length": 357.5000305175781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 54.0,
"epoch": 0.0624,
"grad_norm": 0.3366471827030182,
"learning_rate": 6.19047619047619e-07,
"loss": -0.0919,
"num_tokens": 19907301.0,
"reward": 0.5409894585609436,
"reward_std": 0.4594622850418091,
"rewards/accuracy_reward_long_step": 0.13671875,
"rewards/final_brier_reward_long_step": 0.20591670274734497,
"rewards/format_reward_long_step": 0.546875,
"rewards/stepwise_brier_reward_long_step": 0.31741613149642944,
"step": 39
},
{
"calib/answer_extract_rate": 0.0078125,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.00390625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/mean_conf": 0.95,
"calib/nonempty_final_conf_rate": 0.00390625,
"calib/nonempty_reasoning_rate": 0.0078125,
"calib/nonempty_step_conf_rate": 0.0,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09765625,
"completions/max_length": 1015.0,
"completions/max_terminated_length": 1015.0,
"completions/mean_length": 326.78515625,
"completions/mean_terminated_length": 362.1515197753906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 66.0,
"epoch": 0.064,
"grad_norm": 0.1065516546368599,
"learning_rate": 6.349206349206349e-07,
"loss": -0.0945,
"num_tokens": 20406822.0,
"reward": 0.6141079664230347,
"reward_std": 0.5281961560249329,
"rewards/accuracy_reward_long_step": 0.1875,
"rewards/final_brier_reward_long_step": 0.23533864319324493,
"rewards/format_reward_long_step": 0.53125,
"rewards/stepwise_brier_reward_long_step": 0.4085933566093445,
"step": 40
},
{
"calib/answer_extract_rate": 0.00390625,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.00390625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/mean_conf": 0.95,
"calib/nonempty_final_conf_rate": 0.00390625,
"calib/nonempty_reasoning_rate": 0.00390625,
"calib/nonempty_step_conf_rate": 0.0,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.125,
"completions/max_length": 1020.0,
"completions/max_terminated_length": 1020.0,
"completions/mean_length": 306.8515625,
"completions/mean_terminated_length": 350.6875305175781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 83.0,
"epoch": 0.0656,
"grad_norm": 0.21869583427906036,
"learning_rate": 6.507936507936507e-07,
"loss": -0.1106,
"num_tokens": 20919024.0,
"reward": 0.5630888342857361,
"reward_std": 0.48765885829925537,
"rewards/accuracy_reward_long_step": 0.15234375,
"rewards/final_brier_reward_long_step": 0.21809795498847961,
"rewards/format_reward_long_step": 0.5234375,
"rewards/stepwise_brier_reward_long_step": 0.37800735235214233,
"step": 41
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09765625,
"completions/max_length": 956.0,
"completions/max_terminated_length": 956.0,
"completions/mean_length": 311.23828125,
"completions/mean_terminated_length": 344.9220886230469,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.0672,
"grad_norm": 0.12498702108860016,
"learning_rate": 6.666666666666666e-07,
"loss": -0.0945,
"num_tokens": 21415717.0,
"reward": 0.7428398132324219,
"reward_std": 0.5535950660705566,
"rewards/accuracy_reward_long_step": 0.30078125,
"rewards/final_brier_reward_long_step": 0.27300766110420227,
"rewards/format_reward_long_step": 0.5625,
"rewards/stepwise_brier_reward_long_step": 0.37022653222084045,
"step": 42
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.00390625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/mean_conf": 0.8,
"calib/nonempty_final_conf_rate": 0.00390625,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05859375,
"completions/max_length": 978.0,
"completions/max_terminated_length": 978.0,
"completions/mean_length": 316.40234375,
"completions/mean_terminated_length": 336.095458984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 25.0,
"epoch": 0.0688,
"grad_norm": 0.10794218629598618,
"learning_rate": 6.825396825396826e-07,
"loss": -0.0312,
"num_tokens": 21930028.0,
"reward": 0.6837334632873535,
"reward_std": 0.5118536949157715,
"rewards/accuracy_reward_long_step": 0.2265625,
"rewards/final_brier_reward_long_step": 0.2446330040693283,
"rewards/format_reward_long_step": 0.57421875,
"rewards/stepwise_brier_reward_long_step": 0.43561333417892456,
"step": 43
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.078125,
"completions/max_length": 999.0,
"completions/max_terminated_length": 999.0,
"completions/mean_length": 312.5703125,
"completions/mean_terminated_length": 339.059326171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.0704,
"grad_norm": 0.08316317200660706,
"learning_rate": 6.984126984126983e-07,
"loss": -0.0437,
"num_tokens": 22445902.0,
"reward": 0.758315920829773,
"reward_std": 0.5356731414794922,
"rewards/accuracy_reward_long_step": 0.28515625,
"rewards/final_brier_reward_long_step": 0.2901049256324768,
"rewards/format_reward_long_step": 0.60546875,
"rewards/stepwise_brier_reward_long_step": 0.39159637689590454,
"step": 44
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.00390625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/mean_conf": 0.85,
"calib/nonempty_final_conf_rate": 0.00390625,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05859375,
"completions/max_length": 1011.0,
"completions/max_terminated_length": 1011.0,
"completions/mean_length": 316.65625,
"completions/mean_terminated_length": 336.36517333984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.072,
"grad_norm": 0.14549246430397034,
"learning_rate": 7.142857142857143e-07,
"loss": -0.0613,
"num_tokens": 22949038.0,
"reward": 0.7944426536560059,
"reward_std": 0.5610437393188477,
"rewards/accuracy_reward_long_step": 0.26953125,
"rewards/final_brier_reward_long_step": 0.320908784866333,
"rewards/format_reward_long_step": 0.6484375,
"rewards/stepwise_brier_reward_long_step": 0.48186174035072327,
"step": 45
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05859375,
"completions/max_length": 960.0,
"completions/max_terminated_length": 960.0,
"completions/mean_length": 295.67578125,
"completions/mean_terminated_length": 314.078857421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.0736,
"grad_norm": 0.07323121279478073,
"learning_rate": 7.301587301587301e-07,
"loss": -0.0735,
"num_tokens": 23427059.0,
"reward": 0.6723222732543945,
"reward_std": 0.4763370752334595,
"rewards/accuracy_reward_long_step": 0.15234375,
"rewards/final_brier_reward_long_step": 0.26658162474632263,
"rewards/format_reward_long_step": 0.6875,
"rewards/stepwise_brier_reward_long_step": 0.43833261728286743,
"step": 46
},
{
"calib/answer_extract_rate": 0.0078125,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0078125,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.046875,
"completions/max_length": 841.0,
"completions/max_terminated_length": 841.0,
"completions/mean_length": 306.3046875,
"completions/mean_terminated_length": 321.36883544921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 14.0,
"epoch": 0.0752,
"grad_norm": 0.2430606335401535,
"learning_rate": 7.46031746031746e-07,
"loss": -0.1118,
"num_tokens": 23915857.0,
"reward": 0.9021989703178406,
"reward_std": 0.5503741502761841,
"rewards/accuracy_reward_long_step": 0.31640625,
"rewards/final_brier_reward_long_step": 0.36550503969192505,
"rewards/format_reward_long_step": 0.7265625,
"rewards/stepwise_brier_reward_long_step": 0.5245407223701477,
"step": 47
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0390625,
"completions/max_length": 898.0,
"completions/max_terminated_length": 898.0,
"completions/mean_length": 315.328125,
"completions/mean_terminated_length": 328.1463317871094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 63.0,
"epoch": 0.0768,
"grad_norm": 0.23907212913036346,
"learning_rate": 7.619047619047618e-07,
"loss": -0.0704,
"num_tokens": 24407109.0,
"reward": 0.8392512798309326,
"reward_std": 0.5024853944778442,
"rewards/accuracy_reward_long_step": 0.25390625,
"rewards/final_brier_reward_long_step": 0.34439170360565186,
"rewards/format_reward_long_step": 0.71875,
"rewards/stepwise_brier_reward_long_step": 0.5594882965087891,
"step": 48
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05078125,
"completions/max_length": 821.0,
"completions/max_terminated_length": 821.0,
"completions/mean_length": 306.22265625,
"completions/mean_terminated_length": 322.60491943359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 78.0,
"epoch": 0.0784,
"grad_norm": 0.13986116647720337,
"learning_rate": 7.777777777777778e-07,
"loss": -0.0654,
"num_tokens": 24920814.0,
"reward": 0.9134366512298584,
"reward_std": 0.5091613531112671,
"rewards/accuracy_reward_long_step": 0.2734375,
"rewards/final_brier_reward_long_step": 0.3957996368408203,
"rewards/format_reward_long_step": 0.79296875,
"rewards/stepwise_brier_reward_long_step": 0.5782594680786133,
"step": 49
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.046875,
"completions/max_length": 819.0,
"completions/max_terminated_length": 819.0,
"completions/mean_length": 281.90625,
"completions/mean_terminated_length": 295.7704772949219,
"completions/min_length": 0.0,
"completions/min_terminated_length": 51.0,
"epoch": 0.08,
"grad_norm": 0.22414922714233398,
"learning_rate": 7.936507936507936e-07,
"loss": -0.0835,
"num_tokens": 25417334.0,
"reward": 0.87409508228302,
"reward_std": 0.49227654933929443,
"rewards/accuracy_reward_long_step": 0.23046875,
"rewards/final_brier_reward_long_step": 0.35886436700820923,
"rewards/format_reward_long_step": 0.80859375,
"rewards/stepwise_brier_reward_long_step": 0.59845370054245,
"step": 50
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.00390625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/mean_conf": 0.88,
"calib/nonempty_final_conf_rate": 0.00390625,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 998.0,
"completions/max_terminated_length": 998.0,
"completions/mean_length": 297.73046875,
"completions/mean_terminated_length": 304.8760070800781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 69.0,
"epoch": 0.0816,
"grad_norm": 0.15504558384418488,
"learning_rate": 8.095238095238095e-07,
"loss": -0.0377,
"num_tokens": 25917625.0,
"reward": 0.8817519545555115,
"reward_std": 0.5145280361175537,
"rewards/accuracy_reward_long_step": 0.2265625,
"rewards/final_brier_reward_long_step": 0.36166319251060486,
"rewards/format_reward_long_step": 0.828125,
"rewards/stepwise_brier_reward_long_step": 0.6028447151184082,
"step": 51
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 777.0,
"completions/max_terminated_length": 777.0,
"completions/mean_length": 279.953125,
"completions/mean_terminated_length": 285.5298767089844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 108.0,
"epoch": 0.0832,
"grad_norm": 0.06322144716978073,
"learning_rate": 8.253968253968253e-07,
"loss": -0.0324,
"num_tokens": 26420901.0,
"reward": 0.9491708278656006,
"reward_std": 0.4398835599422455,
"rewards/accuracy_reward_long_step": 0.25390625,
"rewards/final_brier_reward_long_step": 0.42088940739631653,
"rewards/format_reward_long_step": 0.8828125,
"rewards/stepwise_brier_reward_long_step": 0.594543993473053,
"step": 52
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02734375,
"completions/max_length": 888.0,
"completions/max_terminated_length": 888.0,
"completions/mean_length": 299.453125,
"completions/mean_terminated_length": 307.8714599609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.0848,
"grad_norm": 0.045589037239551544,
"learning_rate": 8.412698412698413e-07,
"loss": -0.0128,
"num_tokens": 26920409.0,
"reward": 0.8190128803253174,
"reward_std": 0.3742133677005768,
"rewards/accuracy_reward_long_step": 0.16015625,
"rewards/final_brier_reward_long_step": 0.3180277347564697,
"rewards/format_reward_long_step": 0.84375,
"rewards/stepwise_brier_reward_long_step": 0.6298986673355103,
"step": 53
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 840.0,
"completions/max_terminated_length": 840.0,
"completions/mean_length": 290.125,
"completions/mean_terminated_length": 293.5652160644531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 130.0,
"epoch": 0.0864,
"grad_norm": 0.2063865065574646,
"learning_rate": 8.57142857142857e-07,
"loss": -0.0173,
"num_tokens": 27414017.0,
"reward": 0.9507964253425598,
"reward_std": 0.426396906375885,
"rewards/accuracy_reward_long_step": 0.25,
"rewards/final_brier_reward_long_step": 0.38583073019981384,
"rewards/format_reward_long_step": 0.890625,
"rewards/stepwise_brier_reward_long_step": 0.636104941368103,
"step": 54
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02734375,
"completions/max_length": 757.0,
"completions/max_terminated_length": 757.0,
"completions/mean_length": 291.60546875,
"completions/mean_terminated_length": 299.8031921386719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 28.0,
"epoch": 0.088,
"grad_norm": 0.12576599419116974,
"learning_rate": 8.73015873015873e-07,
"loss": -0.0195,
"num_tokens": 27925500.0,
"reward": 1.0254812240600586,
"reward_std": 0.5260324478149414,
"rewards/accuracy_reward_long_step": 0.31640625,
"rewards/final_brier_reward_long_step": 0.45056432485580444,
"rewards/format_reward_long_step": 0.85546875,
"rewards/stepwise_brier_reward_long_step": 0.674798309803009,
"step": 55
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 969.0,
"completions/max_terminated_length": 969.0,
"completions/mean_length": 289.62890625,
"completions/mean_terminated_length": 293.0632629394531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 97.0,
"epoch": 0.0896,
"grad_norm": 0.13417616486549377,
"learning_rate": 8.888888888888888e-07,
"loss": -0.0375,
"num_tokens": 28431053.0,
"reward": 1.1285545825958252,
"reward_std": 0.4153571128845215,
"rewards/accuracy_reward_long_step": 0.38671875,
"rewards/final_brier_reward_long_step": 0.4956166744232178,
"rewards/format_reward_long_step": 0.92578125,
"rewards/stepwise_brier_reward_long_step": 0.6201643943786621,
"step": 56
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 763.0,
"completions/max_terminated_length": 763.0,
"completions/mean_length": 269.65625,
"completions/mean_terminated_length": 276.1280212402344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 15.0,
"epoch": 0.0912,
"grad_norm": 0.06763328611850739,
"learning_rate": 9.047619047619047e-07,
"loss": -0.0317,
"num_tokens": 28913637.0,
"reward": 0.9394167065620422,
"reward_std": 0.3821975290775299,
"rewards/accuracy_reward_long_step": 0.2421875,
"rewards/final_brier_reward_long_step": 0.423524409532547,
"rewards/format_reward_long_step": 0.91796875,
"rewards/stepwise_brier_reward_long_step": 0.5294549465179443,
"step": 57
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 827.0,
"completions/max_terminated_length": 827.0,
"completions/mean_length": 271.98828125,
"completions/mean_terminated_length": 277.4063720703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 103.0,
"epoch": 0.0928,
"grad_norm": 0.25471770763397217,
"learning_rate": 9.206349206349205e-07,
"loss": -0.0193,
"num_tokens": 29408858.0,
"reward": 1.0430493354797363,
"reward_std": 0.45021143555641174,
"rewards/accuracy_reward_long_step": 0.30078125,
"rewards/final_brier_reward_long_step": 0.45864561200141907,
"rewards/format_reward_long_step": 0.90625,
"rewards/stepwise_brier_reward_long_step": 0.6979269981384277,
"step": 58
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 999.0,
"completions/max_terminated_length": 999.0,
"completions/mean_length": 268.2109375,
"completions/mean_terminated_length": 272.46826171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 81.0,
"epoch": 0.0944,
"grad_norm": 0.08365736901760101,
"learning_rate": 9.365079365079365e-07,
"loss": -0.0398,
"num_tokens": 29906672.0,
"reward": 0.9959282875061035,
"reward_std": 0.3980240225791931,
"rewards/accuracy_reward_long_step": 0.28515625,
"rewards/final_brier_reward_long_step": 0.4108448326587677,
"rewards/format_reward_long_step": 0.8984375,
"rewards/stepwise_brier_reward_long_step": 0.6353680491447449,
"step": 59
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 870.0,
"completions/max_terminated_length": 870.0,
"completions/mean_length": 270.6953125,
"completions/mean_terminated_length": 274.9920654296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 29.0,
"epoch": 0.096,
"grad_norm": 0.037650614976882935,
"learning_rate": 9.523809523809522e-07,
"loss": -0.0363,
"num_tokens": 30394242.0,
"reward": 0.9999738931655884,
"reward_std": 0.4165264964103699,
"rewards/accuracy_reward_long_step": 0.2734375,
"rewards/final_brier_reward_long_step": 0.44108301401138306,
"rewards/format_reward_long_step": 0.90234375,
"rewards/stepwise_brier_reward_long_step": 0.66037517786026,
"step": 60
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 865.0,
"completions/max_terminated_length": 865.0,
"completions/mean_length": 265.21875,
"completions/mean_terminated_length": 267.3070983886719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.0976,
"grad_norm": 0.106496162712574,
"learning_rate": 9.682539682539682e-07,
"loss": -0.0001,
"num_tokens": 30893266.0,
"reward": 1.1621458530426025,
"reward_std": 0.4202546775341034,
"rewards/accuracy_reward_long_step": 0.390625,
"rewards/final_brier_reward_long_step": 0.533796489238739,
"rewards/format_reward_long_step": 0.94140625,
"rewards/stepwise_brier_reward_long_step": 0.6694742441177368,
"step": 61
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 643.0,
"completions/max_terminated_length": 643.0,
"completions/mean_length": 263.109375,
"completions/mean_terminated_length": 266.229248046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 105.0,
"epoch": 0.0992,
"grad_norm": 0.10262063890695572,
"learning_rate": 9.84126984126984e-07,
"loss": 0.0064,
"num_tokens": 31379758.0,
"reward": 1.0871508121490479,
"reward_std": 0.3243914842605591,
"rewards/accuracy_reward_long_step": 0.3203125,
"rewards/final_brier_reward_long_step": 0.49898362159729004,
"rewards/format_reward_long_step": 0.94140625,
"rewards/stepwise_brier_reward_long_step": 0.6855573058128357,
"step": 62
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 797.0,
"completions/max_terminated_length": 797.0,
"completions/mean_length": 269.95703125,
"completions/mean_terminated_length": 271.0157165527344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 113.0,
"epoch": 0.1008,
"grad_norm": 0.045267656445503235,
"learning_rate": 1e-06,
"loss": 0.0051,
"num_tokens": 31875611.0,
"reward": 1.0248790979385376,
"reward_std": 0.3271501064300537,
"rewards/accuracy_reward_long_step": 0.26171875,
"rewards/final_brier_reward_long_step": 0.5174949169158936,
"rewards/format_reward_long_step": 0.9609375,
"rewards/stepwise_brier_reward_long_step": 0.6132714152336121,
"step": 63
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 709.0,
"completions/max_terminated_length": 709.0,
"completions/mean_length": 243.5,
"completions/mean_terminated_length": 245.41732788085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 74.0,
"epoch": 0.1024,
"grad_norm": 0.09396693110466003,
"learning_rate": 9.98220640569395e-07,
"loss": -0.0249,
"num_tokens": 32375531.0,
"reward": 1.0874074697494507,
"reward_std": 0.405730664730072,
"rewards/accuracy_reward_long_step": 0.3046875,
"rewards/final_brier_reward_long_step": 0.4932839870452881,
"rewards/format_reward_long_step": 0.94140625,
"rewards/stepwise_brier_reward_long_step": 0.7547836899757385,
"step": 64
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 603.0,
"completions/max_terminated_length": 603.0,
"completions/mean_length": 238.4765625,
"completions/mean_terminated_length": 242.2619171142578,
"completions/min_length": 0.0,
"completions/min_terminated_length": 97.0,
"epoch": 0.104,
"grad_norm": 0.10586308687925339,
"learning_rate": 9.9644128113879e-07,
"loss": -0.0241,
"num_tokens": 32865229.0,
"reward": 1.0684640407562256,
"reward_std": 0.37198448181152344,
"rewards/accuracy_reward_long_step": 0.30078125,
"rewards/final_brier_reward_long_step": 0.5142987966537476,
"rewards/format_reward_long_step": 0.9375,
"rewards/stepwise_brier_reward_long_step": 0.68143230676651,
"step": 65
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 795.0,
"completions/max_terminated_length": 795.0,
"completions/mean_length": 235.64453125,
"completions/mean_terminated_length": 238.43875122070312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 82.0,
"epoch": 0.1056,
"grad_norm": 0.06174841523170471,
"learning_rate": 9.94661921708185e-07,
"loss": -0.0329,
"num_tokens": 33359506.0,
"reward": 1.0320240259170532,
"reward_std": 0.3543888330459595,
"rewards/accuracy_reward_long_step": 0.28125,
"rewards/final_brier_reward_long_step": 0.523512601852417,
"rewards/format_reward_long_step": 0.94140625,
"rewards/stepwise_brier_reward_long_step": 0.5967710614204407,
"step": 66
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 627.0,
"completions/max_terminated_length": 627.0,
"completions/mean_length": 236.34375,
"completions/mean_terminated_length": 237.27059936523438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 109.0,
"epoch": 0.1072,
"grad_norm": 0.07356097549200058,
"learning_rate": 9.9288256227758e-07,
"loss": -0.0126,
"num_tokens": 33855346.0,
"reward": 1.2128088474273682,
"reward_std": 0.32791173458099365,
"rewards/accuracy_reward_long_step": 0.3828125,
"rewards/final_brier_reward_long_step": 0.6027936935424805,
"rewards/format_reward_long_step": 0.96484375,
"rewards/stepwise_brier_reward_long_step": 0.7875038385391235,
"step": 67
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1007.0,
"completions/max_terminated_length": 1007.0,
"completions/mean_length": 241.8046875,
"completions/mean_terminated_length": 242.75296020507812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.1088,
"grad_norm": 0.09495385736227036,
"learning_rate": 9.91103202846975e-07,
"loss": 0.0023,
"num_tokens": 34342288.0,
"reward": 1.1574406623840332,
"reward_std": 0.36004209518432617,
"rewards/accuracy_reward_long_step": 0.328125,
"rewards/final_brier_reward_long_step": 0.6040390729904175,
"rewards/format_reward_long_step": 0.96875,
"rewards/stepwise_brier_reward_long_step": 0.7757238149642944,
"step": 68
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 808.0,
"completions/max_terminated_length": 808.0,
"completions/mean_length": 220.72265625,
"completions/mean_terminated_length": 222.46063232421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 100.0,
"epoch": 0.1104,
"grad_norm": 0.4110874533653259,
"learning_rate": 9.8932384341637e-07,
"loss": 0.009,
"num_tokens": 34802673.0,
"reward": 1.3306258916854858,
"reward_std": 0.33039307594299316,
"rewards/accuracy_reward_long_step": 0.484375,
"rewards/final_brier_reward_long_step": 0.6564062833786011,
"rewards/format_reward_long_step": 0.98046875,
"rewards/stepwise_brier_reward_long_step": 0.7676596641540527,
"step": 69
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 479.0,
"completions/max_terminated_length": 479.0,
"completions/mean_length": 224.27734375,
"completions/mean_terminated_length": 224.27734375,
"completions/min_length": 108.0,
"completions/min_terminated_length": 108.0,
"epoch": 0.112,
"grad_norm": 0.05017966777086258,
"learning_rate": 9.87544483985765e-07,
"loss": -0.0149,
"num_tokens": 35287528.0,
"reward": 1.2564759254455566,
"reward_std": 0.27206528186798096,
"rewards/accuracy_reward_long_step": 0.421875,
"rewards/final_brier_reward_long_step": 0.6693449020385742,
"rewards/format_reward_long_step": 0.97265625,
"rewards/stepwise_brier_reward_long_step": 0.7237462401390076,
"step": 70
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 550.0,
"completions/max_terminated_length": 550.0,
"completions/mean_length": 218.94140625,
"completions/mean_terminated_length": 218.94140625,
"completions/min_length": 4.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.1136,
"grad_norm": 0.060872942209243774,
"learning_rate": 9.8576512455516e-07,
"loss": -0.0093,
"num_tokens": 35762137.0,
"reward": 1.3191676139831543,
"reward_std": 0.35025539994239807,
"rewards/accuracy_reward_long_step": 0.4609375,
"rewards/final_brier_reward_long_step": 0.6817148327827454,
"rewards/format_reward_long_step": 0.984375,
"rewards/stepwise_brier_reward_long_step": 0.7824558615684509,
"step": 71
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 705.0,
"completions/max_terminated_length": 705.0,
"completions/mean_length": 223.96875,
"completions/mean_terminated_length": 223.96875,
"completions/min_length": 55.0,
"completions/min_terminated_length": 55.0,
"epoch": 0.1152,
"grad_norm": 0.11113214492797852,
"learning_rate": 9.83985765124555e-07,
"loss": 0.0053,
"num_tokens": 36233089.0,
"reward": 1.1259610652923584,
"reward_std": 0.26902925968170166,
"rewards/accuracy_reward_long_step": 0.29296875,
"rewards/final_brier_reward_long_step": 0.6245523691177368,
"rewards/format_reward_long_step": 0.97265625,
"rewards/stepwise_brier_reward_long_step": 0.7621045112609863,
"step": 72
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 589.0,
"completions/max_terminated_length": 589.0,
"completions/mean_length": 214.41015625,
"completions/mean_terminated_length": 216.09841918945312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 77.0,
"epoch": 0.1168,
"grad_norm": 0.15858663618564606,
"learning_rate": 9.8220640569395e-07,
"loss": -0.0158,
"num_tokens": 36715946.0,
"reward": 1.0615253448486328,
"reward_std": 0.2766742706298828,
"rewards/accuracy_reward_long_step": 0.234375,
"rewards/final_brier_reward_long_step": 0.6290937662124634,
"rewards/format_reward_long_step": 0.96484375,
"rewards/stepwise_brier_reward_long_step": 0.7498202919960022,
"step": 73
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 600.0,
"completions/max_terminated_length": 600.0,
"completions/mean_length": 214.25390625,
"completions/mean_terminated_length": 214.25390625,
"completions/min_length": 113.0,
"completions/min_terminated_length": 113.0,
"epoch": 0.1184,
"grad_norm": 0.5460712909698486,
"learning_rate": 9.804270462633451e-07,
"loss": -0.011,
"num_tokens": 37180563.0,
"reward": 1.268796443939209,
"reward_std": 0.3486781716346741,
"rewards/accuracy_reward_long_step": 0.40625,
"rewards/final_brier_reward_long_step": 0.6875852346420288,
"rewards/format_reward_long_step": 0.98046875,
"rewards/stepwise_brier_reward_long_step": 0.8016629219055176,
"step": 74
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 712.0,
"completions/max_terminated_length": 712.0,
"completions/mean_length": 208.28515625,
"completions/mean_terminated_length": 209.92520141601562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 109.0,
"epoch": 0.12,
"grad_norm": 0.06162435933947563,
"learning_rate": 9.786476868327401e-07,
"loss": -0.0327,
"num_tokens": 37667916.0,
"reward": 1.14532470703125,
"reward_std": 0.30212295055389404,
"rewards/accuracy_reward_long_step": 0.30859375,
"rewards/final_brier_reward_long_step": 0.6789199113845825,
"rewards/format_reward_long_step": 0.96484375,
"rewards/stepwise_brier_reward_long_step": 0.7383161187171936,
"step": 75
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 585.0,
"completions/max_terminated_length": 585.0,
"completions/mean_length": 216.5234375,
"completions/mean_terminated_length": 216.5234375,
"completions/min_length": 86.0,
"completions/min_terminated_length": 86.0,
"epoch": 0.1216,
"grad_norm": 0.07474726438522339,
"learning_rate": 9.768683274021351e-07,
"loss": 0.0095,
"num_tokens": 38143594.0,
"reward": 1.2285196781158447,
"reward_std": 0.2515240013599396,
"rewards/accuracy_reward_long_step": 0.3671875,
"rewards/final_brier_reward_long_step": 0.715578556060791,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7375626564025879,
"step": 76
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 573.0,
"completions/max_terminated_length": 573.0,
"completions/mean_length": 209.671875,
"completions/mean_terminated_length": 209.671875,
"completions/min_length": 83.0,
"completions/min_terminated_length": 83.0,
"epoch": 0.1232,
"grad_norm": 0.04926946386694908,
"learning_rate": 9.750889679715302e-07,
"loss": 0.0161,
"num_tokens": 38622302.0,
"reward": 1.2208093404769897,
"reward_std": 0.23148852586746216,
"rewards/accuracy_reward_long_step": 0.34375,
"rewards/final_brier_reward_long_step": 0.7191964387893677,
"rewards/format_reward_long_step": 0.9921875,
"rewards/stepwise_brier_reward_long_step": 0.8046656250953674,
"step": 77
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 500.0,
"completions/max_terminated_length": 500.0,
"completions/mean_length": 205.0,
"completions/mean_terminated_length": 205.0,
"completions/min_length": 62.0,
"completions/min_terminated_length": 62.0,
"epoch": 0.1248,
"grad_norm": 0.05082042142748833,
"learning_rate": 9.733096085409252e-07,
"loss": -0.0045,
"num_tokens": 39095686.0,
"reward": 1.1963913440704346,
"reward_std": 0.2708578109741211,
"rewards/accuracy_reward_long_step": 0.33203125,
"rewards/final_brier_reward_long_step": 0.7243698239326477,
"rewards/format_reward_long_step": 0.98828125,
"rewards/stepwise_brier_reward_long_step": 0.7565078735351562,
"step": 78
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 692.0,
"completions/max_terminated_length": 692.0,
"completions/mean_length": 209.58203125,
"completions/mean_terminated_length": 209.58203125,
"completions/min_length": 97.0,
"completions/min_terminated_length": 97.0,
"epoch": 0.1264,
"grad_norm": 0.05406387895345688,
"learning_rate": 9.715302491103202e-07,
"loss": -0.0165,
"num_tokens": 39573555.0,
"reward": 1.2039846181869507,
"reward_std": 0.21231237053871155,
"rewards/accuracy_reward_long_step": 0.328125,
"rewards/final_brier_reward_long_step": 0.7417787313461304,
"rewards/format_reward_long_step": 0.98828125,
"rewards/stepwise_brier_reward_long_step": 0.7850972414016724,
"step": 79
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 389.0,
"completions/max_terminated_length": 389.0,
"completions/mean_length": 200.73046875,
"completions/mean_terminated_length": 200.73046875,
"completions/min_length": 94.0,
"completions/min_terminated_length": 94.0,
"epoch": 0.128,
"grad_norm": 0.03713912516832352,
"learning_rate": 9.697508896797152e-07,
"loss": -0.0034,
"num_tokens": 40051286.0,
"reward": 1.1338202953338623,
"reward_std": 0.22740787267684937,
"rewards/accuracy_reward_long_step": 0.25390625,
"rewards/final_brier_reward_long_step": 0.7587928771972656,
"rewards/format_reward_long_step": 0.98828125,
"rewards/stepwise_brier_reward_long_step": 0.7843012809753418,
"step": 80
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 581.0,
"completions/max_terminated_length": 581.0,
"completions/mean_length": 197.05859375,
"completions/mean_terminated_length": 197.05859375,
"completions/min_length": 90.0,
"completions/min_terminated_length": 90.0,
"epoch": 0.1296,
"grad_norm": 0.13255728781223297,
"learning_rate": 9.679715302491102e-07,
"loss": -0.0323,
"num_tokens": 40525805.0,
"reward": 1.2546930313110352,
"reward_std": 0.29959502816200256,
"rewards/accuracy_reward_long_step": 0.37890625,
"rewards/final_brier_reward_long_step": 0.7437311410903931,
"rewards/format_reward_long_step": 0.9921875,
"rewards/stepwise_brier_reward_long_step": 0.7750412225723267,
"step": 81
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 397.0,
"completions/max_terminated_length": 397.0,
"completions/mean_length": 185.1015625,
"completions/mean_terminated_length": 185.1015625,
"completions/min_length": 91.0,
"completions/min_terminated_length": 91.0,
"epoch": 0.1312,
"grad_norm": 0.03457874432206154,
"learning_rate": 9.661921708185054e-07,
"loss": 0.0033,
"num_tokens": 41000735.0,
"reward": 1.2457207441329956,
"reward_std": 0.1934887319803238,
"rewards/accuracy_reward_long_step": 0.359375,
"rewards/final_brier_reward_long_step": 0.7806586027145386,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7647244334220886,
"step": 82
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 452.0,
"completions/max_terminated_length": 452.0,
"completions/mean_length": 193.9140625,
"completions/mean_terminated_length": 193.9140625,
"completions/min_length": 104.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.1328,
"grad_norm": 0.04606304317712784,
"learning_rate": 9.644128113879002e-07,
"loss": -0.0082,
"num_tokens": 41472457.0,
"reward": 1.2768468856811523,
"reward_std": 0.25950515270233154,
"rewards/accuracy_reward_long_step": 0.40625,
"rewards/final_brier_reward_long_step": 0.7273233532905579,
"rewards/format_reward_long_step": 0.98828125,
"rewards/stepwise_brier_reward_long_step": 0.7785016894340515,
"step": 83
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 564.0,
"completions/max_terminated_length": 564.0,
"completions/mean_length": 187.13671875,
"completions/mean_terminated_length": 187.13671875,
"completions/min_length": 92.0,
"completions/min_terminated_length": 92.0,
"epoch": 0.1344,
"grad_norm": 0.04770605266094208,
"learning_rate": 9.626334519572953e-07,
"loss": 0.003,
"num_tokens": 41954644.0,
"reward": 1.3016421794891357,
"reward_std": 0.2636979818344116,
"rewards/accuracy_reward_long_step": 0.43359375,
"rewards/final_brier_reward_long_step": 0.7189725637435913,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7610331773757935,
"step": 84
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 380.0,
"completions/max_terminated_length": 380.0,
"completions/mean_length": 185.37109375,
"completions/mean_terminated_length": 186.09805297851562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 100.0,
"epoch": 0.136,
"grad_norm": 0.038233935832977295,
"learning_rate": 9.608540925266903e-07,
"loss": -0.0246,
"num_tokens": 42421019.0,
"reward": 1.3664637804031372,
"reward_std": 0.2260427474975586,
"rewards/accuracy_reward_long_step": 0.5078125,
"rewards/final_brier_reward_long_step": 0.6927652359008789,
"rewards/format_reward_long_step": 0.98828125,
"rewards/stepwise_brier_reward_long_step": 0.765277624130249,
"step": 85
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 552.0,
"completions/max_terminated_length": 552.0,
"completions/mean_length": 192.4375,
"completions/mean_terminated_length": 192.4375,
"completions/min_length": 103.0,
"completions/min_terminated_length": 103.0,
"epoch": 0.1376,
"grad_norm": 0.031944431364536285,
"learning_rate": 9.590747330960853e-07,
"loss": -0.0075,
"num_tokens": 42885091.0,
"reward": 1.2885265350341797,
"reward_std": 0.21891814470291138,
"rewards/accuracy_reward_long_step": 0.41796875,
"rewards/final_brier_reward_long_step": 0.7211390733718872,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7689046859741211,
"step": 86
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 505.0,
"completions/max_terminated_length": 505.0,
"completions/mean_length": 192.12890625,
"completions/mean_terminated_length": 192.12890625,
"completions/min_length": 81.0,
"completions/min_terminated_length": 81.0,
"epoch": 0.1392,
"grad_norm": 0.053836286067962646,
"learning_rate": 9.572953736654805e-07,
"loss": 0.0032,
"num_tokens": 43357156.0,
"reward": 1.3813579082489014,
"reward_std": 0.25391727685928345,
"rewards/accuracy_reward_long_step": 0.546875,
"rewards/final_brier_reward_long_step": 0.6488757729530334,
"rewards/format_reward_long_step": 0.98828125,
"rewards/stepwise_brier_reward_long_step": 0.7124930620193481,
"step": 87
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 579.0,
"completions/max_terminated_length": 579.0,
"completions/mean_length": 188.6640625,
"completions/mean_terminated_length": 188.6640625,
"completions/min_length": 87.0,
"completions/min_terminated_length": 87.0,
"epoch": 0.1408,
"grad_norm": 0.04048394784331322,
"learning_rate": 9.555160142348753e-07,
"loss": -0.0004,
"num_tokens": 43821726.0,
"reward": 1.2759735584259033,
"reward_std": 0.23172587156295776,
"rewards/accuracy_reward_long_step": 0.40234375,
"rewards/final_brier_reward_long_step": 0.7274124622344971,
"rewards/format_reward_long_step": 0.98828125,
"rewards/stepwise_brier_reward_long_step": 0.7905445694923401,
"step": 88
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 534.0,
"completions/max_terminated_length": 534.0,
"completions/mean_length": 190.31640625,
"completions/mean_terminated_length": 190.31640625,
"completions/min_length": 104.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.1424,
"grad_norm": 0.035542842000722885,
"learning_rate": 9.537366548042705e-07,
"loss": 0.0089,
"num_tokens": 44299575.0,
"reward": 1.2254152297973633,
"reward_std": 0.2625795304775238,
"rewards/accuracy_reward_long_step": 0.359375,
"rewards/final_brier_reward_long_step": 0.7171218395233154,
"rewards/format_reward_long_step": 0.9921875,
"rewards/stepwise_brier_reward_long_step": 0.7626639604568481,
"step": 89
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 439.0,
"completions/max_terminated_length": 439.0,
"completions/mean_length": 186.6171875,
"completions/mean_terminated_length": 186.6171875,
"completions/min_length": 102.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.144,
"grad_norm": 0.04348074272274971,
"learning_rate": 9.519572953736655e-07,
"loss": -0.0097,
"num_tokens": 44766493.0,
"reward": 1.2421379089355469,
"reward_std": 0.2381449192762375,
"rewards/accuracy_reward_long_step": 0.37109375,
"rewards/final_brier_reward_long_step": 0.7263898849487305,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7577868700027466,
"step": 90
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 527.0,
"completions/max_terminated_length": 527.0,
"completions/mean_length": 193.671875,
"completions/mean_terminated_length": 194.43138122558594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.1456,
"grad_norm": 0.10439097136259079,
"learning_rate": 9.501779359430605e-07,
"loss": 0.014,
"num_tokens": 45226529.0,
"reward": 1.2801823616027832,
"reward_std": 0.22653043270111084,
"rewards/accuracy_reward_long_step": 0.42578125,
"rewards/final_brier_reward_long_step": 0.6875629425048828,
"rewards/format_reward_long_step": 0.98828125,
"rewards/stepwise_brier_reward_long_step": 0.7534794807434082,
"step": 91
},
{
"calib/answer_extract_rate": 0.00390625,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.00390625,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 472.0,
"completions/max_terminated_length": 472.0,
"completions/mean_length": 192.19921875,
"completions/mean_terminated_length": 192.19921875,
"completions/min_length": 68.0,
"completions/min_terminated_length": 68.0,
"epoch": 0.1472,
"grad_norm": 0.038028016686439514,
"learning_rate": 9.483985765124555e-07,
"loss": 0.0038,
"num_tokens": 45698404.0,
"reward": 1.2324891090393066,
"reward_std": 0.2521362900733948,
"rewards/accuracy_reward_long_step": 0.37890625,
"rewards/final_brier_reward_long_step": 0.7127734422683716,
"rewards/format_reward_long_step": 0.9921875,
"rewards/stepwise_brier_reward_long_step": 0.7171825766563416,
"step": 92
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 394.0,
"completions/max_terminated_length": 394.0,
"completions/mean_length": 186.05859375,
"completions/mean_terminated_length": 186.05859375,
"completions/min_length": 95.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.1488,
"grad_norm": 0.0344776026904583,
"learning_rate": 9.466192170818504e-07,
"loss": 0.0051,
"num_tokens": 46169131.0,
"reward": 1.2282606363296509,
"reward_std": 0.16655448079109192,
"rewards/accuracy_reward_long_step": 0.359375,
"rewards/final_brier_reward_long_step": 0.7309889793395996,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7523662447929382,
"step": 93
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 434.0,
"completions/max_terminated_length": 434.0,
"completions/mean_length": 190.9765625,
"completions/mean_terminated_length": 190.9765625,
"completions/min_length": 99.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.1504,
"grad_norm": 0.03720776364207268,
"learning_rate": 9.448398576512455e-07,
"loss": 0.0102,
"num_tokens": 46636349.0,
"reward": 1.2539737224578857,
"reward_std": 0.2229781448841095,
"rewards/accuracy_reward_long_step": 0.37890625,
"rewards/final_brier_reward_long_step": 0.7260522246360779,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7742174863815308,
"step": 94
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 563.0,
"completions/max_terminated_length": 563.0,
"completions/mean_length": 191.19921875,
"completions/mean_terminated_length": 191.94903564453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 94.0,
"epoch": 0.152,
"grad_norm": 0.03717532381415367,
"learning_rate": 9.430604982206405e-07,
"loss": -0.0118,
"num_tokens": 47117128.0,
"reward": 1.2085305452346802,
"reward_std": 0.22574907541275024,
"rewards/accuracy_reward_long_step": 0.3359375,
"rewards/final_brier_reward_long_step": 0.7332504391670227,
"rewards/format_reward_long_step": 0.98828125,
"rewards/stepwise_brier_reward_long_step": 0.7805593013763428,
"step": 95
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 707.0,
"completions/max_terminated_length": 707.0,
"completions/mean_length": 193.6953125,
"completions/mean_terminated_length": 193.6953125,
"completions/min_length": 78.0,
"completions/min_terminated_length": 78.0,
"epoch": 0.1536,
"grad_norm": 0.03478072211146355,
"learning_rate": 9.412811387900355e-07,
"loss": -0.0084,
"num_tokens": 47604330.0,
"reward": 1.3053498268127441,
"reward_std": 0.20083755254745483,
"rewards/accuracy_reward_long_step": 0.453125,
"rewards/final_brier_reward_long_step": 0.6733219027519226,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7355772256851196,
"step": 96
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 397.0,
"completions/max_terminated_length": 397.0,
"completions/mean_length": 196.6015625,
"completions/mean_terminated_length": 196.6015625,
"completions/min_length": 102.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.1552,
"grad_norm": 0.040781840682029724,
"learning_rate": 9.395017793594306e-07,
"loss": -0.0066,
"num_tokens": 48071700.0,
"reward": 1.2769510746002197,
"reward_std": 0.18185681104660034,
"rewards/accuracy_reward_long_step": 0.4140625,
"rewards/final_brier_reward_long_step": 0.7033559679985046,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7560105919837952,
"step": 97
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 510.0,
"completions/max_terminated_length": 510.0,
"completions/mean_length": 184.20703125,
"completions/mean_terminated_length": 184.20703125,
"completions/min_length": 97.0,
"completions/min_terminated_length": 97.0,
"epoch": 0.1568,
"grad_norm": 0.04920961335301399,
"learning_rate": 9.377224199288256e-07,
"loss": -0.0063,
"num_tokens": 48530185.0,
"reward": 1.2058653831481934,
"reward_std": 0.1759049892425537,
"rewards/accuracy_reward_long_step": 0.32421875,
"rewards/final_brier_reward_long_step": 0.7487024068832397,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7856966853141785,
"step": 98
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 381.0,
"completions/max_terminated_length": 381.0,
"completions/mean_length": 186.4609375,
"completions/mean_terminated_length": 186.4609375,
"completions/min_length": 76.0,
"completions/min_terminated_length": 76.0,
"epoch": 0.1584,
"grad_norm": 0.03607820346951485,
"learning_rate": 9.359430604982206e-07,
"loss": 0.002,
"num_tokens": 49009167.0,
"reward": 1.2021329402923584,
"reward_std": 0.17198419570922852,
"rewards/accuracy_reward_long_step": 0.30859375,
"rewards/final_brier_reward_long_step": 0.7756035327911377,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7985529899597168,
"step": 99
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 404.0,
"completions/max_terminated_length": 404.0,
"completions/mean_length": 193.265625,
"completions/mean_terminated_length": 193.265625,
"completions/min_length": 110.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.16,
"grad_norm": 0.03377021104097366,
"learning_rate": 9.341637010676157e-07,
"loss": 0.0073,
"num_tokens": 49501779.0,
"reward": 1.2661197185516357,
"reward_std": 0.1904180347919464,
"rewards/accuracy_reward_long_step": 0.40625,
"rewards/final_brier_reward_long_step": 0.6986390352249146,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7408397197723389,
"step": 100
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 703.0,
"completions/max_terminated_length": 703.0,
"completions/mean_length": 191.34765625,
"completions/mean_terminated_length": 192.09805297851562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 92.0,
"epoch": 0.1616,
"grad_norm": 0.04310128837823868,
"learning_rate": 9.323843416370106e-07,
"loss": -0.0187,
"num_tokens": 49985236.0,
"reward": 1.2589505910873413,
"reward_std": 0.15486913919448853,
"rewards/accuracy_reward_long_step": 0.38671875,
"rewards/final_brier_reward_long_step": 0.7101609110832214,
"rewards/format_reward_long_step": 0.9921875,
"rewards/stepwise_brier_reward_long_step": 0.7943914532661438,
"step": 101
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 716.0,
"completions/max_terminated_length": 716.0,
"completions/mean_length": 197.26953125,
"completions/mean_terminated_length": 197.26953125,
"completions/min_length": 100.0,
"completions/min_terminated_length": 100.0,
"epoch": 0.1632,
"grad_norm": 0.06449352204799652,
"learning_rate": 9.306049822064056e-07,
"loss": -0.0023,
"num_tokens": 50458833.0,
"reward": 1.2103374004364014,
"reward_std": 0.17173272371292114,
"rewards/accuracy_reward_long_step": 0.33203125,
"rewards/final_brier_reward_long_step": 0.7552437782287598,
"rewards/format_reward_long_step": 0.9921875,
"rewards/stepwise_brier_reward_long_step": 0.7736056447029114,
"step": 102
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 449.0,
"completions/max_terminated_length": 449.0,
"completions/mean_length": 196.171875,
"completions/mean_terminated_length": 196.171875,
"completions/min_length": 109.0,
"completions/min_terminated_length": 109.0,
"epoch": 0.1648,
"grad_norm": 0.03428987041115761,
"learning_rate": 9.288256227758006e-07,
"loss": -0.0075,
"num_tokens": 50948197.0,
"reward": 1.2849457263946533,
"reward_std": 0.22478044033050537,
"rewards/accuracy_reward_long_step": 0.43359375,
"rewards/final_brier_reward_long_step": 0.6716355085372925,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7415850758552551,
"step": 103
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 388.0,
"completions/max_terminated_length": 388.0,
"completions/mean_length": 189.1171875,
"completions/mean_terminated_length": 189.1171875,
"completions/min_length": 100.0,
"completions/min_terminated_length": 100.0,
"epoch": 0.1664,
"grad_norm": 0.03708449751138687,
"learning_rate": 9.270462633451957e-07,
"loss": 0.0166,
"num_tokens": 51417579.0,
"reward": 1.3035290241241455,
"reward_std": 0.2233991026878357,
"rewards/accuracy_reward_long_step": 0.4453125,
"rewards/final_brier_reward_long_step": 0.6784660220146179,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7544001936912537,
"step": 104
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 467.0,
"completions/max_terminated_length": 467.0,
"completions/mean_length": 199.61328125,
"completions/mean_terminated_length": 199.61328125,
"completions/min_length": 102.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.168,
"grad_norm": 0.04729332774877548,
"learning_rate": 9.252669039145908e-07,
"loss": -0.0052,
"num_tokens": 51903344.0,
"reward": 1.32490873336792,
"reward_std": 0.23365336656570435,
"rewards/accuracy_reward_long_step": 0.47265625,
"rewards/final_brier_reward_long_step": 0.6609004139900208,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7559216618537903,
"step": 105
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 434.0,
"completions/max_terminated_length": 434.0,
"completions/mean_length": 199.90625,
"completions/mean_terminated_length": 199.90625,
"completions/min_length": 98.0,
"completions/min_terminated_length": 98.0,
"epoch": 0.1696,
"grad_norm": 0.03533056378364563,
"learning_rate": 9.234875444839857e-07,
"loss": 0.0067,
"num_tokens": 52395024.0,
"reward": 1.1930384635925293,
"reward_std": 0.1815432459115982,
"rewards/accuracy_reward_long_step": 0.3203125,
"rewards/final_brier_reward_long_step": 0.7636566162109375,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7272477149963379,
"step": 106
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 354.0,
"completions/max_terminated_length": 354.0,
"completions/mean_length": 190.7265625,
"completions/mean_terminated_length": 190.7265625,
"completions/min_length": 106.0,
"completions/min_terminated_length": 106.0,
"epoch": 0.1712,
"grad_norm": 0.03335587680339813,
"learning_rate": 9.217081850533808e-07,
"loss": 0.0079,
"num_tokens": 52879418.0,
"reward": 1.2246638536453247,
"reward_std": 0.22744205594062805,
"rewards/accuracy_reward_long_step": 0.359375,
"rewards/final_brier_reward_long_step": 0.7271843552589417,
"rewards/format_reward_long_step": 0.9921875,
"rewards/stepwise_brier_reward_long_step": 0.7495959997177124,
"step": 107
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 590.0,
"completions/max_terminated_length": 590.0,
"completions/mean_length": 195.2109375,
"completions/mean_terminated_length": 195.2109375,
"completions/min_length": 89.0,
"completions/min_terminated_length": 89.0,
"epoch": 0.1728,
"grad_norm": 0.03303585201501846,
"learning_rate": 9.199288256227757e-07,
"loss": 0.0029,
"num_tokens": 53356184.0,
"reward": 1.2511444091796875,
"reward_std": 0.16916480660438538,
"rewards/accuracy_reward_long_step": 0.37890625,
"rewards/final_brier_reward_long_step": 0.7238613367080688,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7650911808013916,
"step": 108
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 534.0,
"completions/max_terminated_length": 534.0,
"completions/mean_length": 194.62890625,
"completions/mean_terminated_length": 194.62890625,
"completions/min_length": 104.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.1744,
"grad_norm": 0.03541121259331703,
"learning_rate": 9.181494661921708e-07,
"loss": -0.0096,
"num_tokens": 53823321.0,
"reward": 1.2710667848587036,
"reward_std": 0.17301318049430847,
"rewards/accuracy_reward_long_step": 0.41015625,
"rewards/final_brier_reward_long_step": 0.6999242305755615,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7437179684638977,
"step": 109
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 431.0,
"completions/max_terminated_length": 431.0,
"completions/mean_length": 197.36328125,
"completions/mean_terminated_length": 197.36328125,
"completions/min_length": 98.0,
"completions/min_terminated_length": 98.0,
"epoch": 0.176,
"grad_norm": 0.032131701707839966,
"learning_rate": 9.163701067615657e-07,
"loss": 0.0068,
"num_tokens": 54302126.0,
"reward": 1.3117148876190186,
"reward_std": 0.232276052236557,
"rewards/accuracy_reward_long_step": 0.45703125,
"rewards/final_brier_reward_long_step": 0.6579011678695679,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7686457633972168,
"step": 110
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 508.0,
"completions/max_terminated_length": 508.0,
"completions/mean_length": 199.5078125,
"completions/mean_terminated_length": 199.5078125,
"completions/min_length": 97.0,
"completions/min_terminated_length": 97.0,
"epoch": 0.1776,
"grad_norm": 0.03558618575334549,
"learning_rate": 9.145907473309609e-07,
"loss": -0.0095,
"num_tokens": 54778832.0,
"reward": 1.2715282440185547,
"reward_std": 0.17849522829055786,
"rewards/accuracy_reward_long_step": 0.40234375,
"rewards/final_brier_reward_long_step": 0.7140308618545532,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7627072930335999,
"step": 111
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 399.0,
"completions/max_terminated_length": 399.0,
"completions/mean_length": 193.99609375,
"completions/mean_terminated_length": 193.99609375,
"completions/min_length": 86.0,
"completions/min_terminated_length": 86.0,
"epoch": 0.1792,
"grad_norm": 0.048463039100170135,
"learning_rate": 9.128113879003559e-07,
"loss": -0.0061,
"num_tokens": 55260847.0,
"reward": 1.2326127290725708,
"reward_std": 0.10357346385717392,
"rewards/accuracy_reward_long_step": 0.35546875,
"rewards/final_brier_reward_long_step": 0.7353038787841797,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.773271918296814,
"step": 112
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 521.0,
"completions/max_terminated_length": 521.0,
"completions/mean_length": 190.48828125,
"completions/mean_terminated_length": 190.48828125,
"completions/min_length": 101.0,
"completions/min_terminated_length": 101.0,
"epoch": 0.1808,
"grad_norm": 0.036182910203933716,
"learning_rate": 9.110320284697508e-07,
"loss": -0.0012,
"num_tokens": 55716884.0,
"reward": 1.3314650058746338,
"reward_std": 0.21325276792049408,
"rewards/accuracy_reward_long_step": 0.48046875,
"rewards/final_brier_reward_long_step": 0.653796911239624,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.750187873840332,
"step": 113
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 422.0,
"completions/max_terminated_length": 422.0,
"completions/mean_length": 197.38671875,
"completions/mean_terminated_length": 197.38671875,
"completions/min_length": 104.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.1824,
"grad_norm": 0.0341506227850914,
"learning_rate": 9.092526690391459e-07,
"loss": -0.0149,
"num_tokens": 56184679.0,
"reward": 1.3492083549499512,
"reward_std": 0.1791898012161255,
"rewards/accuracy_reward_long_step": 0.4921875,
"rewards/final_brier_reward_long_step": 0.6604753732681274,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7676081657409668,
"step": 114
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 349.0,
"completions/max_terminated_length": 349.0,
"completions/mean_length": 203.13671875,
"completions/mean_terminated_length": 203.13671875,
"completions/min_length": 95.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.184,
"grad_norm": 0.03318583592772484,
"learning_rate": 9.074733096085408e-07,
"loss": 0.012,
"num_tokens": 56663218.0,
"reward": 1.3214986324310303,
"reward_std": 0.15533313155174255,
"rewards/accuracy_reward_long_step": 0.4765625,
"rewards/final_brier_reward_long_step": 0.6525156497955322,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7350417375564575,
"step": 115
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 450.0,
"completions/max_terminated_length": 450.0,
"completions/mean_length": 196.93359375,
"completions/mean_terminated_length": 196.93359375,
"completions/min_length": 9.0,
"completions/min_terminated_length": 9.0,
"epoch": 0.1856,
"grad_norm": 0.04418765380978584,
"learning_rate": 9.056939501779359e-07,
"loss": -0.0076,
"num_tokens": 57139865.0,
"reward": 1.3460896015167236,
"reward_std": 0.1907288283109665,
"rewards/accuracy_reward_long_step": 0.48828125,
"rewards/final_brier_reward_long_step": 0.668144941329956,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7709013223648071,
"step": 116
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 400.0,
"completions/max_terminated_length": 400.0,
"completions/mean_length": 199.77734375,
"completions/mean_terminated_length": 199.77734375,
"completions/min_length": 84.0,
"completions/min_terminated_length": 84.0,
"epoch": 0.1872,
"grad_norm": 0.031136956065893173,
"learning_rate": 9.03914590747331e-07,
"loss": -0.0184,
"num_tokens": 57605152.0,
"reward": 1.36716890335083,
"reward_std": 0.19431088864803314,
"rewards/accuracy_reward_long_step": 0.5078125,
"rewards/final_brier_reward_long_step": 0.6628949642181396,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7745306491851807,
"step": 117
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 614.0,
"completions/max_terminated_length": 614.0,
"completions/mean_length": 202.734375,
"completions/mean_terminated_length": 202.734375,
"completions/min_length": 105.0,
"completions/min_terminated_length": 105.0,
"epoch": 0.1888,
"grad_norm": 0.03184520825743675,
"learning_rate": 9.02135231316726e-07,
"loss": -0.0108,
"num_tokens": 58073260.0,
"reward": 1.2726809978485107,
"reward_std": 0.20508110523223877,
"rewards/accuracy_reward_long_step": 0.3984375,
"rewards/final_brier_reward_long_step": 0.7245535254478455,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7802332639694214,
"step": 118
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 504.0,
"completions/max_terminated_length": 504.0,
"completions/mean_length": 205.05859375,
"completions/mean_terminated_length": 205.05859375,
"completions/min_length": 110.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.1904,
"grad_norm": 0.033281709998846054,
"learning_rate": 9.00355871886121e-07,
"loss": 0.0056,
"num_tokens": 58547163.0,
"reward": 1.2008931636810303,
"reward_std": 0.12888304889202118,
"rewards/accuracy_reward_long_step": 0.30078125,
"rewards/final_brier_reward_long_step": 0.7928597927093506,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8075879216194153,
"step": 119
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 471.0,
"completions/max_terminated_length": 471.0,
"completions/mean_length": 207.6875,
"completions/mean_terminated_length": 207.6875,
"completions/min_length": 116.0,
"completions/min_terminated_length": 116.0,
"epoch": 0.192,
"grad_norm": 0.03089357167482376,
"learning_rate": 8.98576512455516e-07,
"loss": -0.0066,
"num_tokens": 59008507.0,
"reward": 1.337794303894043,
"reward_std": 0.14488165080547333,
"rewards/accuracy_reward_long_step": 0.46875,
"rewards/final_brier_reward_long_step": 0.6958640813827515,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.78031325340271,
"step": 120
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 402.0,
"completions/max_terminated_length": 402.0,
"completions/mean_length": 210.828125,
"completions/mean_terminated_length": 210.828125,
"completions/min_length": 96.0,
"completions/min_terminated_length": 96.0,
"epoch": 0.1936,
"grad_norm": 0.03139025717973709,
"learning_rate": 8.96797153024911e-07,
"loss": 0.0168,
"num_tokens": 59465479.0,
"reward": 1.46078622341156,
"reward_std": 0.2090751975774765,
"rewards/accuracy_reward_long_step": 0.6171875,
"rewards/final_brier_reward_long_step": 0.6339257955551147,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7404694557189941,
"step": 121
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 448.0,
"completions/max_terminated_length": 448.0,
"completions/mean_length": 225.23828125,
"completions/mean_terminated_length": 225.23828125,
"completions/min_length": 108.0,
"completions/min_terminated_length": 108.0,
"epoch": 0.1952,
"grad_norm": 0.03497195616364479,
"learning_rate": 8.950177935943059e-07,
"loss": 0.0103,
"num_tokens": 59950300.0,
"reward": 1.329277515411377,
"reward_std": 0.17536477744579315,
"rewards/accuracy_reward_long_step": 0.453125,
"rewards/final_brier_reward_long_step": 0.7139711380004883,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7906389832496643,
"step": 122
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 491.0,
"completions/max_terminated_length": 491.0,
"completions/mean_length": 213.8671875,
"completions/mean_terminated_length": 213.8671875,
"completions/min_length": 103.0,
"completions/min_terminated_length": 103.0,
"epoch": 0.1968,
"grad_norm": 0.03257234767079353,
"learning_rate": 8.93238434163701e-07,
"loss": -0.0167,
"num_tokens": 60433226.0,
"reward": 1.4588714838027954,
"reward_std": 0.15642720460891724,
"rewards/accuracy_reward_long_step": 0.59375,
"rewards/final_brier_reward_long_step": 0.6580198407173157,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8024659752845764,
"step": 123
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 456.0,
"completions/max_terminated_length": 456.0,
"completions/mean_length": 217.0,
"completions/mean_terminated_length": 217.0,
"completions/min_length": 117.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.1984,
"grad_norm": 0.03485409542918205,
"learning_rate": 8.91459074733096e-07,
"loss": 0.0054,
"num_tokens": 60912170.0,
"reward": 1.2661014795303345,
"reward_std": 0.16737821698188782,
"rewards/accuracy_reward_long_step": 0.3671875,
"rewards/final_brier_reward_long_step": 0.7646335959434509,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8310222625732422,
"step": 124
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 456.0,
"completions/max_terminated_length": 456.0,
"completions/mean_length": 222.78515625,
"completions/mean_terminated_length": 222.78515625,
"completions/min_length": 101.0,
"completions/min_terminated_length": 101.0,
"epoch": 0.2,
"grad_norm": 0.030056415125727654,
"learning_rate": 8.896797153024911e-07,
"loss": 0.0011,
"num_tokens": 61399619.0,
"reward": 1.3533146381378174,
"reward_std": 0.18137666583061218,
"rewards/accuracy_reward_long_step": 0.47265625,
"rewards/final_brier_reward_long_step": 0.7190214991569519,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8036117553710938,
"step": 125
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 397.0,
"completions/max_terminated_length": 397.0,
"completions/mean_length": 212.23046875,
"completions/mean_terminated_length": 212.23046875,
"completions/min_length": 104.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.2016,
"grad_norm": 0.04059338942170143,
"learning_rate": 8.879003558718861e-07,
"loss": 0.0105,
"num_tokens": 61886622.0,
"reward": 1.386685848236084,
"reward_std": 0.2339630126953125,
"rewards/accuracy_reward_long_step": 0.50390625,
"rewards/final_brier_reward_long_step": 0.7192109823226929,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8119070529937744,
"step": 126
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 399.0,
"completions/max_terminated_length": 399.0,
"completions/mean_length": 212.375,
"completions/mean_terminated_length": 212.375,
"completions/min_length": 113.0,
"completions/min_terminated_length": 113.0,
"epoch": 0.2032,
"grad_norm": 0.16263210773468018,
"learning_rate": 8.861209964412811e-07,
"loss": 0.0043,
"num_tokens": 62374030.0,
"reward": 1.2957684993743896,
"reward_std": 0.15368527173995972,
"rewards/accuracy_reward_long_step": 0.40625,
"rewards/final_brier_reward_long_step": 0.7560636401176453,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8098228573799133,
"step": 127
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 431.0,
"completions/max_terminated_length": 431.0,
"completions/mean_length": 215.7109375,
"completions/mean_terminated_length": 215.7109375,
"completions/min_length": 118.0,
"completions/min_terminated_length": 118.0,
"epoch": 0.2048,
"grad_norm": 0.039368387311697006,
"learning_rate": 8.843416370106761e-07,
"loss": 0.0028,
"num_tokens": 62861236.0,
"reward": 1.355287790298462,
"reward_std": 0.21285982429981232,
"rewards/accuracy_reward_long_step": 0.4765625,
"rewards/final_brier_reward_long_step": 0.7233257293701172,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7915753126144409,
"step": 128
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 380.0,
"completions/max_terminated_length": 380.0,
"completions/mean_length": 208.1328125,
"completions/mean_terminated_length": 208.1328125,
"completions/min_length": 83.0,
"completions/min_terminated_length": 83.0,
"epoch": 0.2064,
"grad_norm": 0.03151082620024681,
"learning_rate": 8.825622775800712e-07,
"loss": -0.004,
"num_tokens": 63345814.0,
"reward": 1.3812355995178223,
"reward_std": 0.2573654055595398,
"rewards/accuracy_reward_long_step": 0.5,
"rewards/final_brier_reward_long_step": 0.7093117237091064,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8234432339668274,
"step": 129
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 514.0,
"completions/max_terminated_length": 514.0,
"completions/mean_length": 216.51171875,
"completions/mean_terminated_length": 216.51171875,
"completions/min_length": 102.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.208,
"grad_norm": 0.03233165293931961,
"learning_rate": 8.807829181494661e-07,
"loss": 0.0018,
"num_tokens": 63809681.0,
"reward": 1.3362829685211182,
"reward_std": 0.12012840807437897,
"rewards/accuracy_reward_long_step": 0.4453125,
"rewards/final_brier_reward_long_step": 0.7555733919143677,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8083083033561707,
"step": 130
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 494.0,
"completions/max_terminated_length": 494.0,
"completions/mean_length": 213.39453125,
"completions/mean_terminated_length": 213.39453125,
"completions/min_length": 120.0,
"completions/min_terminated_length": 120.0,
"epoch": 0.2096,
"grad_norm": 0.028420858085155487,
"learning_rate": 8.790035587188612e-07,
"loss": 0.0054,
"num_tokens": 64295150.0,
"reward": 1.3931207656860352,
"reward_std": 0.1800289750099182,
"rewards/accuracy_reward_long_step": 0.51953125,
"rewards/final_brier_reward_long_step": 0.7075101733207703,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7868478298187256,
"step": 131
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 471.0,
"completions/max_terminated_length": 471.0,
"completions/mean_length": 220.79296875,
"completions/mean_terminated_length": 220.79296875,
"completions/min_length": 133.0,
"completions/min_terminated_length": 133.0,
"epoch": 0.2112,
"grad_norm": 0.030808866024017334,
"learning_rate": 8.772241992882562e-07,
"loss": -0.0,
"num_tokens": 64780465.0,
"reward": 1.2357356548309326,
"reward_std": 0.1879829466342926,
"rewards/accuracy_reward_long_step": 0.33984375,
"rewards/final_brier_reward_long_step": 0.7859160304069519,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7976517081260681,
"step": 132
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 527.0,
"completions/max_terminated_length": 527.0,
"completions/mean_length": 210.00390625,
"completions/mean_terminated_length": 210.00390625,
"completions/min_length": 101.0,
"completions/min_terminated_length": 101.0,
"epoch": 0.2128,
"grad_norm": 0.030662264674901962,
"learning_rate": 8.754448398576512e-07,
"loss": -0.0054,
"num_tokens": 65255178.0,
"reward": 1.261284351348877,
"reward_std": 0.19841524958610535,
"rewards/accuracy_reward_long_step": 0.37890625,
"rewards/final_brier_reward_long_step": 0.7597503662109375,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7775742411613464,
"step": 133
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 371.0,
"completions/max_terminated_length": 371.0,
"completions/mean_length": 202.95703125,
"completions/mean_terminated_length": 202.95703125,
"completions/min_length": 103.0,
"completions/min_terminated_length": 103.0,
"epoch": 0.2144,
"grad_norm": 0.03131137415766716,
"learning_rate": 8.736654804270462e-07,
"loss": 0.0037,
"num_tokens": 65725903.0,
"reward": 1.3632652759552002,
"reward_std": 0.15484619140625,
"rewards/accuracy_reward_long_step": 0.4765625,
"rewards/final_brier_reward_long_step": 0.7492175698280334,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7975935935974121,
"step": 134
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 501.0,
"completions/max_terminated_length": 501.0,
"completions/mean_length": 216.09375,
"completions/mean_terminated_length": 216.09375,
"completions/min_length": 119.0,
"completions/min_terminated_length": 119.0,
"epoch": 0.216,
"grad_norm": 0.03075851872563362,
"learning_rate": 8.718861209964412e-07,
"loss": 0.0039,
"num_tokens": 66198535.0,
"reward": 1.4220490455627441,
"reward_std": 0.13649022579193115,
"rewards/accuracy_reward_long_step": 0.546875,
"rewards/final_brier_reward_long_step": 0.7138031125068665,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7868932485580444,
"step": 135
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 438.0,
"completions/max_terminated_length": 438.0,
"completions/mean_length": 216.6953125,
"completions/mean_terminated_length": 216.6953125,
"completions/min_length": 107.0,
"completions/min_terminated_length": 107.0,
"epoch": 0.2176,
"grad_norm": 0.03281315043568611,
"learning_rate": 8.701067615658363e-07,
"loss": 0.0138,
"num_tokens": 66696009.0,
"reward": 1.294492483139038,
"reward_std": 0.23467326164245605,
"rewards/accuracy_reward_long_step": 0.40234375,
"rewards/final_brier_reward_long_step": 0.7704480886459351,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.798146665096283,
"step": 136
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 420.0,
"completions/max_terminated_length": 420.0,
"completions/mean_length": 204.03515625,
"completions/mean_terminated_length": 204.03515625,
"completions/min_length": 94.0,
"completions/min_terminated_length": 94.0,
"epoch": 0.2192,
"grad_norm": 0.03420788049697876,
"learning_rate": 8.683274021352312e-07,
"loss": -0.0052,
"num_tokens": 67178186.0,
"reward": 1.439988613128662,
"reward_std": 0.19740483164787292,
"rewards/accuracy_reward_long_step": 0.55859375,
"rewards/final_brier_reward_long_step": 0.7100058794021606,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8155736923217773,
"step": 137
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 572.0,
"completions/max_terminated_length": 572.0,
"completions/mean_length": 208.546875,
"completions/mean_terminated_length": 208.546875,
"completions/min_length": 98.0,
"completions/min_terminated_length": 98.0,
"epoch": 0.2208,
"grad_norm": 0.030317138880491257,
"learning_rate": 8.665480427046264e-07,
"loss": -0.0,
"num_tokens": 67664638.0,
"reward": 1.3270107507705688,
"reward_std": 0.156023770570755,
"rewards/accuracy_reward_long_step": 0.4296875,
"rewards/final_brier_reward_long_step": 0.7715405821800232,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8255646228790283,
"step": 138
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 502.0,
"completions/max_terminated_length": 502.0,
"completions/mean_length": 216.421875,
"completions/mean_terminated_length": 216.421875,
"completions/min_length": 106.0,
"completions/min_terminated_length": 106.0,
"epoch": 0.2224,
"grad_norm": 0.0344645120203495,
"learning_rate": 8.647686832740213e-07,
"loss": -0.0067,
"num_tokens": 68146034.0,
"reward": 1.2016233205795288,
"reward_std": 0.17633959650993347,
"rewards/accuracy_reward_long_step": 0.30078125,
"rewards/final_brier_reward_long_step": 0.7998050451278687,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.803563117980957,
"step": 139
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 393.0,
"completions/max_terminated_length": 393.0,
"completions/mean_length": 209.14453125,
"completions/mean_terminated_length": 209.14453125,
"completions/min_length": 105.0,
"completions/min_terminated_length": 105.0,
"epoch": 0.224,
"grad_norm": 0.029939748346805573,
"learning_rate": 8.629893238434164e-07,
"loss": -0.0057,
"num_tokens": 68631727.0,
"reward": 1.4235725402832031,
"reward_std": 0.14265938103199005,
"rewards/accuracy_reward_long_step": 0.55078125,
"rewards/final_brier_reward_long_step": 0.720478892326355,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7706866264343262,
"step": 140
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 362.0,
"completions/max_terminated_length": 362.0,
"completions/mean_length": 209.578125,
"completions/mean_terminated_length": 209.578125,
"completions/min_length": 109.0,
"completions/min_terminated_length": 109.0,
"epoch": 0.2256,
"grad_norm": 0.030273519456386566,
"learning_rate": 8.612099644128114e-07,
"loss": -0.0046,
"num_tokens": 69117867.0,
"reward": 1.3343067169189453,
"reward_std": 0.16028451919555664,
"rewards/accuracy_reward_long_step": 0.453125,
"rewards/final_brier_reward_long_step": 0.7540902495384216,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7784492373466492,
"step": 141
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 426.0,
"completions/max_terminated_length": 426.0,
"completions/mean_length": 220.03515625,
"completions/mean_terminated_length": 220.03515625,
"completions/min_length": 95.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.2272,
"grad_norm": 0.03172041475772858,
"learning_rate": 8.594306049822063e-07,
"loss": 0.0148,
"num_tokens": 69616820.0,
"reward": 1.3687831163406372,
"reward_std": 0.12923522293567657,
"rewards/accuracy_reward_long_step": 0.48828125,
"rewards/final_brier_reward_long_step": 0.7290538549423218,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7929534316062927,
"step": 142
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 428.0,
"completions/max_terminated_length": 428.0,
"completions/mean_length": 218.36328125,
"completions/mean_terminated_length": 218.36328125,
"completions/min_length": 119.0,
"completions/min_terminated_length": 119.0,
"epoch": 0.2288,
"grad_norm": 0.03149149566888809,
"learning_rate": 8.576512455516014e-07,
"loss": 0.0306,
"num_tokens": 70114945.0,
"reward": 1.3903582096099854,
"reward_std": 0.25830644369125366,
"rewards/accuracy_reward_long_step": 0.52734375,
"rewards/final_brier_reward_long_step": 0.6927156448364258,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7593424320220947,
"step": 143
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 450.0,
"completions/max_terminated_length": 450.0,
"completions/mean_length": 208.25390625,
"completions/mean_terminated_length": 208.25390625,
"completions/min_length": 99.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.2304,
"grad_norm": 0.03169076144695282,
"learning_rate": 8.558718861209963e-07,
"loss": 0.0027,
"num_tokens": 70590714.0,
"reward": 1.319934606552124,
"reward_std": 0.18163828551769257,
"rewards/accuracy_reward_long_step": 0.4296875,
"rewards/final_brier_reward_long_step": 0.7742601633071899,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7867279052734375,
"step": 144
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 407.0,
"completions/max_terminated_length": 407.0,
"completions/mean_length": 207.875,
"completions/mean_terminated_length": 207.875,
"completions/min_length": 105.0,
"completions/min_terminated_length": 105.0,
"epoch": 0.232,
"grad_norm": 0.055429354310035706,
"learning_rate": 8.540925266903915e-07,
"loss": 0.0016,
"num_tokens": 71085394.0,
"reward": 1.3640680313110352,
"reward_std": 0.20486664772033691,
"rewards/accuracy_reward_long_step": 0.484375,
"rewards/final_brier_reward_long_step": 0.7340711355209351,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7847013473510742,
"step": 145
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 375.0,
"completions/max_terminated_length": 375.0,
"completions/mean_length": 218.2109375,
"completions/mean_terminated_length": 218.2109375,
"completions/min_length": 108.0,
"completions/min_terminated_length": 108.0,
"epoch": 0.2336,
"grad_norm": 0.03302358463406563,
"learning_rate": 8.523131672597864e-07,
"loss": 0.0093,
"num_tokens": 71574600.0,
"reward": 1.4761652946472168,
"reward_std": 0.2185746729373932,
"rewards/accuracy_reward_long_step": 0.61328125,
"rewards/final_brier_reward_long_step": 0.6924906373023987,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7668579816818237,
"step": 146
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 477.0,
"completions/max_terminated_length": 477.0,
"completions/mean_length": 209.34765625,
"completions/mean_terminated_length": 209.34765625,
"completions/min_length": 109.0,
"completions/min_terminated_length": 109.0,
"epoch": 0.2352,
"grad_norm": 0.03392917290329933,
"learning_rate": 8.505338078291815e-07,
"loss": -0.0052,
"num_tokens": 72047361.0,
"reward": 1.4971990585327148,
"reward_std": 0.17058077454566956,
"rewards/accuracy_reward_long_step": 0.6328125,
"rewards/final_brier_reward_long_step": 0.6820136904716492,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7755322456359863,
"step": 147
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 396.0,
"completions/max_terminated_length": 396.0,
"completions/mean_length": 208.20703125,
"completions/mean_terminated_length": 208.20703125,
"completions/min_length": 88.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.2368,
"grad_norm": 0.03449048101902008,
"learning_rate": 8.487544483985765e-07,
"loss": 0.0062,
"num_tokens": 72528174.0,
"reward": 1.5363779067993164,
"reward_std": 0.17959806323051453,
"rewards/accuracy_reward_long_step": 0.671875,
"rewards/final_brier_reward_long_step": 0.7153710722923279,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7426407337188721,
"step": 148
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 439.0,
"completions/max_terminated_length": 439.0,
"completions/mean_length": 208.875,
"completions/mean_terminated_length": 208.875,
"completions/min_length": 125.0,
"completions/min_terminated_length": 125.0,
"epoch": 0.2384,
"grad_norm": 0.030924499034881592,
"learning_rate": 8.469750889679715e-07,
"loss": 0.0016,
"num_tokens": 73014590.0,
"reward": 1.2735717296600342,
"reward_std": 0.16099971532821655,
"rewards/accuracy_reward_long_step": 0.38671875,
"rewards/final_brier_reward_long_step": 0.7689594030380249,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7784522771835327,
"step": 149
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 430.0,
"completions/max_terminated_length": 430.0,
"completions/mean_length": 222.2109375,
"completions/mean_terminated_length": 222.2109375,
"completions/min_length": 116.0,
"completions/min_terminated_length": 116.0,
"epoch": 0.24,
"grad_norm": 0.0321771465241909,
"learning_rate": 8.451957295373665e-07,
"loss": -0.0022,
"num_tokens": 73481692.0,
"reward": 1.3676480054855347,
"reward_std": 0.2422865331172943,
"rewards/accuracy_reward_long_step": 0.48046875,
"rewards/final_brier_reward_long_step": 0.7435883283615112,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8051284551620483,
"step": 150
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 500.0,
"completions/max_terminated_length": 500.0,
"completions/mean_length": 215.85546875,
"completions/mean_terminated_length": 215.85546875,
"completions/min_length": 117.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.2416,
"grad_norm": 0.03657348453998566,
"learning_rate": 8.434163701067614e-07,
"loss": 0.0096,
"num_tokens": 73961575.0,
"reward": 1.4443353414535522,
"reward_std": 0.21228715777397156,
"rewards/accuracy_reward_long_step": 0.55859375,
"rewards/final_brier_reward_long_step": 0.7566316723823547,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7863348126411438,
"step": 151
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 398.0,
"completions/max_terminated_length": 398.0,
"completions/mean_length": 213.53515625,
"completions/mean_terminated_length": 213.53515625,
"completions/min_length": 89.0,
"completions/min_terminated_length": 89.0,
"epoch": 0.2432,
"grad_norm": 0.03434426710009575,
"learning_rate": 8.416370106761566e-07,
"loss": 0.0015,
"num_tokens": 74427848.0,
"reward": 1.223260521888733,
"reward_std": 0.18570977449417114,
"rewards/accuracy_reward_long_step": 0.33203125,
"rewards/final_brier_reward_long_step": 0.781054675579071,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7838626503944397,
"step": 152
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 469.0,
"completions/max_terminated_length": 469.0,
"completions/mean_length": 219.49609375,
"completions/mean_terminated_length": 219.49609375,
"completions/min_length": 120.0,
"completions/min_terminated_length": 120.0,
"epoch": 0.2448,
"grad_norm": 0.029247252270579338,
"learning_rate": 8.398576512455516e-07,
"loss": 0.0116,
"num_tokens": 74910703.0,
"reward": 1.4385360479354858,
"reward_std": 0.24247096478939056,
"rewards/accuracy_reward_long_step": 0.56640625,
"rewards/final_brier_reward_long_step": 0.7055359482765198,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7829832434654236,
"step": 153
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 376.0,
"completions/max_terminated_length": 376.0,
"completions/mean_length": 220.2734375,
"completions/mean_terminated_length": 220.2734375,
"completions/min_length": 100.0,
"completions/min_terminated_length": 100.0,
"epoch": 0.2464,
"grad_norm": 0.02955593541264534,
"learning_rate": 8.380782918149466e-07,
"loss": -0.0045,
"num_tokens": 75393821.0,
"reward": 1.454419732093811,
"reward_std": 0.15751710534095764,
"rewards/accuracy_reward_long_step": 0.5703125,
"rewards/final_brier_reward_long_step": 0.7552148699760437,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7812142968177795,
"step": 154
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 474.0,
"completions/max_terminated_length": 474.0,
"completions/mean_length": 218.16796875,
"completions/mean_terminated_length": 218.16796875,
"completions/min_length": 85.0,
"completions/min_terminated_length": 85.0,
"epoch": 0.248,
"grad_norm": 0.03182348608970642,
"learning_rate": 8.362989323843416e-07,
"loss": 0.0059,
"num_tokens": 75881664.0,
"reward": 1.4293267726898193,
"reward_std": 0.20279854536056519,
"rewards/accuracy_reward_long_step": 0.5390625,
"rewards/final_brier_reward_long_step": 0.7694789171218872,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7915782928466797,
"step": 155
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 496.0,
"completions/max_terminated_length": 496.0,
"completions/mean_length": 238.328125,
"completions/mean_terminated_length": 238.328125,
"completions/min_length": 102.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.2496,
"grad_norm": 0.030359633266925812,
"learning_rate": 8.345195729537366e-07,
"loss": 0.0028,
"num_tokens": 76368372.0,
"reward": 1.3188002109527588,
"reward_std": 0.2175511121749878,
"rewards/accuracy_reward_long_step": 0.43359375,
"rewards/final_brier_reward_long_step": 0.7726074457168579,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7682181596755981,
"step": 156
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 508.0,
"completions/max_terminated_length": 508.0,
"completions/mean_length": 225.96484375,
"completions/mean_terminated_length": 225.96484375,
"completions/min_length": 88.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.2512,
"grad_norm": 0.03001994453370571,
"learning_rate": 8.327402135231316e-07,
"loss": 0.0162,
"num_tokens": 76846323.0,
"reward": 1.509331226348877,
"reward_std": 0.18848416209220886,
"rewards/accuracy_reward_long_step": 0.6171875,
"rewards/final_brier_reward_long_step": 0.7627733945846558,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.805801272392273,
"step": 157
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 431.0,
"completions/max_terminated_length": 431.0,
"completions/mean_length": 245.515625,
"completions/mean_terminated_length": 245.515625,
"completions/min_length": 116.0,
"completions/min_terminated_length": 116.0,
"epoch": 0.2528,
"grad_norm": 0.02940976247191429,
"learning_rate": 8.309608540925266e-07,
"loss": 0.0073,
"num_tokens": 77351735.0,
"reward": 1.2970399856567383,
"reward_std": 0.19197387993335724,
"rewards/accuracy_reward_long_step": 0.4140625,
"rewards/final_brier_reward_long_step": 0.7378246188163757,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8018975853919983,
"step": 158
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 602.0,
"completions/max_terminated_length": 602.0,
"completions/mean_length": 238.7890625,
"completions/mean_terminated_length": 238.7890625,
"completions/min_length": 134.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.2544,
"grad_norm": 0.030583331361413002,
"learning_rate": 8.291814946619217e-07,
"loss": 0.0032,
"num_tokens": 77840273.0,
"reward": 1.191973090171814,
"reward_std": 0.15967227518558502,
"rewards/accuracy_reward_long_step": 0.29296875,
"rewards/final_brier_reward_long_step": 0.7833398580551147,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8126777410507202,
"step": 159
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 447.0,
"completions/max_terminated_length": 447.0,
"completions/mean_length": 222.33984375,
"completions/mean_terminated_length": 222.33984375,
"completions/min_length": 136.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.256,
"grad_norm": 0.02875317633152008,
"learning_rate": 8.274021352313167e-07,
"loss": 0.001,
"num_tokens": 78315032.0,
"reward": 1.4728080034255981,
"reward_std": 0.1814574897289276,
"rewards/accuracy_reward_long_step": 0.578125,
"rewards/final_brier_reward_long_step": 0.7983136773109436,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7804182767868042,
"step": 160
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 396.0,
"completions/max_terminated_length": 396.0,
"completions/mean_length": 243.73828125,
"completions/mean_terminated_length": 243.73828125,
"completions/min_length": 133.0,
"completions/min_terminated_length": 133.0,
"epoch": 0.2576,
"grad_norm": 0.028153013437986374,
"learning_rate": 8.256227758007117e-07,
"loss": -0.0008,
"num_tokens": 78800285.0,
"reward": 1.3710644245147705,
"reward_std": 0.21033860743045807,
"rewards/accuracy_reward_long_step": 0.484375,
"rewards/final_brier_reward_long_step": 0.7794238328933716,
"rewards/format_reward_long_step": 0.9921875,
"rewards/stepwise_brier_reward_long_step": 0.7829592227935791,
"step": 161
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 539.0,
"completions/max_terminated_length": 539.0,
"completions/mean_length": 247.23828125,
"completions/mean_terminated_length": 247.23828125,
"completions/min_length": 139.0,
"completions/min_terminated_length": 139.0,
"epoch": 0.2592,
"grad_norm": 0.02903878502547741,
"learning_rate": 8.238434163701067e-07,
"loss": 0.0002,
"num_tokens": 79278586.0,
"reward": 1.3124269247055054,
"reward_std": 0.22732782363891602,
"rewards/accuracy_reward_long_step": 0.42578125,
"rewards/final_brier_reward_long_step": 0.7707054615020752,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7758771181106567,
"step": 162
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 574.0,
"completions/max_terminated_length": 574.0,
"completions/mean_length": 249.76171875,
"completions/mean_terminated_length": 249.76171875,
"completions/min_length": 146.0,
"completions/min_terminated_length": 146.0,
"epoch": 0.2608,
"grad_norm": 0.028138713911175728,
"learning_rate": 8.220640569395017e-07,
"loss": -0.0073,
"num_tokens": 79765109.0,
"reward": 1.4664216041564941,
"reward_std": 0.16438095271587372,
"rewards/accuracy_reward_long_step": 0.5703125,
"rewards/final_brier_reward_long_step": 0.7759047150611877,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.808531641960144,
"step": 163
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 516.0,
"completions/max_terminated_length": 516.0,
"completions/mean_length": 241.08984375,
"completions/mean_terminated_length": 241.08984375,
"completions/min_length": 147.0,
"completions/min_terminated_length": 147.0,
"epoch": 0.2624,
"grad_norm": 0.028392404317855835,
"learning_rate": 8.202846975088967e-07,
"loss": 0.0032,
"num_tokens": 80258676.0,
"reward": 1.501215934753418,
"reward_std": 0.17474979162216187,
"rewards/accuracy_reward_long_step": 0.6171875,
"rewards/final_brier_reward_long_step": 0.7508887052536011,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7852252721786499,
"step": 164
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 460.0,
"completions/max_terminated_length": 460.0,
"completions/mean_length": 231.37109375,
"completions/mean_terminated_length": 231.37109375,
"completions/min_length": 134.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.264,
"grad_norm": 0.028890179470181465,
"learning_rate": 8.185053380782919e-07,
"loss": 0.0041,
"num_tokens": 80732043.0,
"reward": 1.4701359272003174,
"reward_std": 0.1824515014886856,
"rewards/accuracy_reward_long_step": 0.58203125,
"rewards/final_brier_reward_long_step": 0.7417089939117432,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8107101321220398,
"step": 165
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 447.0,
"completions/max_terminated_length": 447.0,
"completions/mean_length": 238.31640625,
"completions/mean_terminated_length": 238.31640625,
"completions/min_length": 147.0,
"completions/min_terminated_length": 147.0,
"epoch": 0.2656,
"grad_norm": 0.04968814551830292,
"learning_rate": 8.167259786476868e-07,
"loss": 0.0087,
"num_tokens": 81219316.0,
"reward": 1.3115195035934448,
"reward_std": 0.1865576058626175,
"rewards/accuracy_reward_long_step": 0.421875,
"rewards/final_brier_reward_long_step": 0.7827734351158142,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7758046984672546,
"step": 166
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 473.0,
"completions/max_terminated_length": 473.0,
"completions/mean_length": 246.90625,
"completions/mean_terminated_length": 246.90625,
"completions/min_length": 151.0,
"completions/min_terminated_length": 151.0,
"epoch": 0.2672,
"grad_norm": 0.02806948870420456,
"learning_rate": 8.149466192170819e-07,
"loss": -0.0126,
"num_tokens": 81722972.0,
"reward": 1.287018060684204,
"reward_std": 0.17807143926620483,
"rewards/accuracy_reward_long_step": 0.41015625,
"rewards/final_brier_reward_long_step": 0.7411332130432129,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.766313910484314,
"step": 167
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 430.0,
"completions/max_terminated_length": 430.0,
"completions/mean_length": 242.71875,
"completions/mean_terminated_length": 242.71875,
"completions/min_length": 127.0,
"completions/min_terminated_length": 127.0,
"epoch": 0.2688,
"grad_norm": 0.02786392532289028,
"learning_rate": 8.131672597864768e-07,
"loss": -0.0143,
"num_tokens": 82208676.0,
"reward": 1.3826302289962769,
"reward_std": 0.16845399141311646,
"rewards/accuracy_reward_long_step": 0.4765625,
"rewards/final_brier_reward_long_step": 0.7937402129173279,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8305305242538452,
"step": 168
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 408.0,
"completions/max_terminated_length": 408.0,
"completions/mean_length": 248.65234375,
"completions/mean_terminated_length": 248.65234375,
"completions/min_length": 149.0,
"completions/min_terminated_length": 149.0,
"epoch": 0.2704,
"grad_norm": 0.02810235507786274,
"learning_rate": 8.113879003558719e-07,
"loss": -0.0028,
"num_tokens": 82703459.0,
"reward": 1.2465064525604248,
"reward_std": 0.1827697902917862,
"rewards/accuracy_reward_long_step": 0.375,
"rewards/final_brier_reward_long_step": 0.701416015625,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7846096754074097,
"step": 169
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 616.0,
"completions/max_terminated_length": 616.0,
"completions/mean_length": 256.98046875,
"completions/mean_terminated_length": 256.98046875,
"completions/min_length": 154.0,
"completions/min_terminated_length": 154.0,
"epoch": 0.272,
"grad_norm": 0.03040560707449913,
"learning_rate": 8.096085409252668e-07,
"loss": -0.0086,
"num_tokens": 83174350.0,
"reward": 1.3326289653778076,
"reward_std": 0.19962584972381592,
"rewards/accuracy_reward_long_step": 0.44921875,
"rewards/final_brier_reward_long_step": 0.7591210603713989,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7823324203491211,
"step": 170
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 595.0,
"completions/max_terminated_length": 595.0,
"completions/mean_length": 250.66015625,
"completions/mean_terminated_length": 250.66015625,
"completions/min_length": 149.0,
"completions/min_terminated_length": 149.0,
"epoch": 0.2736,
"grad_norm": 0.02771839126944542,
"learning_rate": 8.078291814946618e-07,
"loss": -0.0059,
"num_tokens": 83664551.0,
"reward": 1.3717570304870605,
"reward_std": 0.20381051301956177,
"rewards/accuracy_reward_long_step": 0.48046875,
"rewards/final_brier_reward_long_step": 0.7613476514816284,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8038052320480347,
"step": 171
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 438.0,
"completions/max_terminated_length": 438.0,
"completions/mean_length": 245.0703125,
"completions/mean_terminated_length": 245.0703125,
"completions/min_length": 142.0,
"completions/min_terminated_length": 142.0,
"epoch": 0.2752,
"grad_norm": 0.03148366138339043,
"learning_rate": 8.06049822064057e-07,
"loss": 0.0026,
"num_tokens": 84163441.0,
"reward": 1.2595324516296387,
"reward_std": 0.21818827092647552,
"rewards/accuracy_reward_long_step": 0.37890625,
"rewards/final_brier_reward_long_step": 0.7574383020401001,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7650665044784546,
"step": 172
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 392.0,
"completions/max_terminated_length": 392.0,
"completions/mean_length": 241.73828125,
"completions/mean_terminated_length": 241.73828125,
"completions/min_length": 143.0,
"completions/min_terminated_length": 143.0,
"epoch": 0.2768,
"grad_norm": 0.027540108188986778,
"learning_rate": 8.042704626334519e-07,
"loss": 0.0026,
"num_tokens": 84651926.0,
"reward": 1.3201969861984253,
"reward_std": 0.23394638299942017,
"rewards/accuracy_reward_long_step": 0.4296875,
"rewards/final_brier_reward_long_step": 0.7422363758087158,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8198016285896301,
"step": 173
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 639.0,
"completions/max_terminated_length": 639.0,
"completions/mean_length": 245.08984375,
"completions/mean_terminated_length": 245.08984375,
"completions/min_length": 59.0,
"completions/min_terminated_length": 59.0,
"epoch": 0.2784,
"grad_norm": 0.02940957434475422,
"learning_rate": 8.02491103202847e-07,
"loss": -0.0074,
"num_tokens": 85147357.0,
"reward": 1.2906839847564697,
"reward_std": 0.2153157889842987,
"rewards/accuracy_reward_long_step": 0.4140625,
"rewards/final_brier_reward_long_step": 0.7498632669448853,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7644352912902832,
"step": 174
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 442.0,
"completions/max_terminated_length": 442.0,
"completions/mean_length": 245.8515625,
"completions/mean_terminated_length": 245.8515625,
"completions/min_length": 126.0,
"completions/min_terminated_length": 126.0,
"epoch": 0.28,
"grad_norm": 0.03165048733353615,
"learning_rate": 8.007117437722419e-07,
"loss": 0.0137,
"num_tokens": 85624735.0,
"reward": 1.4603183269500732,
"reward_std": 0.22203630208969116,
"rewards/accuracy_reward_long_step": 0.5625,
"rewards/final_brier_reward_long_step": 0.7952343821525574,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.796039342880249,
"step": 175
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 407.0,
"completions/max_terminated_length": 407.0,
"completions/mean_length": 250.11328125,
"completions/mean_terminated_length": 251.09413146972656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 150.0,
"epoch": 0.2816,
"grad_norm": 0.0285260621458292,
"learning_rate": 7.98932384341637e-07,
"loss": -0.0097,
"num_tokens": 86101692.0,
"reward": 1.4544804096221924,
"reward_std": 0.21490904688835144,
"rewards/accuracy_reward_long_step": 0.5703125,
"rewards/final_brier_reward_long_step": 0.7437987923622131,
"rewards/format_reward_long_step": 0.9921875,
"rewards/stepwise_brier_reward_long_step": 0.8084980249404907,
"step": 176
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 387.0,
"completions/max_terminated_length": 387.0,
"completions/mean_length": 247.91015625,
"completions/mean_terminated_length": 247.91015625,
"completions/min_length": 151.0,
"completions/min_terminated_length": 151.0,
"epoch": 0.2832,
"grad_norm": 0.02845529466867447,
"learning_rate": 7.97153024911032e-07,
"loss": 0.0112,
"num_tokens": 86593685.0,
"reward": 1.3518202304840088,
"reward_std": 0.15090304613113403,
"rewards/accuracy_reward_long_step": 0.46875,
"rewards/final_brier_reward_long_step": 0.7558691501617432,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7764118909835815,
"step": 177
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 719.0,
"completions/max_terminated_length": 719.0,
"completions/mean_length": 253.98046875,
"completions/mean_terminated_length": 253.98046875,
"completions/min_length": 146.0,
"completions/min_terminated_length": 146.0,
"epoch": 0.2848,
"grad_norm": 0.029682578518986702,
"learning_rate": 7.95373665480427e-07,
"loss": -0.0156,
"num_tokens": 87076488.0,
"reward": 1.3396403789520264,
"reward_std": 0.1541176736354828,
"rewards/accuracy_reward_long_step": 0.4375,
"rewards/final_brier_reward_long_step": 0.8080171346664429,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8005446195602417,
"step": 178
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 439.0,
"completions/max_terminated_length": 439.0,
"completions/mean_length": 248.99609375,
"completions/mean_terminated_length": 248.99609375,
"completions/min_length": 144.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.2864,
"grad_norm": 0.028529809787869453,
"learning_rate": 7.935943060498221e-07,
"loss": 0.0108,
"num_tokens": 87564831.0,
"reward": 1.3985368013381958,
"reward_std": 0.15740060806274414,
"rewards/accuracy_reward_long_step": 0.49609375,
"rewards/final_brier_reward_long_step": 0.7856543064117432,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8241176605224609,
"step": 179
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 439.0,
"completions/max_terminated_length": 439.0,
"completions/mean_length": 242.43359375,
"completions/mean_terminated_length": 242.43359375,
"completions/min_length": 137.0,
"completions/min_terminated_length": 137.0,
"epoch": 0.288,
"grad_norm": 0.029515286907553673,
"learning_rate": 7.91814946619217e-07,
"loss": -0.0121,
"num_tokens": 88038206.0,
"reward": 1.4421117305755615,
"reward_std": 0.245658278465271,
"rewards/accuracy_reward_long_step": 0.56640625,
"rewards/final_brier_reward_long_step": 0.7302929759025574,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7725290060043335,
"step": 180
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 479.0,
"completions/max_terminated_length": 479.0,
"completions/mean_length": 247.80859375,
"completions/mean_terminated_length": 247.80859375,
"completions/min_length": 141.0,
"completions/min_terminated_length": 141.0,
"epoch": 0.2896,
"grad_norm": 0.028533408418297768,
"learning_rate": 7.900355871886121e-07,
"loss": -0.0021,
"num_tokens": 88526117.0,
"reward": 1.4392614364624023,
"reward_std": 0.1886727213859558,
"rewards/accuracy_reward_long_step": 0.5703125,
"rewards/final_brier_reward_long_step": 0.7036230564117432,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7721726894378662,
"step": 181
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 402.0,
"completions/max_terminated_length": 402.0,
"completions/mean_length": 247.95703125,
"completions/mean_terminated_length": 247.95703125,
"completions/min_length": 142.0,
"completions/min_terminated_length": 142.0,
"epoch": 0.2912,
"grad_norm": 0.03011815994977951,
"learning_rate": 7.88256227758007e-07,
"loss": 0.0105,
"num_tokens": 89007906.0,
"reward": 1.3243916034698486,
"reward_std": 0.23409831523895264,
"rewards/accuracy_reward_long_step": 0.44921875,
"rewards/final_brier_reward_long_step": 0.7340039014816284,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7666873931884766,
"step": 182
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 520.0,
"completions/max_terminated_length": 520.0,
"completions/mean_length": 257.90234375,
"completions/mean_terminated_length": 257.90234375,
"completions/min_length": 138.0,
"completions/min_terminated_length": 138.0,
"epoch": 0.2928,
"grad_norm": 0.02743351273238659,
"learning_rate": 7.864768683274021e-07,
"loss": 0.0068,
"num_tokens": 89487889.0,
"reward": 1.3753552436828613,
"reward_std": 0.13086272776126862,
"rewards/accuracy_reward_long_step": 0.484375,
"rewards/final_brier_reward_long_step": 0.7642577886581421,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7996633648872375,
"step": 183
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 714.0,
"completions/max_terminated_length": 714.0,
"completions/mean_length": 260.5,
"completions/mean_terminated_length": 260.5,
"completions/min_length": 110.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.2944,
"grad_norm": 0.02766694501042366,
"learning_rate": 7.846975088967971e-07,
"loss": 0.0127,
"num_tokens": 89978625.0,
"reward": 1.4390387535095215,
"reward_std": 0.17982302606105804,
"rewards/accuracy_reward_long_step": 0.55859375,
"rewards/final_brier_reward_long_step": 0.7528809309005737,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7688993811607361,
"step": 184
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 419.0,
"completions/max_terminated_length": 419.0,
"completions/mean_length": 271.25390625,
"completions/mean_terminated_length": 271.25390625,
"completions/min_length": 160.0,
"completions/min_terminated_length": 160.0,
"epoch": 0.296,
"grad_norm": 0.0269797183573246,
"learning_rate": 7.829181494661921e-07,
"loss": -0.0066,
"num_tokens": 90473290.0,
"reward": 1.3175511360168457,
"reward_std": 0.13507431745529175,
"rewards/accuracy_reward_long_step": 0.453125,
"rewards/final_brier_reward_long_step": 0.7091602087020874,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7485443353652954,
"step": 185
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 471.0,
"completions/max_terminated_length": 471.0,
"completions/mean_length": 268.40625,
"completions/mean_terminated_length": 268.40625,
"completions/min_length": 151.0,
"completions/min_terminated_length": 151.0,
"epoch": 0.2976,
"grad_norm": 0.027505241334438324,
"learning_rate": 7.811387900355872e-07,
"loss": -0.0114,
"num_tokens": 90958874.0,
"reward": 1.517210602760315,
"reward_std": 0.18232710659503937,
"rewards/accuracy_reward_long_step": 0.6171875,
"rewards/final_brier_reward_long_step": 0.8131054639816284,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7869866490364075,
"step": 186
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 481.0,
"completions/max_terminated_length": 481.0,
"completions/mean_length": 275.80859375,
"completions/mean_terminated_length": 275.80859375,
"completions/min_length": 149.0,
"completions/min_terminated_length": 149.0,
"epoch": 0.2992,
"grad_norm": 0.0265911016613245,
"learning_rate": 7.793594306049822e-07,
"loss": -0.0118,
"num_tokens": 91450809.0,
"reward": 1.4153180122375488,
"reward_std": 0.20222672820091248,
"rewards/accuracy_reward_long_step": 0.52734375,
"rewards/final_brier_reward_long_step": 0.724365234375,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8275318145751953,
"step": 187
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 516.0,
"completions/max_terminated_length": 516.0,
"completions/mean_length": 282.4375,
"completions/mean_terminated_length": 282.4375,
"completions/min_length": 130.0,
"completions/min_terminated_length": 130.0,
"epoch": 0.3008,
"grad_norm": 0.02835831232368946,
"learning_rate": 7.775800711743772e-07,
"loss": 0.0055,
"num_tokens": 91962913.0,
"reward": 1.3732486963272095,
"reward_std": 0.21462617814540863,
"rewards/accuracy_reward_long_step": 0.484375,
"rewards/final_brier_reward_long_step": 0.7563574314117432,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.799137532711029,
"step": 188
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 505.0,
"completions/max_terminated_length": 505.0,
"completions/mean_length": 282.69140625,
"completions/mean_terminated_length": 282.69140625,
"completions/min_length": 158.0,
"completions/min_terminated_length": 158.0,
"epoch": 0.3024,
"grad_norm": 0.026161570101976395,
"learning_rate": 7.758007117437722e-07,
"loss": 0.0141,
"num_tokens": 92451986.0,
"reward": 1.4073671102523804,
"reward_std": 0.12184424698352814,
"rewards/accuracy_reward_long_step": 0.5,
"rewards/final_brier_reward_long_step": 0.825976550579071,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8034918308258057,
"step": 189
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 459.0,
"completions/max_terminated_length": 459.0,
"completions/mean_length": 286.796875,
"completions/mean_terminated_length": 286.796875,
"completions/min_length": 165.0,
"completions/min_terminated_length": 165.0,
"epoch": 0.304,
"grad_norm": 0.027012880891561508,
"learning_rate": 7.740213523131672e-07,
"loss": 0.0021,
"num_tokens": 92948694.0,
"reward": 1.2427858114242554,
"reward_std": 0.235196053981781,
"rewards/accuracy_reward_long_step": 0.37109375,
"rewards/final_brier_reward_long_step": 0.6898242235183716,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7969439625740051,
"step": 190
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 517.0,
"completions/max_terminated_length": 517.0,
"completions/mean_length": 282.46484375,
"completions/mean_terminated_length": 282.46484375,
"completions/min_length": 165.0,
"completions/min_terminated_length": 165.0,
"epoch": 0.3056,
"grad_norm": 0.037487372756004333,
"learning_rate": 7.722419928825622e-07,
"loss": 0.0056,
"num_tokens": 93449965.0,
"reward": 1.3565409183502197,
"reward_std": 0.14568164944648743,
"rewards/accuracy_reward_long_step": 0.46875,
"rewards/final_brier_reward_long_step": 0.733447253704071,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8177168369293213,
"step": 191
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 547.0,
"completions/max_terminated_length": 547.0,
"completions/mean_length": 294.59375,
"completions/mean_terminated_length": 294.59375,
"completions/min_length": 164.0,
"completions/min_terminated_length": 164.0,
"epoch": 0.3072,
"grad_norm": 0.026451628655195236,
"learning_rate": 7.704626334519572e-07,
"loss": 0.0008,
"num_tokens": 93958261.0,
"reward": 1.1881611347198486,
"reward_std": 0.18037152290344238,
"rewards/accuracy_reward_long_step": 0.296875,
"rewards/final_brier_reward_long_step": 0.7727734446525574,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.792371392250061,
"step": 192
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 505.0,
"completions/max_terminated_length": 505.0,
"completions/mean_length": 290.57421875,
"completions/mean_terminated_length": 290.57421875,
"completions/min_length": 150.0,
"completions/min_terminated_length": 150.0,
"epoch": 0.3088,
"grad_norm": 0.026738133281469345,
"learning_rate": 7.686832740213523e-07,
"loss": 0.0111,
"num_tokens": 94465464.0,
"reward": 1.466090202331543,
"reward_std": 0.17279371619224548,
"rewards/accuracy_reward_long_step": 0.56640625,
"rewards/final_brier_reward_long_step": 0.7865039110183716,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8122318983078003,
"step": 193
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 511.0,
"completions/max_terminated_length": 511.0,
"completions/mean_length": 282.94921875,
"completions/mean_terminated_length": 282.94921875,
"completions/min_length": 157.0,
"completions/min_terminated_length": 157.0,
"epoch": 0.3104,
"grad_norm": 0.02935073897242546,
"learning_rate": 7.669039145907473e-07,
"loss": 0.0057,
"num_tokens": 94968371.0,
"reward": 1.3700523376464844,
"reward_std": 0.21206629276275635,
"rewards/accuracy_reward_long_step": 0.48046875,
"rewards/final_brier_reward_long_step": 0.7314281463623047,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8269064426422119,
"step": 194
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 532.0,
"completions/max_terminated_length": 532.0,
"completions/mean_length": 276.2578125,
"completions/mean_terminated_length": 276.2578125,
"completions/min_length": 161.0,
"completions/min_terminated_length": 161.0,
"epoch": 0.312,
"grad_norm": 0.028218043968081474,
"learning_rate": 7.651245551601423e-07,
"loss": -0.0055,
"num_tokens": 95464117.0,
"reward": 1.5009515285491943,
"reward_std": 0.16311804950237274,
"rewards/accuracy_reward_long_step": 0.60546875,
"rewards/final_brier_reward_long_step": 0.7480566501617432,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8338742256164551,
"step": 195
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 539.0,
"completions/max_terminated_length": 539.0,
"completions/mean_length": 277.2578125,
"completions/mean_terminated_length": 277.2578125,
"completions/min_length": 146.0,
"completions/min_terminated_length": 146.0,
"epoch": 0.3136,
"grad_norm": 0.029587451368570328,
"learning_rate": 7.633451957295374e-07,
"loss": 0.004,
"num_tokens": 95963519.0,
"reward": 1.6074358224868774,
"reward_std": 0.1871250867843628,
"rewards/accuracy_reward_long_step": 0.6953125,
"rewards/final_brier_reward_long_step": 0.8149710893630981,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8335224390029907,
"step": 196
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 495.0,
"completions/max_terminated_length": 495.0,
"completions/mean_length": 273.578125,
"completions/mean_terminated_length": 273.578125,
"completions/min_length": 164.0,
"completions/min_terminated_length": 164.0,
"epoch": 0.3152,
"grad_norm": 0.029551653191447258,
"learning_rate": 7.615658362989323e-07,
"loss": 0.008,
"num_tokens": 96461403.0,
"reward": 1.3781944513320923,
"reward_std": 0.17420879006385803,
"rewards/accuracy_reward_long_step": 0.46875,
"rewards/final_brier_reward_long_step": 0.8260058164596558,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8117718696594238,
"step": 197
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 516.0,
"completions/max_terminated_length": 516.0,
"completions/mean_length": 274.83984375,
"completions/mean_terminated_length": 274.83984375,
"completions/min_length": 164.0,
"completions/min_terminated_length": 164.0,
"epoch": 0.3168,
"grad_norm": 0.02734399028122425,
"learning_rate": 7.597864768683274e-07,
"loss": 0.0036,
"num_tokens": 96960098.0,
"reward": 1.3913518190383911,
"reward_std": 0.20948463678359985,
"rewards/accuracy_reward_long_step": 0.50390625,
"rewards/final_brier_reward_long_step": 0.7432616949081421,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8065208792686462,
"step": 198
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 586.0,
"completions/max_terminated_length": 586.0,
"completions/mean_length": 285.73046875,
"completions/mean_terminated_length": 285.73046875,
"completions/min_length": 134.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.3184,
"grad_norm": 0.025479217991232872,
"learning_rate": 7.580071174377223e-07,
"loss": -0.0064,
"num_tokens": 97462677.0,
"reward": 1.3470879793167114,
"reward_std": 0.17569580674171448,
"rewards/accuracy_reward_long_step": 0.46484375,
"rewards/final_brier_reward_long_step": 0.7766375541687012,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7523394823074341,
"step": 199
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 487.0,
"completions/max_terminated_length": 487.0,
"completions/mean_length": 276.9609375,
"completions/mean_terminated_length": 276.9609375,
"completions/min_length": 165.0,
"completions/min_terminated_length": 165.0,
"epoch": 0.32,
"grad_norm": 0.026502054184675217,
"learning_rate": 7.562277580071174e-07,
"loss": 0.003,
"num_tokens": 97933083.0,
"reward": 1.3517411947250366,
"reward_std": 0.1127757877111435,
"rewards/accuracy_reward_long_step": 0.46875,
"rewards/final_brier_reward_long_step": 0.7498577833175659,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7821072340011597,
"step": 200
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 440.0,
"completions/max_terminated_length": 440.0,
"completions/mean_length": 277.74609375,
"completions/mean_terminated_length": 277.74609375,
"completions/min_length": 131.0,
"completions/min_terminated_length": 131.0,
"epoch": 0.3216,
"grad_norm": 0.026896316558122635,
"learning_rate": 7.544483985765125e-07,
"loss": 0.0045,
"num_tokens": 98407330.0,
"reward": 1.240262508392334,
"reward_std": 0.16174383461475372,
"rewards/accuracy_reward_long_step": 0.35546875,
"rewards/final_brier_reward_long_step": 0.798291027545929,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7408840656280518,
"step": 201
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 553.0,
"completions/max_terminated_length": 553.0,
"completions/mean_length": 261.98828125,
"completions/mean_terminated_length": 261.98828125,
"completions/min_length": 133.0,
"completions/min_terminated_length": 133.0,
"epoch": 0.3232,
"grad_norm": 0.027860237285494804,
"learning_rate": 7.526690391459074e-07,
"loss": 0.0018,
"num_tokens": 98894503.0,
"reward": 1.223290205001831,
"reward_std": 0.15445607900619507,
"rewards/accuracy_reward_long_step": 0.3515625,
"rewards/final_brier_reward_long_step": 0.7443945407867432,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7425163984298706,
"step": 202
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 481.0,
"completions/max_terminated_length": 481.0,
"completions/mean_length": 274.48828125,
"completions/mean_terminated_length": 274.48828125,
"completions/min_length": 151.0,
"completions/min_terminated_length": 151.0,
"epoch": 0.3248,
"grad_norm": 0.02505328133702278,
"learning_rate": 7.508896797153025e-07,
"loss": -0.0032,
"num_tokens": 99377636.0,
"reward": 1.4042761325836182,
"reward_std": 0.2108316421508789,
"rewards/accuracy_reward_long_step": 0.5078125,
"rewards/final_brier_reward_long_step": 0.7737988233566284,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8120555877685547,
"step": 203
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 528.0,
"completions/max_terminated_length": 528.0,
"completions/mean_length": 271.3203125,
"completions/mean_terminated_length": 271.3203125,
"completions/min_length": 128.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.3264,
"grad_norm": 0.027994032949209213,
"learning_rate": 7.491103202846974e-07,
"loss": 0.0018,
"num_tokens": 99877070.0,
"reward": 1.4199368953704834,
"reward_std": 0.1661403775215149,
"rewards/accuracy_reward_long_step": 0.53125,
"rewards/final_brier_reward_long_step": 0.7513816356658936,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8033663034439087,
"step": 204
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 485.0,
"completions/max_terminated_length": 485.0,
"completions/mean_length": 265.5625,
"completions/mean_terminated_length": 266.60394287109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 149.0,
"epoch": 0.328,
"grad_norm": 0.02644249238073826,
"learning_rate": 7.473309608540925e-07,
"loss": -0.0023,
"num_tokens": 100362006.0,
"reward": 1.4836362600326538,
"reward_std": 0.2127230167388916,
"rewards/accuracy_reward_long_step": 0.5859375,
"rewards/final_brier_reward_long_step": 0.7705457210540771,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.828061580657959,
"step": 205
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 429.0,
"completions/max_terminated_length": 429.0,
"completions/mean_length": 260.26953125,
"completions/mean_terminated_length": 260.26953125,
"completions/min_length": 129.0,
"completions/min_terminated_length": 129.0,
"epoch": 0.3296,
"grad_norm": 0.02795090340077877,
"learning_rate": 7.455516014234874e-07,
"loss": 0.0014,
"num_tokens": 100836259.0,
"reward": 1.4013100862503052,
"reward_std": 0.17857375741004944,
"rewards/accuracy_reward_long_step": 0.53125,
"rewards/final_brier_reward_long_step": 0.7368500232696533,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7433903217315674,
"step": 206
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 463.0,
"completions/max_terminated_length": 463.0,
"completions/mean_length": 257.23828125,
"completions/mean_terminated_length": 257.23828125,
"completions/min_length": 138.0,
"completions/min_terminated_length": 138.0,
"epoch": 0.3312,
"grad_norm": 0.028254900127649307,
"learning_rate": 7.437722419928826e-07,
"loss": 0.0146,
"num_tokens": 101316480.0,
"reward": 1.4756114482879639,
"reward_std": 0.21930184960365295,
"rewards/accuracy_reward_long_step": 0.60546875,
"rewards/final_brier_reward_long_step": 0.7034816741943359,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7770892381668091,
"step": 207
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 799.0,
"completions/max_terminated_length": 799.0,
"completions/mean_length": 275.65625,
"completions/mean_terminated_length": 275.65625,
"completions/min_length": 181.0,
"completions/min_terminated_length": 181.0,
"epoch": 0.3328,
"grad_norm": 0.02601105347275734,
"learning_rate": 7.419928825622776e-07,
"loss": -0.0054,
"num_tokens": 101820640.0,
"reward": 1.4046986103057861,
"reward_std": 0.1485091894865036,
"rewards/accuracy_reward_long_step": 0.53125,
"rewards/final_brier_reward_long_step": 0.7158496379852295,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7779449224472046,
"step": 208
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 547.0,
"completions/max_terminated_length": 547.0,
"completions/mean_length": 275.76953125,
"completions/mean_terminated_length": 275.76953125,
"completions/min_length": 168.0,
"completions/min_terminated_length": 168.0,
"epoch": 0.3344,
"grad_norm": 0.027420159429311752,
"learning_rate": 7.402135231316725e-07,
"loss": 0.0148,
"num_tokens": 102301669.0,
"reward": 1.3544528484344482,
"reward_std": 0.12338382005691528,
"rewards/accuracy_reward_long_step": 0.45703125,
"rewards/final_brier_reward_long_step": 0.750058650970459,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8396276235580444,
"step": 209
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 421.0,
"completions/max_terminated_length": 421.0,
"completions/mean_length": 260.109375,
"completions/mean_terminated_length": 260.109375,
"completions/min_length": 150.0,
"completions/min_terminated_length": 150.0,
"epoch": 0.336,
"grad_norm": 0.027533669024705887,
"learning_rate": 7.384341637010676e-07,
"loss": -0.0076,
"num_tokens": 102791329.0,
"reward": 1.4349637031555176,
"reward_std": 0.23317797482013702,
"rewards/accuracy_reward_long_step": 0.53125,
"rewards/final_brier_reward_long_step": 0.7997035384178162,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8151513338088989,
"step": 210
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 522.0,
"completions/max_terminated_length": 522.0,
"completions/mean_length": 262.828125,
"completions/mean_terminated_length": 262.828125,
"completions/min_length": 144.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.3376,
"grad_norm": 0.028309568762779236,
"learning_rate": 7.366548042704625e-07,
"loss": 0.0238,
"num_tokens": 103289013.0,
"reward": 1.3645201921463013,
"reward_std": 0.20173460245132446,
"rewards/accuracy_reward_long_step": 0.4609375,
"rewards/final_brier_reward_long_step": 0.8263086080551147,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.788021981716156,
"step": 211
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 422.0,
"completions/max_terminated_length": 422.0,
"completions/mean_length": 257.796875,
"completions/mean_terminated_length": 257.796875,
"completions/min_length": 137.0,
"completions/min_terminated_length": 137.0,
"epoch": 0.3392,
"grad_norm": 0.02732851170003414,
"learning_rate": 7.348754448398576e-07,
"loss": -0.005,
"num_tokens": 103776305.0,
"reward": 1.3025028705596924,
"reward_std": 0.19672125577926636,
"rewards/accuracy_reward_long_step": 0.41796875,
"rewards/final_brier_reward_long_step": 0.7558465003967285,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7822898626327515,
"step": 212
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 427.0,
"completions/max_terminated_length": 427.0,
"completions/mean_length": 252.21484375,
"completions/mean_terminated_length": 252.21484375,
"completions/min_length": 127.0,
"completions/min_terminated_length": 127.0,
"epoch": 0.3408,
"grad_norm": 0.02799782156944275,
"learning_rate": 7.330960854092527e-07,
"loss": -0.0066,
"num_tokens": 104269640.0,
"reward": 1.3634313344955444,
"reward_std": 0.23930571973323822,
"rewards/accuracy_reward_long_step": 0.48046875,
"rewards/final_brier_reward_long_step": 0.72954922914505,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.810113787651062,
"step": 213
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 478.0,
"completions/max_terminated_length": 478.0,
"completions/mean_length": 251.56640625,
"completions/mean_terminated_length": 251.56640625,
"completions/min_length": 156.0,
"completions/min_terminated_length": 156.0,
"epoch": 0.3424,
"grad_norm": 0.029079895466566086,
"learning_rate": 7.313167259786477e-07,
"loss": -0.0028,
"num_tokens": 104767105.0,
"reward": 1.3156641721725464,
"reward_std": 0.15963619947433472,
"rewards/accuracy_reward_long_step": 0.4140625,
"rewards/final_brier_reward_long_step": 0.7907624840736389,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8156442642211914,
"step": 214
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 440.0,
"completions/max_terminated_length": 440.0,
"completions/mean_length": 248.34375,
"completions/mean_terminated_length": 248.34375,
"completions/min_length": 101.0,
"completions/min_terminated_length": 101.0,
"epoch": 0.344,
"grad_norm": 0.029702944681048393,
"learning_rate": 7.295373665480427e-07,
"loss": 0.0191,
"num_tokens": 105264905.0,
"reward": 1.2511861324310303,
"reward_std": 0.13276691734790802,
"rewards/accuracy_reward_long_step": 0.375,
"rewards/final_brier_reward_long_step": 0.7566503286361694,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7559068202972412,
"step": 215
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 382.0,
"completions/max_terminated_length": 382.0,
"completions/mean_length": 240.96875,
"completions/mean_terminated_length": 240.96875,
"completions/min_length": 129.0,
"completions/min_terminated_length": 129.0,
"epoch": 0.3456,
"grad_norm": 0.02834567055106163,
"learning_rate": 7.277580071174377e-07,
"loss": -0.0024,
"num_tokens": 105756457.0,
"reward": 1.4394803047180176,
"reward_std": 0.2180296927690506,
"rewards/accuracy_reward_long_step": 0.5625,
"rewards/final_brier_reward_long_step": 0.7011808753013611,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8067399859428406,
"step": 216
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 489.0,
"completions/max_terminated_length": 489.0,
"completions/mean_length": 247.52734375,
"completions/mean_terminated_length": 247.52734375,
"completions/min_length": 103.0,
"completions/min_terminated_length": 103.0,
"epoch": 0.3472,
"grad_norm": 0.027301594614982605,
"learning_rate": 7.259786476868327e-07,
"loss": 0.0056,
"num_tokens": 106254728.0,
"reward": 1.3793138265609741,
"reward_std": 0.16131410002708435,
"rewards/accuracy_reward_long_step": 0.48046875,
"rewards/final_brier_reward_long_step": 0.7766991853713989,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.818681001663208,
"step": 217
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 444.0,
"completions/max_terminated_length": 444.0,
"completions/mean_length": 246.4140625,
"completions/mean_terminated_length": 246.4140625,
"completions/min_length": 141.0,
"completions/min_terminated_length": 141.0,
"epoch": 0.3488,
"grad_norm": 0.02674778178334236,
"learning_rate": 7.241992882562277e-07,
"loss": -0.0079,
"num_tokens": 106746986.0,
"reward": 1.3636713027954102,
"reward_std": 0.15767797827720642,
"rewards/accuracy_reward_long_step": 0.484375,
"rewards/final_brier_reward_long_step": 0.743729293346405,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7734558582305908,
"step": 218
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 461.0,
"completions/max_terminated_length": 461.0,
"completions/mean_length": 250.6796875,
"completions/mean_terminated_length": 250.6796875,
"completions/min_length": 159.0,
"completions/min_terminated_length": 159.0,
"epoch": 0.3504,
"grad_norm": 0.028994156047701836,
"learning_rate": 7.224199288256227e-07,
"loss": -0.015,
"num_tokens": 107246088.0,
"reward": 1.3627524375915527,
"reward_std": 0.21599024534225464,
"rewards/accuracy_reward_long_step": 0.46875,
"rewards/final_brier_reward_long_step": 0.7811144590377808,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8027076721191406,
"step": 219
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 400.0,
"completions/max_terminated_length": 400.0,
"completions/mean_length": 238.0390625,
"completions/mean_terminated_length": 238.0390625,
"completions/min_length": 156.0,
"completions/min_terminated_length": 156.0,
"epoch": 0.352,
"grad_norm": 0.03368399292230606,
"learning_rate": 7.206405693950178e-07,
"loss": -0.0043,
"num_tokens": 107737026.0,
"reward": 1.3865300416946411,
"reward_std": 0.20244070887565613,
"rewards/accuracy_reward_long_step": 0.484375,
"rewards/final_brier_reward_long_step": 0.7735304236412048,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8350895047187805,
"step": 220
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 420.0,
"completions/max_terminated_length": 420.0,
"completions/mean_length": 234.5234375,
"completions/mean_terminated_length": 234.5234375,
"completions/min_length": 140.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.3536,
"grad_norm": 0.030342837795615196,
"learning_rate": 7.188612099644128e-07,
"loss": 0.0076,
"num_tokens": 108207696.0,
"reward": 1.3147838115692139,
"reward_std": 0.14057135581970215,
"rewards/accuracy_reward_long_step": 0.4453125,
"rewards/final_brier_reward_long_step": 0.7470492124557495,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7386487722396851,
"step": 221
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 432.0,
"completions/max_terminated_length": 432.0,
"completions/mean_length": 243.98046875,
"completions/mean_terminated_length": 243.98046875,
"completions/min_length": 128.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.3552,
"grad_norm": 0.028509140014648438,
"learning_rate": 7.170818505338078e-07,
"loss": 0.0073,
"num_tokens": 108685563.0,
"reward": 1.4801634550094604,
"reward_std": 0.2189689725637436,
"rewards/accuracy_reward_long_step": 0.57421875,
"rewards/final_brier_reward_long_step": 0.7850944995880127,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8386842608451843,
"step": 222
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 393.0,
"completions/max_terminated_length": 393.0,
"completions/mean_length": 234.9921875,
"completions/mean_terminated_length": 234.9921875,
"completions/min_length": 115.0,
"completions/min_terminated_length": 115.0,
"epoch": 0.3568,
"grad_norm": 0.02966553531587124,
"learning_rate": 7.153024911032028e-07,
"loss": 0.0007,
"num_tokens": 109167289.0,
"reward": 1.377956509590149,
"reward_std": 0.15985409915447235,
"rewards/accuracy_reward_long_step": 0.48046875,
"rewards/final_brier_reward_long_step": 0.7817855477333069,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8081655502319336,
"step": 223
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 383.0,
"completions/max_terminated_length": 383.0,
"completions/mean_length": 231.79296875,
"completions/mean_terminated_length": 231.79296875,
"completions/min_length": 108.0,
"completions/min_terminated_length": 108.0,
"epoch": 0.3584,
"grad_norm": 0.030611420050263405,
"learning_rate": 7.135231316725978e-07,
"loss": 0.0122,
"num_tokens": 109655324.0,
"reward": 1.3960628509521484,
"reward_std": 0.11526073515415192,
"rewards/accuracy_reward_long_step": 0.4609375,
"rewards/final_brier_reward_long_step": 0.865734338760376,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8747667074203491,
"step": 224
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 424.0,
"completions/max_terminated_length": 424.0,
"completions/mean_length": 239.73828125,
"completions/mean_terminated_length": 239.73828125,
"completions/min_length": 127.0,
"completions/min_terminated_length": 127.0,
"epoch": 0.36,
"grad_norm": 0.032648514956235886,
"learning_rate": 7.117437722419929e-07,
"loss": 0.0134,
"num_tokens": 110142809.0,
"reward": 1.300749659538269,
"reward_std": 0.20763415098190308,
"rewards/accuracy_reward_long_step": 0.41796875,
"rewards/final_brier_reward_long_step": 0.7577574253082275,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7733659744262695,
"step": 225
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 480.0,
"completions/max_terminated_length": 480.0,
"completions/mean_length": 251.1875,
"completions/mean_terminated_length": 251.1875,
"completions/min_length": 151.0,
"completions/min_terminated_length": 151.0,
"epoch": 0.3616,
"grad_norm": 0.027327539399266243,
"learning_rate": 7.099644128113878e-07,
"loss": 0.0023,
"num_tokens": 110646393.0,
"reward": 1.5032904148101807,
"reward_std": 0.1392737776041031,
"rewards/accuracy_reward_long_step": 0.5859375,
"rewards/final_brier_reward_long_step": 0.8319687843322754,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8374428749084473,
"step": 226
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 406.0,
"completions/max_terminated_length": 406.0,
"completions/mean_length": 248.3046875,
"completions/mean_terminated_length": 248.3046875,
"completions/min_length": 112.0,
"completions/min_terminated_length": 112.0,
"epoch": 0.3632,
"grad_norm": 0.028683941811323166,
"learning_rate": 7.08185053380783e-07,
"loss": -0.0104,
"num_tokens": 111139431.0,
"reward": 1.2084152698516846,
"reward_std": 0.1405543088912964,
"rewards/accuracy_reward_long_step": 0.30859375,
"rewards/final_brier_reward_long_step": 0.7831394672393799,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8161464929580688,
"step": 227
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 369.0,
"completions/max_terminated_length": 369.0,
"completions/mean_length": 248.3671875,
"completions/mean_terminated_length": 248.3671875,
"completions/min_length": 142.0,
"completions/min_terminated_length": 142.0,
"epoch": 0.3648,
"grad_norm": 0.02712222747504711,
"learning_rate": 7.064056939501779e-07,
"loss": -0.006,
"num_tokens": 111635245.0,
"reward": 1.341683030128479,
"reward_std": 0.18412762880325317,
"rewards/accuracy_reward_long_step": 0.44140625,
"rewards/final_brier_reward_long_step": 0.7527234554290771,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8483837842941284,
"step": 228
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 457.0,
"completions/max_terminated_length": 457.0,
"completions/mean_length": 242.3046875,
"completions/mean_terminated_length": 242.3046875,
"completions/min_length": 144.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.3664,
"grad_norm": 0.028868133202195168,
"learning_rate": 7.046263345195729e-07,
"loss": 0.0127,
"num_tokens": 112127091.0,
"reward": 1.4517192840576172,
"reward_std": 0.16360458731651306,
"rewards/accuracy_reward_long_step": 0.578125,
"rewards/final_brier_reward_long_step": 0.7802172303199768,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7219727039337158,
"step": 229
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 444.0,
"completions/max_terminated_length": 444.0,
"completions/mean_length": 249.1953125,
"completions/mean_terminated_length": 249.1953125,
"completions/min_length": 150.0,
"completions/min_terminated_length": 150.0,
"epoch": 0.368,
"grad_norm": 0.030503099784255028,
"learning_rate": 7.028469750889679e-07,
"loss": 0.0012,
"num_tokens": 112615557.0,
"reward": 1.2901397943496704,
"reward_std": 0.1938847005367279,
"rewards/accuracy_reward_long_step": 0.43359375,
"rewards/final_brier_reward_long_step": 0.6676468849182129,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7585374116897583,
"step": 230
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 422.0,
"completions/max_terminated_length": 422.0,
"completions/mean_length": 236.74609375,
"completions/mean_terminated_length": 236.74609375,
"completions/min_length": 128.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.3696,
"grad_norm": 0.031112175434827805,
"learning_rate": 7.010676156583629e-07,
"loss": 0.0072,
"num_tokens": 113107308.0,
"reward": 1.3856728076934814,
"reward_std": 0.1376914530992508,
"rewards/accuracy_reward_long_step": 0.546875,
"rewards/final_brier_reward_long_step": 0.6620769500732422,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.693114161491394,
"step": 231
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 427.0,
"completions/max_terminated_length": 427.0,
"completions/mean_length": 235.6640625,
"completions/mean_terminated_length": 235.6640625,
"completions/min_length": 141.0,
"completions/min_terminated_length": 141.0,
"epoch": 0.3712,
"grad_norm": 0.028059890493750572,
"learning_rate": 6.99288256227758e-07,
"loss": -0.0005,
"num_tokens": 113587182.0,
"reward": 1.4213546514511108,
"reward_std": 0.2364693284034729,
"rewards/accuracy_reward_long_step": 0.5546875,
"rewards/final_brier_reward_long_step": 0.7457855343818665,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7208831310272217,
"step": 232
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 428.0,
"completions/max_terminated_length": 428.0,
"completions/mean_length": 235.29296875,
"completions/mean_terminated_length": 235.29296875,
"completions/min_length": 68.0,
"completions/min_terminated_length": 68.0,
"epoch": 0.3728,
"grad_norm": 0.027861230075359344,
"learning_rate": 6.975088967971529e-07,
"loss": 0.0032,
"num_tokens": 114059153.0,
"reward": 1.3009544610977173,
"reward_std": 0.12013030052185059,
"rewards/accuracy_reward_long_step": 0.38671875,
"rewards/final_brier_reward_long_step": 0.8454800844192505,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8192753195762634,
"step": 233
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 429.0,
"completions/max_terminated_length": 429.0,
"completions/mean_length": 238.9921875,
"completions/mean_terminated_length": 238.9921875,
"completions/min_length": 152.0,
"completions/min_terminated_length": 152.0,
"epoch": 0.3744,
"grad_norm": 0.02792465314269066,
"learning_rate": 6.957295373665481e-07,
"loss": -0.008,
"num_tokens": 114529591.0,
"reward": 1.4602546691894531,
"reward_std": 0.13010551035404205,
"rewards/accuracy_reward_long_step": 0.57421875,
"rewards/final_brier_reward_long_step": 0.7742418050765991,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7699018716812134,
"step": 234
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 378.0,
"completions/max_terminated_length": 378.0,
"completions/mean_length": 234.44140625,
"completions/mean_terminated_length": 234.44140625,
"completions/min_length": 144.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.376,
"grad_norm": 0.029247693717479706,
"learning_rate": 6.93950177935943e-07,
"loss": 0.0089,
"num_tokens": 115003176.0,
"reward": 1.4750906229019165,
"reward_std": 0.14190274477005005,
"rewards/accuracy_reward_long_step": 0.5625,
"rewards/final_brier_reward_long_step": 0.8098050951957703,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8405575752258301,
"step": 235
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 391.0,
"completions/max_terminated_length": 391.0,
"completions/mean_length": 234.3203125,
"completions/mean_terminated_length": 234.3203125,
"completions/min_length": 132.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.3776,
"grad_norm": 0.04604804888367653,
"learning_rate": 6.921708185053381e-07,
"loss": -0.0091,
"num_tokens": 115499370.0,
"reward": 1.253650426864624,
"reward_std": 0.1829456090927124,
"rewards/accuracy_reward_long_step": 0.36328125,
"rewards/final_brier_reward_long_step": 0.8057793378829956,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7556971311569214,
"step": 236
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 449.0,
"completions/max_terminated_length": 449.0,
"completions/mean_length": 244.71875,
"completions/mean_terminated_length": 244.71875,
"completions/min_length": 136.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.3792,
"grad_norm": 0.031333666294813156,
"learning_rate": 6.903914590747331e-07,
"loss": 0.0144,
"num_tokens": 115977570.0,
"reward": 1.4031198024749756,
"reward_std": 0.1869141310453415,
"rewards/accuracy_reward_long_step": 0.50390625,
"rewards/final_brier_reward_long_step": 0.771310567855835,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8255437612533569,
"step": 237
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 420.0,
"completions/max_terminated_length": 420.0,
"completions/mean_length": 249.234375,
"completions/mean_terminated_length": 249.234375,
"completions/min_length": 95.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.3808,
"grad_norm": 0.028639167547225952,
"learning_rate": 6.88612099644128e-07,
"loss": 0.0002,
"num_tokens": 116465086.0,
"reward": 1.3048759698867798,
"reward_std": 0.18162578344345093,
"rewards/accuracy_reward_long_step": 0.43359375,
"rewards/final_brier_reward_long_step": 0.7125644683837891,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7725646495819092,
"step": 238
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 452.0,
"completions/max_terminated_length": 452.0,
"completions/mean_length": 246.1953125,
"completions/mean_terminated_length": 246.1953125,
"completions/min_length": 144.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.3824,
"grad_norm": 0.029338406398892403,
"learning_rate": 6.868327402135231e-07,
"loss": 0.0053,
"num_tokens": 116931320.0,
"reward": 1.4342849254608154,
"reward_std": 0.1952168196439743,
"rewards/accuracy_reward_long_step": 0.5234375,
"rewards/final_brier_reward_long_step": 0.8456206917762756,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.79776930809021,
"step": 239
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 494.0,
"completions/max_terminated_length": 494.0,
"completions/mean_length": 244.49609375,
"completions/mean_terminated_length": 244.49609375,
"completions/min_length": 104.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.384,
"grad_norm": 0.02827758900821209,
"learning_rate": 6.85053380782918e-07,
"loss": -0.0059,
"num_tokens": 117435631.0,
"reward": 1.448837399482727,
"reward_std": 0.13833093643188477,
"rewards/accuracy_reward_long_step": 0.55078125,
"rewards/final_brier_reward_long_step": 0.8106515407562256,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7815728187561035,
"step": 240
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 408.0,
"completions/max_terminated_length": 408.0,
"completions/mean_length": 245.53515625,
"completions/mean_terminated_length": 245.53515625,
"completions/min_length": 140.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.3856,
"grad_norm": 0.04907617345452309,
"learning_rate": 6.832740213523132e-07,
"loss": 0.0111,
"num_tokens": 117927904.0,
"reward": 1.2637048959732056,
"reward_std": 0.19578373432159424,
"rewards/accuracy_reward_long_step": 0.375,
"rewards/final_brier_reward_long_step": 0.7642871141433716,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7905321717262268,
"step": 241
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 395.0,
"completions/max_terminated_length": 395.0,
"completions/mean_length": 247.26953125,
"completions/mean_terminated_length": 247.26953125,
"completions/min_length": 112.0,
"completions/min_terminated_length": 112.0,
"epoch": 0.3872,
"grad_norm": 0.02862401306629181,
"learning_rate": 6.814946619217081e-07,
"loss": 0.0034,
"num_tokens": 118409677.0,
"reward": 1.4095513820648193,
"reward_std": 0.16092461347579956,
"rewards/accuracy_reward_long_step": 0.5078125,
"rewards/final_brier_reward_long_step": 0.7932863235473633,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8136688470840454,
"step": 242
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 401.0,
"completions/max_terminated_length": 401.0,
"completions/mean_length": 245.3828125,
"completions/mean_terminated_length": 245.3828125,
"completions/min_length": 143.0,
"completions/min_terminated_length": 143.0,
"epoch": 0.3888,
"grad_norm": 0.02867737039923668,
"learning_rate": 6.797153024911032e-07,
"loss": -0.0028,
"num_tokens": 118886583.0,
"reward": 1.4647985696792603,
"reward_std": 0.22731342911720276,
"rewards/accuracy_reward_long_step": 0.58984375,
"rewards/final_brier_reward_long_step": 0.7210452556610107,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7787743210792542,
"step": 243
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 423.0,
"completions/max_terminated_length": 423.0,
"completions/mean_length": 241.7578125,
"completions/mean_terminated_length": 241.7578125,
"completions/min_length": 117.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.3904,
"grad_norm": 0.029156368225812912,
"learning_rate": 6.779359430604982e-07,
"loss": -0.005,
"num_tokens": 119362457.0,
"reward": 1.4701181650161743,
"reward_std": 0.18407407402992249,
"rewards/accuracy_reward_long_step": 0.58984375,
"rewards/final_brier_reward_long_step": 0.7508969306945801,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7702009081840515,
"step": 244
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 452.0,
"completions/max_terminated_length": 452.0,
"completions/mean_length": 255.76171875,
"completions/mean_terminated_length": 255.76171875,
"completions/min_length": 108.0,
"completions/min_terminated_length": 108.0,
"epoch": 0.392,
"grad_norm": 0.02922751009464264,
"learning_rate": 6.761565836298932e-07,
"loss": -0.0079,
"num_tokens": 119859300.0,
"reward": 1.2958691120147705,
"reward_std": 0.19874346256256104,
"rewards/accuracy_reward_long_step": 0.390625,
"rewards/final_brier_reward_long_step": 0.7741097807884216,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.84686678647995,
"step": 245
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 452.0,
"completions/max_terminated_length": 452.0,
"completions/mean_length": 253.25390625,
"completions/mean_terminated_length": 253.25390625,
"completions/min_length": 154.0,
"completions/min_terminated_length": 154.0,
"epoch": 0.3936,
"grad_norm": 0.029055269435048103,
"learning_rate": 6.743772241992882e-07,
"loss": 0.0013,
"num_tokens": 120346845.0,
"reward": 1.3145337104797363,
"reward_std": 0.1577182412147522,
"rewards/accuracy_reward_long_step": 0.42578125,
"rewards/final_brier_reward_long_step": 0.7263835668563843,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.828626275062561,
"step": 246
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 455.0,
"completions/max_terminated_length": 455.0,
"completions/mean_length": 247.15625,
"completions/mean_terminated_length": 247.15625,
"completions/min_length": 118.0,
"completions/min_terminated_length": 118.0,
"epoch": 0.3952,
"grad_norm": 0.030021749436855316,
"learning_rate": 6.725978647686833e-07,
"loss": -0.0072,
"num_tokens": 120849013.0,
"reward": 1.3015249967575073,
"reward_std": 0.15772968530654907,
"rewards/accuracy_reward_long_step": 0.41015625,
"rewards/final_brier_reward_long_step": 0.793542206287384,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7797452211380005,
"step": 247
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 461.0,
"completions/max_terminated_length": 461.0,
"completions/mean_length": 246.453125,
"completions/mean_terminated_length": 246.453125,
"completions/min_length": 104.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.3968,
"grad_norm": 0.03100651502609253,
"learning_rate": 6.708185053380783e-07,
"loss": -0.0017,
"num_tokens": 121339545.0,
"reward": 1.3638386726379395,
"reward_std": 0.18921023607254028,
"rewards/accuracy_reward_long_step": 0.46484375,
"rewards/final_brier_reward_long_step": 0.7788769602775574,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8171026110649109,
"step": 248
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 459.0,
"completions/max_terminated_length": 459.0,
"completions/mean_length": 252.19140625,
"completions/mean_terminated_length": 252.19140625,
"completions/min_length": 118.0,
"completions/min_terminated_length": 118.0,
"epoch": 0.3984,
"grad_norm": 0.029122378677129745,
"learning_rate": 6.690391459074733e-07,
"loss": 0.0026,
"num_tokens": 121826858.0,
"reward": 1.3321928977966309,
"reward_std": 0.18050938844680786,
"rewards/accuracy_reward_long_step": 0.4296875,
"rewards/final_brier_reward_long_step": 0.7863625288009644,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.82365882396698,
"step": 249
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 401.0,
"completions/max_terminated_length": 401.0,
"completions/mean_length": 255.10546875,
"completions/mean_terminated_length": 255.10546875,
"completions/min_length": 136.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.4,
"grad_norm": 0.02999301441013813,
"learning_rate": 6.672597864768683e-07,
"loss": -0.0024,
"num_tokens": 122323669.0,
"reward": 1.5447840690612793,
"reward_std": 0.1723225712776184,
"rewards/accuracy_reward_long_step": 0.66015625,
"rewards/final_brier_reward_long_step": 0.7203683853149414,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8181428909301758,
"step": 250
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 440.0,
"completions/max_terminated_length": 440.0,
"completions/mean_length": 263.68359375,
"completions/mean_terminated_length": 263.68359375,
"completions/min_length": 124.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.4016,
"grad_norm": 0.03101922571659088,
"learning_rate": 6.654804270462633e-07,
"loss": 0.0001,
"num_tokens": 122815420.0,
"reward": 1.2982118129730225,
"reward_std": 0.22702577710151672,
"rewards/accuracy_reward_long_step": 0.4140625,
"rewards/final_brier_reward_long_step": 0.7307562828063965,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8058412075042725,
"step": 251
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 432.0,
"completions/max_terminated_length": 432.0,
"completions/mean_length": 269.63671875,
"completions/mean_terminated_length": 269.63671875,
"completions/min_length": 141.0,
"completions/min_terminated_length": 141.0,
"epoch": 0.4032,
"grad_norm": 0.028656797483563423,
"learning_rate": 6.637010676156583e-07,
"loss": -0.0024,
"num_tokens": 123311063.0,
"reward": 1.2810194492340088,
"reward_std": 0.16011501848697662,
"rewards/accuracy_reward_long_step": 0.37109375,
"rewards/final_brier_reward_long_step": 0.7987828254699707,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8409198522567749,
"step": 252
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 425.0,
"completions/max_terminated_length": 425.0,
"completions/mean_length": 253.25,
"completions/mean_terminated_length": 253.25,
"completions/min_length": 105.0,
"completions/min_terminated_length": 105.0,
"epoch": 0.4048,
"grad_norm": 0.030914753675460815,
"learning_rate": 6.619217081850533e-07,
"loss": 0.0152,
"num_tokens": 123774055.0,
"reward": 1.3488011360168457,
"reward_std": 0.12081344425678253,
"rewards/accuracy_reward_long_step": 0.44140625,
"rewards/final_brier_reward_long_step": 0.810867190361023,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8187124729156494,
"step": 253
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 429.0,
"completions/max_terminated_length": 429.0,
"completions/mean_length": 264.98046875,
"completions/mean_terminated_length": 264.98046875,
"completions/min_length": 120.0,
"completions/min_terminated_length": 120.0,
"epoch": 0.4064,
"grad_norm": 0.028002172708511353,
"learning_rate": 6.601423487544484e-07,
"loss": -0.0013,
"num_tokens": 124260066.0,
"reward": 1.4030770063400269,
"reward_std": 0.14950624108314514,
"rewards/accuracy_reward_long_step": 0.53125,
"rewards/final_brier_reward_long_step": 0.7564605474472046,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7308475971221924,
"step": 254
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 550.0,
"completions/max_terminated_length": 550.0,
"completions/mean_length": 267.8203125,
"completions/mean_terminated_length": 267.8203125,
"completions/min_length": 136.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.408,
"grad_norm": 0.027425022795796394,
"learning_rate": 6.583629893238434e-07,
"loss": -0.0113,
"num_tokens": 124759276.0,
"reward": 1.3036949634552002,
"reward_std": 0.22329337894916534,
"rewards/accuracy_reward_long_step": 0.4140625,
"rewards/final_brier_reward_long_step": 0.7808917760848999,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7776377201080322,
"step": 255
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 430.0,
"completions/max_terminated_length": 430.0,
"completions/mean_length": 256.765625,
"completions/mean_terminated_length": 256.765625,
"completions/min_length": 119.0,
"completions/min_terminated_length": 119.0,
"epoch": 0.4096,
"grad_norm": 0.02852526493370533,
"learning_rate": 6.565836298932385e-07,
"loss": -0.0086,
"num_tokens": 125243288.0,
"reward": 1.462049961090088,
"reward_std": 0.17608040571212769,
"rewards/accuracy_reward_long_step": 0.6328125,
"rewards/final_brier_reward_long_step": 0.6412858963012695,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.6756638288497925,
"step": 256
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 489.0,
"completions/max_terminated_length": 489.0,
"completions/mean_length": 267.80078125,
"completions/mean_terminated_length": 267.80078125,
"completions/min_length": 107.0,
"completions/min_terminated_length": 107.0,
"epoch": 0.4112,
"grad_norm": 0.030284898355603218,
"learning_rate": 6.548042704626334e-07,
"loss": -0.0072,
"num_tokens": 125741869.0,
"reward": 1.3154691457748413,
"reward_std": 0.2600463032722473,
"rewards/accuracy_reward_long_step": 0.44140625,
"rewards/final_brier_reward_long_step": 0.7620574235916138,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7341942191123962,
"step": 257
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 418.0,
"completions/max_terminated_length": 418.0,
"completions/mean_length": 262.48828125,
"completions/mean_terminated_length": 262.48828125,
"completions/min_length": 107.0,
"completions/min_terminated_length": 107.0,
"epoch": 0.4128,
"grad_norm": 0.03752259910106659,
"learning_rate": 6.530249110320284e-07,
"loss": -0.0087,
"num_tokens": 126234514.0,
"reward": 1.4143836498260498,
"reward_std": 0.14952951669692993,
"rewards/accuracy_reward_long_step": 0.51171875,
"rewards/final_brier_reward_long_step": 0.7889230251312256,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8217366933822632,
"step": 258
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 437.0,
"completions/max_terminated_length": 437.0,
"completions/mean_length": 274.88671875,
"completions/mean_terminated_length": 274.88671875,
"completions/min_length": 132.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.4144,
"grad_norm": 0.028582880273461342,
"learning_rate": 6.512455516014234e-07,
"loss": -0.0036,
"num_tokens": 126730069.0,
"reward": 1.4206815958023071,
"reward_std": 0.19087818264961243,
"rewards/accuracy_reward_long_step": 0.5390625,
"rewards/final_brier_reward_long_step": 0.7297155857086182,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8045732975006104,
"step": 259
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 478.0,
"completions/max_terminated_length": 478.0,
"completions/mean_length": 272.7578125,
"completions/mean_terminated_length": 272.7578125,
"completions/min_length": 117.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.416,
"grad_norm": 0.026567399501800537,
"learning_rate": 6.494661921708184e-07,
"loss": -0.0099,
"num_tokens": 127214927.0,
"reward": 1.3398463726043701,
"reward_std": 0.12067516893148422,
"rewards/accuracy_reward_long_step": 0.44921875,
"rewards/final_brier_reward_long_step": 0.860762894153595,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7017475366592407,
"step": 260
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 553.0,
"completions/max_terminated_length": 553.0,
"completions/mean_length": 277.7265625,
"completions/mean_terminated_length": 277.7265625,
"completions/min_length": 110.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.4176,
"grad_norm": 0.02695128507912159,
"learning_rate": 6.476868327402136e-07,
"loss": -0.001,
"num_tokens": 127710617.0,
"reward": 1.4976187944412231,
"reward_std": 0.14346104860305786,
"rewards/accuracy_reward_long_step": 0.60546875,
"rewards/final_brier_reward_long_step": 0.7943031191825867,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7742971181869507,
"step": 261
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 461.0,
"completions/max_terminated_length": 461.0,
"completions/mean_length": 273.38671875,
"completions/mean_terminated_length": 273.38671875,
"completions/min_length": 114.0,
"completions/min_terminated_length": 114.0,
"epoch": 0.4192,
"grad_norm": 0.02583600953221321,
"learning_rate": 6.459074733096085e-07,
"loss": -0.0104,
"num_tokens": 128212556.0,
"reward": 1.436043620109558,
"reward_std": 0.11342111974954605,
"rewards/accuracy_reward_long_step": 0.5390625,
"rewards/final_brier_reward_long_step": 0.8090195655822754,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.778904914855957,
"step": 262
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 486.0,
"completions/max_terminated_length": 486.0,
"completions/mean_length": 275.109375,
"completions/mean_terminated_length": 275.109375,
"completions/min_length": 114.0,
"completions/min_terminated_length": 114.0,
"epoch": 0.4208,
"grad_norm": 0.027538571506738663,
"learning_rate": 6.441281138790036e-07,
"loss": -0.0113,
"num_tokens": 128707720.0,
"reward": 1.2951858043670654,
"reward_std": 0.15389752388000488,
"rewards/accuracy_reward_long_step": 0.4140625,
"rewards/final_brier_reward_long_step": 0.7856941223144531,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.738798975944519,
"step": 263
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 587.0,
"completions/max_terminated_length": 587.0,
"completions/mean_length": 292.70703125,
"completions/mean_terminated_length": 292.70703125,
"completions/min_length": 140.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.4224,
"grad_norm": 0.0262598879635334,
"learning_rate": 6.423487544483985e-07,
"loss": 0.017,
"num_tokens": 129207389.0,
"reward": 1.3416748046875,
"reward_std": 0.23134978115558624,
"rewards/accuracy_reward_long_step": 0.4375,
"rewards/final_brier_reward_long_step": 0.7901312112808228,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8343803882598877,
"step": 264
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 561.0,
"completions/max_terminated_length": 561.0,
"completions/mean_length": 288.3515625,
"completions/mean_terminated_length": 288.3515625,
"completions/min_length": 109.0,
"completions/min_terminated_length": 109.0,
"epoch": 0.424,
"grad_norm": 0.030151214450597763,
"learning_rate": 6.405693950177936e-07,
"loss": 0.0096,
"num_tokens": 129708127.0,
"reward": 1.5116443634033203,
"reward_std": 0.15138523280620575,
"rewards/accuracy_reward_long_step": 0.61328125,
"rewards/final_brier_reward_long_step": 0.8109831809997559,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7824693918228149,
"step": 265
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 530.0,
"completions/max_terminated_length": 530.0,
"completions/mean_length": 285.359375,
"completions/mean_terminated_length": 285.359375,
"completions/min_length": 123.0,
"completions/min_terminated_length": 123.0,
"epoch": 0.4256,
"grad_norm": 0.030536562204360962,
"learning_rate": 6.387900355871885e-07,
"loss": 0.0139,
"num_tokens": 130207523.0,
"reward": 1.5717109441757202,
"reward_std": 0.1531601846218109,
"rewards/accuracy_reward_long_step": 0.671875,
"rewards/final_brier_reward_long_step": 0.7941410541534424,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.805202841758728,
"step": 266
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 512.0,
"completions/max_terminated_length": 512.0,
"completions/mean_length": 285.46875,
"completions/mean_terminated_length": 285.46875,
"completions/min_length": 104.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.4272,
"grad_norm": 0.028177211061120033,
"learning_rate": 6.370106761565835e-07,
"loss": 0.0141,
"num_tokens": 130672219.0,
"reward": 1.3481889963150024,
"reward_std": 0.1336720734834671,
"rewards/accuracy_reward_long_step": 0.4453125,
"rewards/final_brier_reward_long_step": 0.836502730846405,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7750029563903809,
"step": 267
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 454.0,
"completions/max_terminated_length": 454.0,
"completions/mean_length": 289.078125,
"completions/mean_terminated_length": 289.078125,
"completions/min_length": 125.0,
"completions/min_terminated_length": 125.0,
"epoch": 0.4288,
"grad_norm": 0.027222031727433205,
"learning_rate": 6.352313167259787e-07,
"loss": -0.0063,
"num_tokens": 131182759.0,
"reward": 1.3185360431671143,
"reward_std": 0.17626741528511047,
"rewards/accuracy_reward_long_step": 0.44140625,
"rewards/final_brier_reward_long_step": 0.7190519571304321,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7894670963287354,
"step": 268
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 533.0,
"completions/max_terminated_length": 533.0,
"completions/mean_length": 280.86328125,
"completions/mean_terminated_length": 280.86328125,
"completions/min_length": 134.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.4304,
"grad_norm": 0.03042900562286377,
"learning_rate": 6.334519572953736e-07,
"loss": -0.0005,
"num_tokens": 131680284.0,
"reward": 1.4278100728988647,
"reward_std": 0.10464347898960114,
"rewards/accuracy_reward_long_step": 0.54296875,
"rewards/final_brier_reward_long_step": 0.7685461044311523,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7708194255828857,
"step": 269
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 501.0,
"completions/max_terminated_length": 501.0,
"completions/mean_length": 287.1796875,
"completions/mean_terminated_length": 288.305908203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.432,
"grad_norm": 0.027735978364944458,
"learning_rate": 6.316725978647687e-07,
"loss": 0.0066,
"num_tokens": 132185850.0,
"reward": 1.4755172729492188,
"reward_std": 0.208018958568573,
"rewards/accuracy_reward_long_step": 0.59375,
"rewards/final_brier_reward_long_step": 0.7717519402503967,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7631298303604126,
"step": 270
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 598.0,
"completions/max_terminated_length": 598.0,
"completions/mean_length": 296.0703125,
"completions/mean_terminated_length": 296.0703125,
"completions/min_length": 144.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.4336,
"grad_norm": 0.0286890659481287,
"learning_rate": 6.298932384341636e-07,
"loss": -0.0039,
"num_tokens": 132682476.0,
"reward": 1.224784255027771,
"reward_std": 0.1470840871334076,
"rewards/accuracy_reward_long_step": 0.36328125,
"rewards/final_brier_reward_long_step": 0.7217913866043091,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7242205142974854,
"step": 271
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 502.0,
"completions/max_terminated_length": 502.0,
"completions/mean_length": 290.5546875,
"completions/mean_terminated_length": 290.5546875,
"completions/min_length": 152.0,
"completions/min_terminated_length": 152.0,
"epoch": 0.4352,
"grad_norm": 0.028832513839006424,
"learning_rate": 6.281138790035587e-07,
"loss": 0.0023,
"num_tokens": 133175010.0,
"reward": 1.4400919675827026,
"reward_std": 0.13745911419391632,
"rewards/accuracy_reward_long_step": 0.54296875,
"rewards/final_brier_reward_long_step": 0.8417414426803589,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.746751606464386,
"step": 272
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 567.0,
"completions/max_terminated_length": 567.0,
"completions/mean_length": 294.5546875,
"completions/mean_terminated_length": 294.5546875,
"completions/min_length": 119.0,
"completions/min_terminated_length": 119.0,
"epoch": 0.4368,
"grad_norm": 0.02651878260076046,
"learning_rate": 6.263345195729537e-07,
"loss": 0.005,
"num_tokens": 133671872.0,
"reward": 1.4299688339233398,
"reward_std": 0.17098167538642883,
"rewards/accuracy_reward_long_step": 0.54296875,
"rewards/final_brier_reward_long_step": 0.7499749660491943,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7980256676673889,
"step": 273
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 543.0,
"completions/max_terminated_length": 543.0,
"completions/mean_length": 294.70703125,
"completions/mean_terminated_length": 294.70703125,
"completions/min_length": 121.0,
"completions/min_terminated_length": 121.0,
"epoch": 0.4384,
"grad_norm": 0.026192937046289444,
"learning_rate": 6.245551601423488e-07,
"loss": 0.0083,
"num_tokens": 134164181.0,
"reward": 1.3008532524108887,
"reward_std": 0.15340715646743774,
"rewards/accuracy_reward_long_step": 0.3984375,
"rewards/final_brier_reward_long_step": 0.8331218957901001,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7765412330627441,
"step": 274
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 677.0,
"completions/max_terminated_length": 677.0,
"completions/mean_length": 295.03515625,
"completions/mean_terminated_length": 295.03515625,
"completions/min_length": 104.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.44,
"grad_norm": 0.027292657643556595,
"learning_rate": 6.227758007117438e-07,
"loss": 0.0172,
"num_tokens": 134653262.0,
"reward": 1.4425451755523682,
"reward_std": 0.19112396240234375,
"rewards/accuracy_reward_long_step": 0.55859375,
"rewards/final_brier_reward_long_step": 0.7454347610473633,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7981832027435303,
"step": 275
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 475.0,
"completions/max_terminated_length": 475.0,
"completions/mean_length": 280.5625,
"completions/mean_terminated_length": 280.5625,
"completions/min_length": 113.0,
"completions/min_terminated_length": 113.0,
"epoch": 0.4416,
"grad_norm": 0.03567759320139885,
"learning_rate": 6.209964412811388e-07,
"loss": -0.001,
"num_tokens": 135144110.0,
"reward": 1.4016071557998657,
"reward_std": 0.180876225233078,
"rewards/accuracy_reward_long_step": 0.53125,
"rewards/final_brier_reward_long_step": 0.7937929630279541,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.6876357793807983,
"step": 276
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 615.0,
"completions/max_terminated_length": 615.0,
"completions/mean_length": 296.69921875,
"completions/mean_terminated_length": 296.69921875,
"completions/min_length": 99.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.4432,
"grad_norm": 0.027701787650585175,
"learning_rate": 6.192170818505338e-07,
"loss": 0.0152,
"num_tokens": 135657897.0,
"reward": 1.2873167991638184,
"reward_std": 0.15553465485572815,
"rewards/accuracy_reward_long_step": 0.40625,
"rewards/final_brier_reward_long_step": 0.7762769460678101,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7479904890060425,
"step": 277
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 567.0,
"completions/max_terminated_length": 567.0,
"completions/mean_length": 288.46484375,
"completions/mean_terminated_length": 288.46484375,
"completions/min_length": 115.0,
"completions/min_terminated_length": 115.0,
"epoch": 0.4448,
"grad_norm": 0.027518663555383682,
"learning_rate": 6.174377224199287e-07,
"loss": 0.0105,
"num_tokens": 136161664.0,
"reward": 1.4033050537109375,
"reward_std": 0.16255879402160645,
"rewards/accuracy_reward_long_step": 0.5078125,
"rewards/final_brier_reward_long_step": 0.807449996471405,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7745203971862793,
"step": 278
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 601.0,
"completions/max_terminated_length": 601.0,
"completions/mean_length": 289.26171875,
"completions/mean_terminated_length": 289.26171875,
"completions/min_length": 110.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.4464,
"grad_norm": 0.030349889770150185,
"learning_rate": 6.156583629893238e-07,
"loss": 0.0083,
"num_tokens": 136661251.0,
"reward": 1.3778247833251953,
"reward_std": 0.22678440809249878,
"rewards/accuracy_reward_long_step": 0.5078125,
"rewards/final_brier_reward_long_step": 0.7540500164031982,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7259989976882935,
"step": 279
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 492.0,
"completions/max_terminated_length": 492.0,
"completions/mean_length": 292.5859375,
"completions/mean_terminated_length": 292.5859375,
"completions/min_length": 124.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.448,
"grad_norm": 0.026998843997716904,
"learning_rate": 6.138790035587188e-07,
"loss": 0.0038,
"num_tokens": 137162705.0,
"reward": 1.552132248878479,
"reward_std": 0.09305281937122345,
"rewards/accuracy_reward_long_step": 0.640625,
"rewards/final_brier_reward_long_step": 0.8342460989952087,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.811782956123352,
"step": 280
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 539.0,
"completions/max_terminated_length": 539.0,
"completions/mean_length": 301.2265625,
"completions/mean_terminated_length": 301.2265625,
"completions/min_length": 141.0,
"completions/min_terminated_length": 141.0,
"epoch": 0.4496,
"grad_norm": 0.02617485634982586,
"learning_rate": 6.120996441281139e-07,
"loss": -0.0058,
"num_tokens": 137672107.0,
"reward": 1.3473117351531982,
"reward_std": 0.1873582899570465,
"rewards/accuracy_reward_long_step": 0.4765625,
"rewards/final_brier_reward_long_step": 0.6998116970062256,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.783184826374054,
"step": 281
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 560.0,
"completions/max_terminated_length": 560.0,
"completions/mean_length": 291.34375,
"completions/mean_terminated_length": 291.34375,
"completions/min_length": 121.0,
"completions/min_terminated_length": 121.0,
"epoch": 0.4512,
"grad_norm": 0.02765418216586113,
"learning_rate": 6.103202846975089e-07,
"loss": 0.0093,
"num_tokens": 138184259.0,
"reward": 1.489346981048584,
"reward_std": 0.16857793927192688,
"rewards/accuracy_reward_long_step": 0.59375,
"rewards/final_brier_reward_long_step": 0.8282409906387329,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7541468143463135,
"step": 282
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 581.0,
"completions/max_terminated_length": 581.0,
"completions/mean_length": 286.015625,
"completions/mean_terminated_length": 286.015625,
"completions/min_length": 128.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.4528,
"grad_norm": 0.029208846390247345,
"learning_rate": 6.085409252669039e-07,
"loss": -0.0042,
"num_tokens": 138692327.0,
"reward": 1.3559755086898804,
"reward_std": 0.20295041799545288,
"rewards/accuracy_reward_long_step": 0.5,
"rewards/final_brier_reward_long_step": 0.7214418053627014,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7024602293968201,
"step": 283
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 609.0,
"completions/max_terminated_length": 609.0,
"completions/mean_length": 301.9296875,
"completions/mean_terminated_length": 301.9296875,
"completions/min_length": 109.0,
"completions/min_terminated_length": 109.0,
"epoch": 0.4544,
"grad_norm": 0.030748292803764343,
"learning_rate": 6.067615658362989e-07,
"loss": -0.0041,
"num_tokens": 139194445.0,
"reward": 1.1845823526382446,
"reward_std": 0.12908682227134705,
"rewards/accuracy_reward_long_step": 0.30859375,
"rewards/final_brier_reward_long_step": 0.7271432876586914,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7768109440803528,
"step": 284
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 529.0,
"completions/max_terminated_length": 529.0,
"completions/mean_length": 282.84765625,
"completions/mean_terminated_length": 282.84765625,
"completions/min_length": 114.0,
"completions/min_terminated_length": 114.0,
"epoch": 0.456,
"grad_norm": 0.0292031429708004,
"learning_rate": 6.04982206405694e-07,
"loss": 0.0071,
"num_tokens": 139705542.0,
"reward": 1.32478928565979,
"reward_std": 0.21371236443519592,
"rewards/accuracy_reward_long_step": 0.43359375,
"rewards/final_brier_reward_long_step": 0.7342382073402405,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8305443525314331,
"step": 285
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 494.0,
"completions/max_terminated_length": 494.0,
"completions/mean_length": 290.70703125,
"completions/mean_terminated_length": 290.70703125,
"completions/min_length": 124.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.4576,
"grad_norm": 0.027747957035899162,
"learning_rate": 6.032028469750889e-07,
"loss": -0.0002,
"num_tokens": 140203851.0,
"reward": 1.3524994850158691,
"reward_std": 0.12304867804050446,
"rewards/accuracy_reward_long_step": 0.44921875,
"rewards/final_brier_reward_long_step": 0.7922519445419312,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8208708167076111,
"step": 286
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 534.0,
"completions/max_terminated_length": 534.0,
"completions/mean_length": 293.1640625,
"completions/mean_terminated_length": 293.1640625,
"completions/min_length": 163.0,
"completions/min_terminated_length": 163.0,
"epoch": 0.4592,
"grad_norm": 0.027378322556614876,
"learning_rate": 6.014234875444839e-07,
"loss": 0.013,
"num_tokens": 140702597.0,
"reward": 1.173850178718567,
"reward_std": 0.15952864289283752,
"rewards/accuracy_reward_long_step": 0.31640625,
"rewards/final_brier_reward_long_step": 0.7225792407989502,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7071964144706726,
"step": 287
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 671.0,
"completions/max_terminated_length": 671.0,
"completions/mean_length": 285.3203125,
"completions/mean_terminated_length": 285.3203125,
"completions/min_length": 151.0,
"completions/min_terminated_length": 151.0,
"epoch": 0.4608,
"grad_norm": 0.02744467370212078,
"learning_rate": 5.99644128113879e-07,
"loss": 0.0087,
"num_tokens": 141206695.0,
"reward": 1.2383294105529785,
"reward_std": 0.14181900024414062,
"rewards/accuracy_reward_long_step": 0.359375,
"rewards/final_brier_reward_long_step": 0.7510156631469727,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7648018598556519,
"step": 288
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 440.0,
"completions/max_terminated_length": 440.0,
"completions/mean_length": 272.34375,
"completions/mean_terminated_length": 272.34375,
"completions/min_length": 110.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.4624,
"grad_norm": 0.02806474268436432,
"learning_rate": 5.97864768683274e-07,
"loss": 0.0099,
"num_tokens": 141695007.0,
"reward": 1.2011268138885498,
"reward_std": 0.14036910235881805,
"rewards/accuracy_reward_long_step": 0.3359375,
"rewards/final_brier_reward_long_step": 0.7097039222717285,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7510532736778259,
"step": 289
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 470.0,
"completions/max_terminated_length": 470.0,
"completions/mean_length": 266.53125,
"completions/mean_terminated_length": 266.53125,
"completions/min_length": 148.0,
"completions/min_terminated_length": 148.0,
"epoch": 0.464,
"grad_norm": 0.028714032843708992,
"learning_rate": 5.96085409252669e-07,
"loss": 0.0078,
"num_tokens": 142182015.0,
"reward": 1.4051401615142822,
"reward_std": 0.19630657136440277,
"rewards/accuracy_reward_long_step": 0.515625,
"rewards/final_brier_reward_long_step": 0.7274429798126221,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8306175470352173,
"step": 290
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 440.0,
"completions/max_terminated_length": 440.0,
"completions/mean_length": 264.8203125,
"completions/mean_terminated_length": 264.8203125,
"completions/min_length": 119.0,
"completions/min_terminated_length": 119.0,
"epoch": 0.4656,
"grad_norm": 0.02966773696243763,
"learning_rate": 5.94306049822064e-07,
"loss": -0.0106,
"num_tokens": 142663985.0,
"reward": 1.3878998756408691,
"reward_std": 0.13589531183242798,
"rewards/accuracy_reward_long_step": 0.515625,
"rewards/final_brier_reward_long_step": 0.7204523086547852,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7764595746994019,
"step": 291
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 501.0,
"completions/max_terminated_length": 501.0,
"completions/mean_length": 279.4765625,
"completions/mean_terminated_length": 279.4765625,
"completions/min_length": 173.0,
"completions/min_terminated_length": 173.0,
"epoch": 0.4672,
"grad_norm": 0.02832869067788124,
"learning_rate": 5.925266903914591e-07,
"loss": 0.005,
"num_tokens": 143170555.0,
"reward": 1.2539631128311157,
"reward_std": 0.18463820219039917,
"rewards/accuracy_reward_long_step": 0.3984375,
"rewards/final_brier_reward_long_step": 0.6404902338981628,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7816122174263,
"step": 292
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 558.0,
"completions/max_terminated_length": 558.0,
"completions/mean_length": 261.421875,
"completions/mean_terminated_length": 261.421875,
"completions/min_length": 142.0,
"completions/min_terminated_length": 142.0,
"epoch": 0.4688,
"grad_norm": 0.03127996623516083,
"learning_rate": 5.90747330960854e-07,
"loss": 0.0174,
"num_tokens": 143668759.0,
"reward": 1.3849046230316162,
"reward_std": 0.14204376935958862,
"rewards/accuracy_reward_long_step": 0.48828125,
"rewards/final_brier_reward_long_step": 0.8194859027862549,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.767007052898407,
"step": 293
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 525.0,
"completions/max_terminated_length": 525.0,
"completions/mean_length": 280.4375,
"completions/mean_terminated_length": 280.4375,
"completions/min_length": 137.0,
"completions/min_terminated_length": 137.0,
"epoch": 0.4704,
"grad_norm": 0.030284756794571877,
"learning_rate": 5.889679715302491e-07,
"loss": -0.0029,
"num_tokens": 144175575.0,
"reward": 1.3805763721466064,
"reward_std": 0.18626053631305695,
"rewards/accuracy_reward_long_step": 0.48828125,
"rewards/final_brier_reward_long_step": 0.7643499970436096,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8048302531242371,
"step": 294
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 481.0,
"completions/max_terminated_length": 481.0,
"completions/mean_length": 267.71484375,
"completions/mean_terminated_length": 267.71484375,
"completions/min_length": 107.0,
"completions/min_terminated_length": 107.0,
"epoch": 0.472,
"grad_norm": 0.02768511138856411,
"learning_rate": 5.871886120996441e-07,
"loss": -0.0029,
"num_tokens": 144659334.0,
"reward": 1.3032722473144531,
"reward_std": 0.13138622045516968,
"rewards/accuracy_reward_long_step": 0.4140625,
"rewards/final_brier_reward_long_step": 0.7461843490600586,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8106545805931091,
"step": 295
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 436.0,
"completions/max_terminated_length": 436.0,
"completions/mean_length": 271.9453125,
"completions/mean_terminated_length": 273.01177978515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 121.0,
"epoch": 0.4736,
"grad_norm": 0.03377687931060791,
"learning_rate": 5.854092526690391e-07,
"loss": 0.0064,
"num_tokens": 145164320.0,
"reward": 1.291682243347168,
"reward_std": 0.21950051188468933,
"rewards/accuracy_reward_long_step": 0.3984375,
"rewards/final_brier_reward_long_step": 0.7516234517097473,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8291677236557007,
"step": 296
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 491.0,
"completions/max_terminated_length": 491.0,
"completions/mean_length": 261.671875,
"completions/mean_terminated_length": 261.671875,
"completions/min_length": 119.0,
"completions/min_terminated_length": 119.0,
"epoch": 0.4752,
"grad_norm": 0.0278344564139843,
"learning_rate": 5.836298932384342e-07,
"loss": -0.0083,
"num_tokens": 145661492.0,
"reward": 1.4372165203094482,
"reward_std": 0.19561487436294556,
"rewards/accuracy_reward_long_step": 0.52734375,
"rewards/final_brier_reward_long_step": 0.7994953393936157,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8478083610534668,
"step": 297
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 459.0,
"completions/max_terminated_length": 459.0,
"completions/mean_length": 260.375,
"completions/mean_terminated_length": 260.375,
"completions/min_length": 124.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.4768,
"grad_norm": 0.030550826340913773,
"learning_rate": 5.818505338078291e-07,
"loss": 0.0046,
"num_tokens": 146145276.0,
"reward": 1.3957126140594482,
"reward_std": 0.1581364870071411,
"rewards/accuracy_reward_long_step": 0.53125,
"rewards/final_brier_reward_long_step": 0.7615882754325867,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.6962625980377197,
"step": 298
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 439.0,
"completions/max_terminated_length": 439.0,
"completions/mean_length": 262.453125,
"completions/mean_terminated_length": 262.453125,
"completions/min_length": 117.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.4784,
"grad_norm": 0.02928241901099682,
"learning_rate": 5.800711743772242e-07,
"loss": -0.0049,
"num_tokens": 146631200.0,
"reward": 1.560309648513794,
"reward_std": 0.15518754720687866,
"rewards/accuracy_reward_long_step": 0.68359375,
"rewards/final_brier_reward_long_step": 0.7499921321868896,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.756871223449707,
"step": 299
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 585.0,
"completions/max_terminated_length": 585.0,
"completions/mean_length": 270.3203125,
"completions/mean_terminated_length": 270.3203125,
"completions/min_length": 119.0,
"completions/min_terminated_length": 119.0,
"epoch": 0.48,
"grad_norm": 0.032593853771686554,
"learning_rate": 5.782918149466191e-07,
"loss": 0.0044,
"num_tokens": 147121786.0,
"reward": 1.4181792736053467,
"reward_std": 0.17730304598808289,
"rewards/accuracy_reward_long_step": 0.51953125,
"rewards/final_brier_reward_long_step": 0.7884241342544556,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8061679601669312,
"step": 300
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 471.0,
"completions/max_terminated_length": 471.0,
"completions/mean_length": 275.15625,
"completions/mean_terminated_length": 275.15625,
"completions/min_length": 143.0,
"completions/min_terminated_length": 143.0,
"epoch": 0.4816,
"grad_norm": 0.028711630031466484,
"learning_rate": 5.765124555160142e-07,
"loss": 0.0037,
"num_tokens": 147618706.0,
"reward": 1.3091957569122314,
"reward_std": 0.17498339712619781,
"rewards/accuracy_reward_long_step": 0.42578125,
"rewards/final_brier_reward_long_step": 0.6745136976242065,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.859144389629364,
"step": 301
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 469.0,
"completions/max_terminated_length": 469.0,
"completions/mean_length": 272.421875,
"completions/mean_terminated_length": 272.421875,
"completions/min_length": 130.0,
"completions/min_terminated_length": 130.0,
"epoch": 0.4832,
"grad_norm": 0.02766992151737213,
"learning_rate": 5.747330960854092e-07,
"loss": -0.0112,
"num_tokens": 148128422.0,
"reward": 1.4776198863983154,
"reward_std": 0.13255634903907776,
"rewards/accuracy_reward_long_step": 0.58984375,
"rewards/final_brier_reward_long_step": 0.7580232620239258,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7930811643600464,
"step": 302
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 464.0,
"completions/max_terminated_length": 464.0,
"completions/mean_length": 270.1875,
"completions/mean_terminated_length": 270.1875,
"completions/min_length": 122.0,
"completions/min_terminated_length": 122.0,
"epoch": 0.4848,
"grad_norm": 0.03053920716047287,
"learning_rate": 5.729537366548043e-07,
"loss": 0.0108,
"num_tokens": 148619142.0,
"reward": 1.3393454551696777,
"reward_std": 0.1206967830657959,
"rewards/accuracy_reward_long_step": 0.44921875,
"rewards/final_brier_reward_long_step": 0.7705498933792114,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7899569272994995,
"step": 303
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 578.0,
"completions/max_terminated_length": 578.0,
"completions/mean_length": 275.53125,
"completions/mean_terminated_length": 275.53125,
"completions/min_length": 99.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.4864,
"grad_norm": 0.0303883645683527,
"learning_rate": 5.711743772241993e-07,
"loss": 0.0186,
"num_tokens": 149120358.0,
"reward": 1.4402146339416504,
"reward_std": 0.17118000984191895,
"rewards/accuracy_reward_long_step": 0.5234375,
"rewards/final_brier_reward_long_step": 0.8297659158706665,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8373425006866455,
"step": 304
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 565.0,
"completions/max_terminated_length": 565.0,
"completions/mean_length": 282.84765625,
"completions/mean_terminated_length": 282.84765625,
"completions/min_length": 140.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.488,
"grad_norm": 0.02846652828156948,
"learning_rate": 5.693950177935943e-07,
"loss": 0.0182,
"num_tokens": 149610279.0,
"reward": 1.5291298627853394,
"reward_std": 0.18615154922008514,
"rewards/accuracy_reward_long_step": 0.62890625,
"rewards/final_brier_reward_long_step": 0.8304492235183716,
"rewards/format_reward_long_step": 0.9921875,
"rewards/stepwise_brier_reward_long_step": 0.7860701084136963,
"step": 305
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 509.0,
"completions/max_terminated_length": 509.0,
"completions/mean_length": 264.23046875,
"completions/mean_terminated_length": 264.23046875,
"completions/min_length": 134.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.4896,
"grad_norm": 0.03294990211725235,
"learning_rate": 5.676156583629893e-07,
"loss": 0.0049,
"num_tokens": 150121194.0,
"reward": 1.437424659729004,
"reward_std": 0.1931784749031067,
"rewards/accuracy_reward_long_step": 0.55078125,
"rewards/final_brier_reward_long_step": 0.7560929656028748,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7904808521270752,
"step": 306
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 466.0,
"completions/max_terminated_length": 466.0,
"completions/mean_length": 270.6640625,
"completions/mean_terminated_length": 270.6640625,
"completions/min_length": 136.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.4912,
"grad_norm": 0.028145214542746544,
"learning_rate": 5.658362989323842e-07,
"loss": 0.0125,
"num_tokens": 150638356.0,
"reward": 1.3298184871673584,
"reward_std": 0.15343712270259857,
"rewards/accuracy_reward_long_step": 0.45703125,
"rewards/final_brier_reward_long_step": 0.7142324447631836,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.77691650390625,
"step": 307
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 506.0,
"completions/max_terminated_length": 506.0,
"completions/mean_length": 265.69140625,
"completions/mean_terminated_length": 265.69140625,
"completions/min_length": 109.0,
"completions/min_terminated_length": 109.0,
"epoch": 0.4928,
"grad_norm": 0.03243206813931465,
"learning_rate": 5.640569395017794e-07,
"loss": -0.0123,
"num_tokens": 151144573.0,
"reward": 1.4519245624542236,
"reward_std": 0.1609051525592804,
"rewards/accuracy_reward_long_step": 0.5859375,
"rewards/final_brier_reward_long_step": 0.7463042736053467,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7176437377929688,
"step": 308
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 525.0,
"completions/max_terminated_length": 525.0,
"completions/mean_length": 253.2734375,
"completions/mean_terminated_length": 253.2734375,
"completions/min_length": 143.0,
"completions/min_terminated_length": 143.0,
"epoch": 0.4944,
"grad_norm": 0.028735455125570297,
"learning_rate": 5.622775800711744e-07,
"loss": 0.0077,
"num_tokens": 151640987.0,
"reward": 1.323132038116455,
"reward_std": 0.12191449105739594,
"rewards/accuracy_reward_long_step": 0.4453125,
"rewards/final_brier_reward_long_step": 0.7362834215164185,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7749943733215332,
"step": 309
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 517.0,
"completions/max_terminated_length": 517.0,
"completions/mean_length": 267.671875,
"completions/mean_terminated_length": 267.671875,
"completions/min_length": 171.0,
"completions/min_terminated_length": 171.0,
"epoch": 0.496,
"grad_norm": 0.03202186897397041,
"learning_rate": 5.604982206405694e-07,
"loss": 0.0031,
"num_tokens": 152130871.0,
"reward": 1.4370348453521729,
"reward_std": 0.1757480651140213,
"rewards/accuracy_reward_long_step": 0.57421875,
"rewards/final_brier_reward_long_step": 0.7055065631866455,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.745758056640625,
"step": 310
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 476.0,
"completions/max_terminated_length": 476.0,
"completions/mean_length": 268.640625,
"completions/mean_terminated_length": 268.640625,
"completions/min_length": 158.0,
"completions/min_terminated_length": 158.0,
"epoch": 0.4976,
"grad_norm": 0.029871582984924316,
"learning_rate": 5.587188612099644e-07,
"loss": 0.0021,
"num_tokens": 152631003.0,
"reward": 1.5090341567993164,
"reward_std": 0.16095715761184692,
"rewards/accuracy_reward_long_step": 0.625,
"rewards/final_brier_reward_long_step": 0.764398455619812,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7717381715774536,
"step": 311
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 524.0,
"completions/max_terminated_length": 524.0,
"completions/mean_length": 259.47265625,
"completions/mean_terminated_length": 259.47265625,
"completions/min_length": 142.0,
"completions/min_terminated_length": 142.0,
"epoch": 0.4992,
"grad_norm": 0.03202946111559868,
"learning_rate": 5.569395017793594e-07,
"loss": -0.006,
"num_tokens": 153126428.0,
"reward": 1.4881701469421387,
"reward_std": 0.14949887990951538,
"rewards/accuracy_reward_long_step": 0.5859375,
"rewards/final_brier_reward_long_step": 0.7662238478660583,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.842707097530365,
"step": 312
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 515.0,
"completions/max_terminated_length": 515.0,
"completions/mean_length": 249.61328125,
"completions/mean_terminated_length": 249.61328125,
"completions/min_length": 94.0,
"completions/min_terminated_length": 94.0,
"epoch": 0.5008,
"grad_norm": 0.03125175088644028,
"learning_rate": 5.551601423487544e-07,
"loss": 0.0167,
"num_tokens": 153607737.0,
"reward": 1.461435079574585,
"reward_std": 0.16793528199195862,
"rewards/accuracy_reward_long_step": 0.5625,
"rewards/final_brier_reward_long_step": 0.8274839520454407,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7682562470436096,
"step": 313
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 547.0,
"completions/max_terminated_length": 547.0,
"completions/mean_length": 260.56640625,
"completions/mean_terminated_length": 260.56640625,
"completions/min_length": 122.0,
"completions/min_terminated_length": 122.0,
"epoch": 0.5024,
"grad_norm": 0.03080155700445175,
"learning_rate": 5.533807829181495e-07,
"loss": -0.0045,
"num_tokens": 154100658.0,
"reward": 1.3649942874908447,
"reward_std": 0.20213352143764496,
"rewards/accuracy_reward_long_step": 0.48828125,
"rewards/final_brier_reward_long_step": 0.7633898258209229,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7434619665145874,
"step": 314
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 465.0,
"completions/max_terminated_length": 465.0,
"completions/mean_length": 260.87109375,
"completions/mean_terminated_length": 260.87109375,
"completions/min_length": 92.0,
"completions/min_terminated_length": 92.0,
"epoch": 0.504,
"grad_norm": 0.030286213383078575,
"learning_rate": 5.516014234875445e-07,
"loss": 0.0064,
"num_tokens": 154586593.0,
"reward": 1.4095053672790527,
"reward_std": 0.19581949710845947,
"rewards/accuracy_reward_long_step": 0.5390625,
"rewards/final_brier_reward_long_step": 0.7196574211120605,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7621144652366638,
"step": 315
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 575.0,
"completions/max_terminated_length": 575.0,
"completions/mean_length": 282.4765625,
"completions/mean_terminated_length": 282.4765625,
"completions/min_length": 146.0,
"completions/min_terminated_length": 146.0,
"epoch": 0.5056,
"grad_norm": 0.03086298704147339,
"learning_rate": 5.498220640569395e-07,
"loss": -0.0094,
"num_tokens": 155107683.0,
"reward": 1.233945608139038,
"reward_std": 0.2137664556503296,
"rewards/accuracy_reward_long_step": 0.40234375,
"rewards/final_brier_reward_long_step": 0.5642339587211609,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7621732950210571,
"step": 316
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 646.0,
"completions/max_terminated_length": 646.0,
"completions/mean_length": 273.9296875,
"completions/mean_terminated_length": 273.9296875,
"completions/min_length": 155.0,
"completions/min_terminated_length": 155.0,
"epoch": 0.5072,
"grad_norm": 0.029751170426607132,
"learning_rate": 5.480427046263345e-07,
"loss": 0.0058,
"num_tokens": 155611881.0,
"reward": 1.3837661743164062,
"reward_std": 0.1578415334224701,
"rewards/accuracy_reward_long_step": 0.484375,
"rewards/final_brier_reward_long_step": 0.7951062917709351,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8024587631225586,
"step": 317
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 655.0,
"completions/max_terminated_length": 655.0,
"completions/mean_length": 270.0625,
"completions/mean_terminated_length": 270.0625,
"completions/min_length": 138.0,
"completions/min_terminated_length": 138.0,
"epoch": 0.5088,
"grad_norm": 0.030748968943953514,
"learning_rate": 5.462633451957295e-07,
"loss": 0.0082,
"num_tokens": 156108097.0,
"reward": 1.4630606174468994,
"reward_std": 0.1447874754667282,
"rewards/accuracy_reward_long_step": 0.5546875,
"rewards/final_brier_reward_long_step": 0.8417631983757019,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7917290329933167,
"step": 318
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 568.0,
"completions/max_terminated_length": 568.0,
"completions/mean_length": 261.6328125,
"completions/mean_terminated_length": 261.6328125,
"completions/min_length": 141.0,
"completions/min_terminated_length": 141.0,
"epoch": 0.5104,
"grad_norm": 0.03255579620599747,
"learning_rate": 5.444839857651245e-07,
"loss": 0.0189,
"num_tokens": 156600459.0,
"reward": 1.453078031539917,
"reward_std": 0.21487998962402344,
"rewards/accuracy_reward_long_step": 0.55859375,
"rewards/final_brier_reward_long_step": 0.7861437797546387,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7917930483818054,
"step": 319
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 482.0,
"completions/max_terminated_length": 482.0,
"completions/mean_length": 255.890625,
"completions/mean_terminated_length": 255.890625,
"completions/min_length": 105.0,
"completions/min_terminated_length": 105.0,
"epoch": 0.512,
"grad_norm": 0.03229495882987976,
"learning_rate": 5.427046263345195e-07,
"loss": 0.0071,
"num_tokens": 157085455.0,
"reward": 1.3672808408737183,
"reward_std": 0.13604342937469482,
"rewards/accuracy_reward_long_step": 0.48046875,
"rewards/final_brier_reward_long_step": 0.7324371337890625,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8148113489151001,
"step": 320
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 508.0,
"completions/max_terminated_length": 508.0,
"completions/mean_length": 260.43359375,
"completions/mean_terminated_length": 260.43359375,
"completions/min_length": 114.0,
"completions/min_terminated_length": 114.0,
"epoch": 0.5136,
"grad_norm": 0.03101903200149536,
"learning_rate": 5.409252669039146e-07,
"loss": 0.0156,
"num_tokens": 157573070.0,
"reward": 1.4484410285949707,
"reward_std": 0.19034847617149353,
"rewards/accuracy_reward_long_step": 0.5625,
"rewards/final_brier_reward_long_step": 0.7853777408599854,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.758386492729187,
"step": 321
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 582.0,
"completions/max_terminated_length": 582.0,
"completions/mean_length": 259.2109375,
"completions/mean_terminated_length": 259.2109375,
"completions/min_length": 101.0,
"completions/min_terminated_length": 101.0,
"epoch": 0.5152,
"grad_norm": 0.032188545912504196,
"learning_rate": 5.391459074733096e-07,
"loss": 0.0058,
"num_tokens": 158069828.0,
"reward": 1.2921218872070312,
"reward_std": 0.1610349416732788,
"rewards/accuracy_reward_long_step": 0.4453125,
"rewards/final_brier_reward_long_step": 0.6915820837020874,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.703468382358551,
"step": 322
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 503.0,
"completions/max_terminated_length": 503.0,
"completions/mean_length": 253.05859375,
"completions/mean_terminated_length": 253.05859375,
"completions/min_length": 114.0,
"completions/min_terminated_length": 114.0,
"epoch": 0.5168,
"grad_norm": 0.030405355617403984,
"learning_rate": 5.373665480427047e-07,
"loss": 0.0005,
"num_tokens": 158552475.0,
"reward": 1.4460304975509644,
"reward_std": 0.18330608308315277,
"rewards/accuracy_reward_long_step": 0.5390625,
"rewards/final_brier_reward_long_step": 0.8409663438796997,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7869055271148682,
"step": 323
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 450.0,
"completions/max_terminated_length": 450.0,
"completions/mean_length": 249.57421875,
"completions/mean_terminated_length": 249.57421875,
"completions/min_length": 112.0,
"completions/min_terminated_length": 112.0,
"epoch": 0.5184,
"grad_norm": 0.033257272094488144,
"learning_rate": 5.355871886120996e-07,
"loss": -0.0137,
"num_tokens": 159043982.0,
"reward": 1.4014875888824463,
"reward_std": 0.10161018371582031,
"rewards/accuracy_reward_long_step": 0.49609375,
"rewards/final_brier_reward_long_step": 0.8111592531204224,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8104158043861389,
"step": 324
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 517.0,
"completions/max_terminated_length": 517.0,
"completions/mean_length": 254.00390625,
"completions/mean_terminated_length": 254.00390625,
"completions/min_length": 141.0,
"completions/min_terminated_length": 141.0,
"epoch": 0.52,
"grad_norm": 0.03563224524259567,
"learning_rate": 5.338078291814946e-07,
"loss": -0.0095,
"num_tokens": 159536455.0,
"reward": 1.3375245332717896,
"reward_std": 0.16401183605194092,
"rewards/accuracy_reward_long_step": 0.46875,
"rewards/final_brier_reward_long_step": 0.6796140670776367,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7954840660095215,
"step": 325
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 461.0,
"completions/max_terminated_length": 461.0,
"completions/mean_length": 242.38671875,
"completions/mean_terminated_length": 242.38671875,
"completions/min_length": 105.0,
"completions/min_terminated_length": 105.0,
"epoch": 0.5216,
"grad_norm": 0.034420643001794815,
"learning_rate": 5.320284697508896e-07,
"loss": 0.0041,
"num_tokens": 160011394.0,
"reward": 1.3240625858306885,
"reward_std": 0.15140679478645325,
"rewards/accuracy_reward_long_step": 0.4609375,
"rewards/final_brier_reward_long_step": 0.7299777269363403,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7225224375724792,
"step": 326
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 497.0,
"completions/max_terminated_length": 497.0,
"completions/mean_length": 248.5546875,
"completions/mean_terminated_length": 248.5546875,
"completions/min_length": 124.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.5232,
"grad_norm": 0.03537153825163841,
"learning_rate": 5.302491103202846e-07,
"loss": -0.012,
"num_tokens": 160502680.0,
"reward": 1.5624842643737793,
"reward_std": 0.18345743417739868,
"rewards/accuracy_reward_long_step": 0.69921875,
"rewards/final_brier_reward_long_step": 0.7545043230056763,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.6985577344894409,
"step": 327
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 608.0,
"completions/max_terminated_length": 608.0,
"completions/mean_length": 255.44921875,
"completions/mean_terminated_length": 255.44921875,
"completions/min_length": 105.0,
"completions/min_terminated_length": 105.0,
"epoch": 0.5248,
"grad_norm": 0.038355033844709396,
"learning_rate": 5.284697508896797e-07,
"loss": 0.0104,
"num_tokens": 160992083.0,
"reward": 1.406353235244751,
"reward_std": 0.15258005261421204,
"rewards/accuracy_reward_long_step": 0.51953125,
"rewards/final_brier_reward_long_step": 0.7742776274681091,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7730100750923157,
"step": 328
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 478.0,
"completions/max_terminated_length": 478.0,
"completions/mean_length": 249.23046875,
"completions/mean_terminated_length": 249.23046875,
"completions/min_length": 96.0,
"completions/min_terminated_length": 96.0,
"epoch": 0.5264,
"grad_norm": 0.03090524673461914,
"learning_rate": 5.266903914590747e-07,
"loss": 0.0061,
"num_tokens": 161497718.0,
"reward": 1.4268403053283691,
"reward_std": 0.11180461198091507,
"rewards/accuracy_reward_long_step": 0.49609375,
"rewards/final_brier_reward_long_step": 0.8800667524337769,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8429189324378967,
"step": 329
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 442.0,
"completions/max_terminated_length": 442.0,
"completions/mean_length": 250.3515625,
"completions/mean_terminated_length": 250.3515625,
"completions/min_length": 103.0,
"completions/min_terminated_length": 103.0,
"epoch": 0.528,
"grad_norm": 0.034447081387043,
"learning_rate": 5.249110320284698e-07,
"loss": 0.0033,
"num_tokens": 161980056.0,
"reward": 1.3684167861938477,
"reward_std": 0.18932107090950012,
"rewards/accuracy_reward_long_step": 0.484375,
"rewards/final_brier_reward_long_step": 0.7735214829444885,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7626460790634155,
"step": 330
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 541.0,
"completions/max_terminated_length": 541.0,
"completions/mean_length": 249.62109375,
"completions/mean_terminated_length": 249.62109375,
"completions/min_length": 101.0,
"completions/min_terminated_length": 101.0,
"epoch": 0.5296,
"grad_norm": 0.034051742404699326,
"learning_rate": 5.231316725978647e-07,
"loss": -0.0067,
"num_tokens": 162477343.0,
"reward": 1.403045654296875,
"reward_std": 0.18791253864765167,
"rewards/accuracy_reward_long_step": 0.5234375,
"rewards/final_brier_reward_long_step": 0.7827702760696411,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7434748411178589,
"step": 331
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 618.0,
"completions/max_terminated_length": 618.0,
"completions/mean_length": 243.046875,
"completions/mean_terminated_length": 243.046875,
"completions/min_length": 98.0,
"completions/min_terminated_length": 98.0,
"epoch": 0.5312,
"grad_norm": 0.03361974656581879,
"learning_rate": 5.213523131672598e-07,
"loss": 0.0114,
"num_tokens": 162956923.0,
"reward": 1.544013500213623,
"reward_std": 0.20560047030448914,
"rewards/accuracy_reward_long_step": 0.66796875,
"rewards/final_brier_reward_long_step": 0.7412210702896118,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7629580497741699,
"step": 332
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 578.0,
"completions/max_terminated_length": 578.0,
"completions/mean_length": 256.22265625,
"completions/mean_terminated_length": 256.22265625,
"completions/min_length": 127.0,
"completions/min_terminated_length": 127.0,
"epoch": 0.5328,
"grad_norm": 0.03932815417647362,
"learning_rate": 5.195729537366548e-07,
"loss": -0.0093,
"num_tokens": 163447412.0,
"reward": 1.455832839012146,
"reward_std": 0.1748843789100647,
"rewards/accuracy_reward_long_step": 0.546875,
"rewards/final_brier_reward_long_step": 0.825259804725647,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.810571551322937,
"step": 333
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 538.0,
"completions/max_terminated_length": 538.0,
"completions/mean_length": 245.2265625,
"completions/mean_terminated_length": 245.2265625,
"completions/min_length": 97.0,
"completions/min_terminated_length": 97.0,
"epoch": 0.5344,
"grad_norm": 0.03335999324917793,
"learning_rate": 5.177935943060498e-07,
"loss": -0.0032,
"num_tokens": 163941862.0,
"reward": 1.3817181587219238,
"reward_std": 0.17352280020713806,
"rewards/accuracy_reward_long_step": 0.515625,
"rewards/final_brier_reward_long_step": 0.7708051204681396,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.6935676336288452,
"step": 334
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 484.0,
"completions/max_terminated_length": 484.0,
"completions/mean_length": 236.14453125,
"completions/mean_terminated_length": 236.14453125,
"completions/min_length": 96.0,
"completions/min_terminated_length": 96.0,
"epoch": 0.536,
"grad_norm": 0.03453889861702919,
"learning_rate": 5.160142348754448e-07,
"loss": 0.0059,
"num_tokens": 164414147.0,
"reward": 1.4293192625045776,
"reward_std": 0.1838047057390213,
"rewards/accuracy_reward_long_step": 0.52734375,
"rewards/final_brier_reward_long_step": 0.8181675672531128,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7897346019744873,
"step": 335
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 448.0,
"completions/max_terminated_length": 448.0,
"completions/mean_length": 247.52734375,
"completions/mean_terminated_length": 247.52734375,
"completions/min_length": 115.0,
"completions/min_terminated_length": 115.0,
"epoch": 0.5376,
"grad_norm": 0.03347809612751007,
"learning_rate": 5.142348754448398e-07,
"loss": -0.0082,
"num_tokens": 164904322.0,
"reward": 1.4061800241470337,
"reward_std": 0.15439936518669128,
"rewards/accuracy_reward_long_step": 0.5546875,
"rewards/final_brier_reward_long_step": 0.740646481513977,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.6653236746788025,
"step": 336
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 547.0,
"completions/max_terminated_length": 547.0,
"completions/mean_length": 246.3828125,
"completions/mean_terminated_length": 246.3828125,
"completions/min_length": 124.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.5392,
"grad_norm": 0.03456057235598564,
"learning_rate": 5.124555160142349e-07,
"loss": 0.002,
"num_tokens": 165401044.0,
"reward": 1.3541990518569946,
"reward_std": 0.1308155059814453,
"rewards/accuracy_reward_long_step": 0.515625,
"rewards/final_brier_reward_long_step": 0.6761799454689026,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.6781162023544312,
"step": 337
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 693.0,
"completions/max_terminated_length": 693.0,
"completions/mean_length": 259.3046875,
"completions/mean_terminated_length": 259.3046875,
"completions/min_length": 134.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.5408,
"grad_norm": 0.029813647270202637,
"learning_rate": 5.106761565836298e-07,
"loss": 0.0009,
"num_tokens": 165906058.0,
"reward": 1.3256361484527588,
"reward_std": 0.19530092179775238,
"rewards/accuracy_reward_long_step": 0.46484375,
"rewards/final_brier_reward_long_step": 0.670098066329956,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7730719447135925,
"step": 338
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 439.0,
"completions/max_terminated_length": 439.0,
"completions/mean_length": 246.8046875,
"completions/mean_terminated_length": 246.8046875,
"completions/min_length": 106.0,
"completions/min_terminated_length": 106.0,
"epoch": 0.5424,
"grad_norm": 0.03796224668622017,
"learning_rate": 5.088967971530249e-07,
"loss": 0.0075,
"num_tokens": 166401920.0,
"reward": 1.4465839862823486,
"reward_std": 0.207666277885437,
"rewards/accuracy_reward_long_step": 0.625,
"rewards/final_brier_reward_long_step": 0.7033705711364746,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.5829657316207886,
"step": 339
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 522.0,
"completions/max_terminated_length": 522.0,
"completions/mean_length": 260.37109375,
"completions/mean_terminated_length": 260.37109375,
"completions/min_length": 155.0,
"completions/min_terminated_length": 155.0,
"epoch": 0.544,
"grad_norm": 0.03487522527575493,
"learning_rate": 5.071174377224199e-07,
"loss": -0.0028,
"num_tokens": 166900079.0,
"reward": 1.364084243774414,
"reward_std": 0.12331333756446838,
"rewards/accuracy_reward_long_step": 0.50390625,
"rewards/final_brier_reward_long_step": 0.7763662934303284,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.6643457412719727,
"step": 340
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 882.0,
"completions/max_terminated_length": 882.0,
"completions/mean_length": 253.23828125,
"completions/mean_terminated_length": 253.23828125,
"completions/min_length": 136.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.5456,
"grad_norm": 0.03571302443742752,
"learning_rate": 5.053380782918149e-07,
"loss": 0.0008,
"num_tokens": 167388084.0,
"reward": 1.4308526515960693,
"reward_std": 0.14994728565216064,
"rewards/accuracy_reward_long_step": 0.55078125,
"rewards/final_brier_reward_long_step": 0.7553993463516235,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7648867964744568,
"step": 341
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 547.0,
"completions/max_terminated_length": 547.0,
"completions/mean_length": 243.5703125,
"completions/mean_terminated_length": 243.5703125,
"completions/min_length": 102.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.5472,
"grad_norm": 0.034076888114213943,
"learning_rate": 5.0355871886121e-07,
"loss": -0.0049,
"num_tokens": 167867318.0,
"reward": 1.4327844381332397,
"reward_std": 0.17568854987621307,
"rewards/accuracy_reward_long_step": 0.5625,
"rewards/final_brier_reward_long_step": 0.7377663850784302,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7433711290359497,
"step": 342
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 492.0,
"completions/max_terminated_length": 492.0,
"completions/mean_length": 231.46875,
"completions/mean_terminated_length": 231.46875,
"completions/min_length": 130.0,
"completions/min_terminated_length": 130.0,
"epoch": 0.5488,
"grad_norm": 0.03265485167503357,
"learning_rate": 5.01779359430605e-07,
"loss": 0.0058,
"num_tokens": 168334662.0,
"reward": 1.5790760517120361,
"reward_std": 0.20140470564365387,
"rewards/accuracy_reward_long_step": 0.69921875,
"rewards/final_brier_reward_long_step": 0.7507095336914062,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7765324115753174,
"step": 343
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 459.0,
"completions/max_terminated_length": 459.0,
"completions/mean_length": 242.12890625,
"completions/mean_terminated_length": 242.12890625,
"completions/min_length": 102.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.5504,
"grad_norm": 0.0313512459397316,
"learning_rate": 5e-07,
"loss": -0.0061,
"num_tokens": 168823871.0,
"reward": 1.3449064493179321,
"reward_std": 0.16971346735954285,
"rewards/accuracy_reward_long_step": 0.47265625,
"rewards/final_brier_reward_long_step": 0.7380057573318481,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7509950995445251,
"step": 344
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 432.0,
"completions/max_terminated_length": 432.0,
"completions/mean_length": 240.40625,
"completions/mean_terminated_length": 240.40625,
"completions/min_length": 128.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.552,
"grad_norm": 0.035064004361629486,
"learning_rate": 4.98220640569395e-07,
"loss": 0.0009,
"num_tokens": 169308207.0,
"reward": 1.447171688079834,
"reward_std": 0.1690702587366104,
"rewards/accuracy_reward_long_step": 0.5703125,
"rewards/final_brier_reward_long_step": 0.766781210899353,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7406556606292725,
"step": 345
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 542.0,
"completions/max_terminated_length": 542.0,
"completions/mean_length": 255.90234375,
"completions/mean_terminated_length": 255.90234375,
"completions/min_length": 110.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.5536,
"grad_norm": 0.03382926061749458,
"learning_rate": 4.9644128113879e-07,
"loss": 0.0094,
"num_tokens": 169790286.0,
"reward": 1.3020596504211426,
"reward_std": 0.1404658854007721,
"rewards/accuracy_reward_long_step": 0.4375,
"rewards/final_brier_reward_long_step": 0.7163107991218567,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.741927981376648,
"step": 346
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 472.0,
"completions/max_terminated_length": 472.0,
"completions/mean_length": 238.2265625,
"completions/mean_terminated_length": 238.2265625,
"completions/min_length": 102.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.5552,
"grad_norm": 0.04365375638008118,
"learning_rate": 4.94661921708185e-07,
"loss": 0.0012,
"num_tokens": 170253896.0,
"reward": 1.3717888593673706,
"reward_std": 0.159200519323349,
"rewards/accuracy_reward_long_step": 0.46875,
"rewards/final_brier_reward_long_step": 0.7934491634368896,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8187063932418823,
"step": 347
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 448.0,
"completions/max_terminated_length": 448.0,
"completions/mean_length": 253.41796875,
"completions/mean_terminated_length": 253.41796875,
"completions/min_length": 137.0,
"completions/min_terminated_length": 137.0,
"epoch": 0.5568,
"grad_norm": 0.034737542271614075,
"learning_rate": 4.9288256227758e-07,
"loss": 0.0096,
"num_tokens": 170751355.0,
"reward": 1.2173829078674316,
"reward_std": 0.1510533094406128,
"rewards/accuracy_reward_long_step": 0.375,
"rewards/final_brier_reward_long_step": 0.6595523357391357,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7099793553352356,
"step": 348
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 456.0,
"completions/max_terminated_length": 456.0,
"completions/mean_length": 239.58203125,
"completions/mean_terminated_length": 239.58203125,
"completions/min_length": 110.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.5584,
"grad_norm": 0.04001186043024063,
"learning_rate": 4.91103202846975e-07,
"loss": -0.002,
"num_tokens": 171235744.0,
"reward": 1.4833966493606567,
"reward_std": 0.1600235551595688,
"rewards/accuracy_reward_long_step": 0.5859375,
"rewards/final_brier_reward_long_step": 0.8168105483055115,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7730263471603394,
"step": 349
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 571.0,
"completions/max_terminated_length": 571.0,
"completions/mean_length": 255.8125,
"completions/mean_terminated_length": 255.8125,
"completions/min_length": 104.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.56,
"grad_norm": 0.03591832518577576,
"learning_rate": 4.893238434163701e-07,
"loss": -0.0068,
"num_tokens": 171725112.0,
"reward": 1.219707727432251,
"reward_std": 0.14950095117092133,
"rewards/accuracy_reward_long_step": 0.37109375,
"rewards/final_brier_reward_long_step": 0.6841285228729248,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7103271484375,
"step": 350
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 608.0,
"completions/max_terminated_length": 608.0,
"completions/mean_length": 253.22265625,
"completions/mean_terminated_length": 253.22265625,
"completions/min_length": 80.0,
"completions/min_terminated_length": 80.0,
"epoch": 0.5616,
"grad_norm": 0.033379342406988144,
"learning_rate": 4.875444839857651e-07,
"loss": 0.0029,
"num_tokens": 172224321.0,
"reward": 1.3003888130187988,
"reward_std": 0.14580082893371582,
"rewards/accuracy_reward_long_step": 0.41015625,
"rewards/final_brier_reward_long_step": 0.7715871334075928,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7893432974815369,
"step": 351
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 505.0,
"completions/max_terminated_length": 505.0,
"completions/mean_length": 247.00390625,
"completions/mean_terminated_length": 247.00390625,
"completions/min_length": 97.0,
"completions/min_terminated_length": 97.0,
"epoch": 0.5632,
"grad_norm": 0.03563377261161804,
"learning_rate": 4.857651245551601e-07,
"loss": -0.0141,
"num_tokens": 172723978.0,
"reward": 1.2515490055084229,
"reward_std": 0.20609885454177856,
"rewards/accuracy_reward_long_step": 0.42578125,
"rewards/final_brier_reward_long_step": 0.5958093404769897,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7072615027427673,
"step": 352
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 444.0,
"completions/max_terminated_length": 444.0,
"completions/mean_length": 240.86328125,
"completions/mean_terminated_length": 240.86328125,
"completions/min_length": 103.0,
"completions/min_terminated_length": 103.0,
"epoch": 0.5648,
"grad_norm": 0.030260441824793816,
"learning_rate": 4.839857651245551e-07,
"loss": 0.0111,
"num_tokens": 173228511.0,
"reward": 1.430063009262085,
"reward_std": 0.10446056723594666,
"rewards/accuracy_reward_long_step": 0.5234375,
"rewards/final_brier_reward_long_step": 0.7712934017181396,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8552085757255554,
"step": 353
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 423.0,
"completions/max_terminated_length": 423.0,
"completions/mean_length": 246.74609375,
"completions/mean_terminated_length": 246.74609375,
"completions/min_length": 116.0,
"completions/min_terminated_length": 116.0,
"epoch": 0.5664,
"grad_norm": 0.03534315153956413,
"learning_rate": 4.822064056939501e-07,
"loss": 0.007,
"num_tokens": 173716982.0,
"reward": 1.3442493677139282,
"reward_std": 0.20917916297912598,
"rewards/accuracy_reward_long_step": 0.48046875,
"rewards/final_brier_reward_long_step": 0.7205374836921692,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7345851063728333,
"step": 354
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 458.0,
"completions/max_terminated_length": 458.0,
"completions/mean_length": 243.53125,
"completions/mean_terminated_length": 243.53125,
"completions/min_length": 140.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.568,
"grad_norm": 0.046947211027145386,
"learning_rate": 4.804270462633451e-07,
"loss": -0.0176,
"num_tokens": 174209438.0,
"reward": 1.4447617530822754,
"reward_std": 0.1805291771888733,
"rewards/accuracy_reward_long_step": 0.57421875,
"rewards/final_brier_reward_long_step": 0.7230343818664551,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7591376304626465,
"step": 355
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 611.0,
"completions/max_terminated_length": 611.0,
"completions/mean_length": 244.32421875,
"completions/mean_terminated_length": 244.32421875,
"completions/min_length": 132.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.5696,
"grad_norm": 0.03166520223021507,
"learning_rate": 4.786476868327403e-07,
"loss": 0.0097,
"num_tokens": 174683969.0,
"reward": 1.4561214447021484,
"reward_std": 0.1352284848690033,
"rewards/accuracy_reward_long_step": 0.57421875,
"rewards/final_brier_reward_long_step": 0.7665960788726807,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7610146999359131,
"step": 356
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 540.0,
"completions/max_terminated_length": 540.0,
"completions/mean_length": 265.96875,
"completions/mean_terminated_length": 265.96875,
"completions/min_length": 117.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.5712,
"grad_norm": 0.03730427846312523,
"learning_rate": 4.768683274021353e-07,
"loss": -0.0075,
"num_tokens": 175174505.0,
"reward": 1.296769142150879,
"reward_std": 0.15302547812461853,
"rewards/accuracy_reward_long_step": 0.4140625,
"rewards/final_brier_reward_long_step": 0.7661605477333069,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7646663188934326,
"step": 357
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 571.0,
"completions/max_terminated_length": 571.0,
"completions/mean_length": 258.87890625,
"completions/mean_terminated_length": 258.87890625,
"completions/min_length": 144.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.5728,
"grad_norm": 0.038667719811201096,
"learning_rate": 4.7508896797153023e-07,
"loss": 0.01,
"num_tokens": 175667706.0,
"reward": 1.4412386417388916,
"reward_std": 0.21622003614902496,
"rewards/accuracy_reward_long_step": 0.546875,
"rewards/final_brier_reward_long_step": 0.7790859341621399,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8061808347702026,
"step": 358
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 449.0,
"completions/max_terminated_length": 449.0,
"completions/mean_length": 249.40234375,
"completions/mean_terminated_length": 249.40234375,
"completions/min_length": 106.0,
"completions/min_terminated_length": 106.0,
"epoch": 0.5744,
"grad_norm": 0.03672811761498451,
"learning_rate": 4.733096085409252e-07,
"loss": 0.0064,
"num_tokens": 176151705.0,
"reward": 1.4096336364746094,
"reward_std": 0.14016187191009521,
"rewards/accuracy_reward_long_step": 0.51171875,
"rewards/final_brier_reward_long_step": 0.8072555065155029,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7844040393829346,
"step": 359
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 491.0,
"completions/max_terminated_length": 491.0,
"completions/mean_length": 256.26953125,
"completions/mean_terminated_length": 256.26953125,
"completions/min_length": 111.0,
"completions/min_terminated_length": 111.0,
"epoch": 0.576,
"grad_norm": 0.031908176839351654,
"learning_rate": 4.7153024911032026e-07,
"loss": 0.0018,
"num_tokens": 176654878.0,
"reward": 1.2949192523956299,
"reward_std": 0.1339997947216034,
"rewards/accuracy_reward_long_step": 0.41015625,
"rewards/final_brier_reward_long_step": 0.7606054544448853,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7784462571144104,
"step": 360
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 586.0,
"completions/max_terminated_length": 586.0,
"completions/mean_length": 260.42578125,
"completions/mean_terminated_length": 260.42578125,
"completions/min_length": 145.0,
"completions/min_terminated_length": 145.0,
"epoch": 0.5776,
"grad_norm": 0.03353104740381241,
"learning_rate": 4.697508896797153e-07,
"loss": -0.0028,
"num_tokens": 177158483.0,
"reward": 1.3936898708343506,
"reward_std": 0.17937517166137695,
"rewards/accuracy_reward_long_step": 0.51953125,
"rewards/final_brier_reward_long_step": 0.763106644153595,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7335278391838074,
"step": 361
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 533.0,
"completions/max_terminated_length": 533.0,
"completions/mean_length": 250.55078125,
"completions/mean_terminated_length": 250.55078125,
"completions/min_length": 155.0,
"completions/min_terminated_length": 155.0,
"epoch": 0.5792,
"grad_norm": 0.0516292005777359,
"learning_rate": 4.679715302491103e-07,
"loss": 0.002,
"num_tokens": 177665080.0,
"reward": 1.3700807094573975,
"reward_std": 0.1468803435564041,
"rewards/accuracy_reward_long_step": 0.5234375,
"rewards/final_brier_reward_long_step": 0.6796808242797852,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7068922519683838,
"step": 362
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 593.0,
"completions/max_terminated_length": 593.0,
"completions/mean_length": 257.5625,
"completions/mean_terminated_length": 257.5625,
"completions/min_length": 110.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.5808,
"grad_norm": 0.036485347896814346,
"learning_rate": 4.661921708185053e-07,
"loss": -0.0097,
"num_tokens": 178161496.0,
"reward": 1.1560263633728027,
"reward_std": 0.13470560312271118,
"rewards/accuracy_reward_long_step": 0.2890625,
"rewards/final_brier_reward_long_step": 0.6383723020553589,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.829483687877655,
"step": 363
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 399.0,
"completions/max_terminated_length": 399.0,
"completions/mean_length": 231.03515625,
"completions/mean_terminated_length": 231.03515625,
"completions/min_length": 129.0,
"completions/min_terminated_length": 129.0,
"epoch": 0.5824,
"grad_norm": 0.039585795253515244,
"learning_rate": 4.644128113879003e-07,
"loss": 0.0097,
"num_tokens": 178643185.0,
"reward": 1.4548025131225586,
"reward_std": 0.1070779412984848,
"rewards/accuracy_reward_long_step": 0.5703125,
"rewards/final_brier_reward_long_step": 0.7518347501754761,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7861253023147583,
"step": 364
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 505.0,
"completions/max_terminated_length": 505.0,
"completions/mean_length": 240.80078125,
"completions/mean_terminated_length": 240.80078125,
"completions/min_length": 99.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.584,
"grad_norm": 0.037114016711711884,
"learning_rate": 4.626334519572954e-07,
"loss": -0.0042,
"num_tokens": 179134550.0,
"reward": 1.3695037364959717,
"reward_std": 0.16490252315998077,
"rewards/accuracy_reward_long_step": 0.4765625,
"rewards/final_brier_reward_long_step": 0.7435758113861084,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8281888365745544,
"step": 365
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 478.0,
"completions/max_terminated_length": 478.0,
"completions/mean_length": 236.41796875,
"completions/mean_terminated_length": 236.41796875,
"completions/min_length": 101.0,
"completions/min_terminated_length": 101.0,
"epoch": 0.5856,
"grad_norm": 0.03539412468671799,
"learning_rate": 4.608540925266904e-07,
"loss": 0.0036,
"num_tokens": 179618601.0,
"reward": 1.3902171850204468,
"reward_std": 0.11728814244270325,
"rewards/accuracy_reward_long_step": 0.51953125,
"rewards/final_brier_reward_long_step": 0.767492949962616,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7152509689331055,
"step": 366
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 467.0,
"completions/max_terminated_length": 467.0,
"completions/mean_length": 250.51953125,
"completions/mean_terminated_length": 250.51953125,
"completions/min_length": 99.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.5872,
"grad_norm": 0.03185239061713219,
"learning_rate": 4.590747330960854e-07,
"loss": 0.0033,
"num_tokens": 180097414.0,
"reward": 1.4346997737884521,
"reward_std": 0.12333646416664124,
"rewards/accuracy_reward_long_step": 0.51953125,
"rewards/final_brier_reward_long_step": 0.8788655996322632,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7818087339401245,
"step": 367
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 491.0,
"completions/max_terminated_length": 491.0,
"completions/mean_length": 243.2421875,
"completions/mean_terminated_length": 243.2421875,
"completions/min_length": 99.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.5888,
"grad_norm": 0.05108208209276199,
"learning_rate": 4.5729537366548043e-07,
"loss": 0.0105,
"num_tokens": 180588972.0,
"reward": 1.4830061197280884,
"reward_std": 0.13395658135414124,
"rewards/accuracy_reward_long_step": 0.59375,
"rewards/final_brier_reward_long_step": 0.7892441749572754,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7677804231643677,
"step": 368
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 488.0,
"completions/max_terminated_length": 488.0,
"completions/mean_length": 250.55859375,
"completions/mean_terminated_length": 250.55859375,
"completions/min_length": 102.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.5904,
"grad_norm": 0.033352505415678024,
"learning_rate": 4.555160142348754e-07,
"loss": -0.0026,
"num_tokens": 181083883.0,
"reward": 1.5408804416656494,
"reward_std": 0.17830899357795715,
"rewards/accuracy_reward_long_step": 0.62109375,
"rewards/final_brier_reward_long_step": 0.8529410362243652,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.826205849647522,
"step": 369
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 660.0,
"completions/max_terminated_length": 660.0,
"completions/mean_length": 249.2734375,
"completions/mean_terminated_length": 249.2734375,
"completions/min_length": 95.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.592,
"grad_norm": 0.03348240256309509,
"learning_rate": 4.537366548042704e-07,
"loss": 0.006,
"num_tokens": 181570745.0,
"reward": 1.3028314113616943,
"reward_std": 0.21476979553699493,
"rewards/accuracy_reward_long_step": 0.44140625,
"rewards/final_brier_reward_long_step": 0.7177179455757141,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7357947826385498,
"step": 370
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 470.0,
"completions/max_terminated_length": 470.0,
"completions/mean_length": 235.62109375,
"completions/mean_terminated_length": 235.62109375,
"completions/min_length": 126.0,
"completions/min_terminated_length": 126.0,
"epoch": 0.5936,
"grad_norm": 0.029231376945972443,
"learning_rate": 4.519572953736655e-07,
"loss": -0.0131,
"num_tokens": 182061952.0,
"reward": 1.4700736999511719,
"reward_std": 0.0870005190372467,
"rewards/accuracy_reward_long_step": 0.5625,
"rewards/final_brier_reward_long_step": 0.7968558669090271,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.833439290523529,
"step": 371
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 489.0,
"completions/max_terminated_length": 489.0,
"completions/mean_length": 244.97265625,
"completions/mean_terminated_length": 244.97265625,
"completions/min_length": 143.0,
"completions/min_terminated_length": 143.0,
"epoch": 0.5952,
"grad_norm": 0.0341234989464283,
"learning_rate": 4.501779359430605e-07,
"loss": -0.003,
"num_tokens": 182547297.0,
"reward": 1.5000258684158325,
"reward_std": 0.12720796465873718,
"rewards/accuracy_reward_long_step": 0.609375,
"rewards/final_brier_reward_long_step": 0.8284410238265991,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7341622710227966,
"step": 372
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 557.0,
"completions/max_terminated_length": 557.0,
"completions/mean_length": 272.23046875,
"completions/mean_terminated_length": 272.23046875,
"completions/min_length": 111.0,
"completions/min_terminated_length": 111.0,
"epoch": 0.5968,
"grad_norm": 0.032760150730609894,
"learning_rate": 4.483985765124555e-07,
"loss": 0.0008,
"num_tokens": 183047828.0,
"reward": 1.2245934009552002,
"reward_std": 0.14938510954380035,
"rewards/accuracy_reward_long_step": 0.33984375,
"rewards/final_brier_reward_long_step": 0.738226592540741,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.800771951675415,
"step": 373
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 484.0,
"completions/max_terminated_length": 484.0,
"completions/mean_length": 236.62109375,
"completions/mean_terminated_length": 236.62109375,
"completions/min_length": 139.0,
"completions/min_terminated_length": 139.0,
"epoch": 0.5984,
"grad_norm": 0.035608965903520584,
"learning_rate": 4.466192170818505e-07,
"loss": -0.0005,
"num_tokens": 183518363.0,
"reward": 1.4495090246200562,
"reward_std": 0.1940980851650238,
"rewards/accuracy_reward_long_step": 0.58984375,
"rewards/final_brier_reward_long_step": 0.6856546401977539,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7530063390731812,
"step": 374
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 480.0,
"completions/max_terminated_length": 480.0,
"completions/mean_length": 253.03125,
"completions/mean_terminated_length": 253.03125,
"completions/min_length": 145.0,
"completions/min_terminated_length": 145.0,
"epoch": 0.6,
"grad_norm": 0.03018086962401867,
"learning_rate": 4.4483985765124553e-07,
"loss": -0.0038,
"num_tokens": 184007499.0,
"reward": 1.4177751541137695,
"reward_std": 0.1175907552242279,
"rewards/accuracy_reward_long_step": 0.5234375,
"rewards/final_brier_reward_long_step": 0.830146849155426,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7550168037414551,
"step": 375
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 414.0,
"completions/max_terminated_length": 414.0,
"completions/mean_length": 231.6640625,
"completions/mean_terminated_length": 231.6640625,
"completions/min_length": 152.0,
"completions/min_terminated_length": 152.0,
"epoch": 0.6016,
"grad_norm": 0.03529973700642586,
"learning_rate": 4.4306049822064055e-07,
"loss": -0.005,
"num_tokens": 184481933.0,
"reward": 1.4107120037078857,
"reward_std": 0.17283451557159424,
"rewards/accuracy_reward_long_step": 0.55078125,
"rewards/final_brier_reward_long_step": 0.6837781667709351,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7559454441070557,
"step": 376
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 490.0,
"completions/max_terminated_length": 490.0,
"completions/mean_length": 248.328125,
"completions/mean_terminated_length": 248.328125,
"completions/min_length": 117.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.6032,
"grad_norm": 0.032051555812358856,
"learning_rate": 4.412811387900356e-07,
"loss": 0.0033,
"num_tokens": 184971921.0,
"reward": 1.4520785808563232,
"reward_std": 0.23111991584300995,
"rewards/accuracy_reward_long_step": 0.5703125,
"rewards/final_brier_reward_long_step": 0.7146296501159668,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8124346733093262,
"step": 377
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 507.0,
"completions/max_terminated_length": 507.0,
"completions/mean_length": 234.9921875,
"completions/mean_terminated_length": 234.9921875,
"completions/min_length": 103.0,
"completions/min_terminated_length": 103.0,
"epoch": 0.6048,
"grad_norm": 0.03357694298028946,
"learning_rate": 4.395017793594306e-07,
"loss": 0.0024,
"num_tokens": 185440015.0,
"reward": 1.429476261138916,
"reward_std": 0.119395412504673,
"rewards/accuracy_reward_long_step": 0.53125,
"rewards/final_brier_reward_long_step": 0.7937347888946533,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7991704940795898,
"step": 378
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 560.0,
"completions/max_terminated_length": 560.0,
"completions/mean_length": 260.4453125,
"completions/mean_terminated_length": 260.4453125,
"completions/min_length": 99.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.6064,
"grad_norm": 0.029874242842197418,
"learning_rate": 4.377224199288256e-07,
"loss": 0.0054,
"num_tokens": 185943985.0,
"reward": 1.489135980606079,
"reward_std": 0.123079814016819,
"rewards/accuracy_reward_long_step": 0.5546875,
"rewards/final_brier_reward_long_step": 0.8251116871833801,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.9126821160316467,
"step": 379
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 592.0,
"completions/max_terminated_length": 592.0,
"completions/mean_length": 249.58203125,
"completions/mean_terminated_length": 249.58203125,
"completions/min_length": 141.0,
"completions/min_terminated_length": 141.0,
"epoch": 0.608,
"grad_norm": 0.03442827984690666,
"learning_rate": 4.359430604982206e-07,
"loss": -0.0082,
"num_tokens": 186443814.0,
"reward": 1.2993438243865967,
"reward_std": 0.12685778737068176,
"rewards/accuracy_reward_long_step": 0.40625,
"rewards/final_brier_reward_long_step": 0.7798289060592651,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7925466299057007,
"step": 380
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 572.0,
"completions/max_terminated_length": 572.0,
"completions/mean_length": 254.453125,
"completions/mean_terminated_length": 254.453125,
"completions/min_length": 119.0,
"completions/min_terminated_length": 119.0,
"epoch": 0.6096,
"grad_norm": 0.032972000539302826,
"learning_rate": 4.341637010676156e-07,
"loss": -0.002,
"num_tokens": 186948906.0,
"reward": 1.3653643131256104,
"reward_std": 0.1496572494506836,
"rewards/accuracy_reward_long_step": 0.484375,
"rewards/final_brier_reward_long_step": 0.7358413934707642,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7881159782409668,
"step": 381
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 460.0,
"completions/max_terminated_length": 460.0,
"completions/mean_length": 243.390625,
"completions/mean_terminated_length": 243.390625,
"completions/min_length": 127.0,
"completions/min_terminated_length": 127.0,
"epoch": 0.6112,
"grad_norm": 0.03480248898267746,
"learning_rate": 4.3238434163701063e-07,
"loss": 0.0027,
"num_tokens": 187445558.0,
"reward": 1.545555830001831,
"reward_std": 0.17457936704158783,
"rewards/accuracy_reward_long_step": 0.6171875,
"rewards/final_brier_reward_long_step": 0.865576982498169,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8478966355323792,
"step": 382
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 564.0,
"completions/max_terminated_length": 564.0,
"completions/mean_length": 252.15234375,
"completions/mean_terminated_length": 252.15234375,
"completions/min_length": 126.0,
"completions/min_terminated_length": 126.0,
"epoch": 0.6128,
"grad_norm": 0.03224232792854309,
"learning_rate": 4.306049822064057e-07,
"loss": 0.0002,
"num_tokens": 187939005.0,
"reward": 1.5401490926742554,
"reward_std": 0.1554555892944336,
"rewards/accuracy_reward_long_step": 0.625,
"rewards/final_brier_reward_long_step": 0.8096957206726074,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8509008288383484,
"step": 383
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 557.0,
"completions/max_terminated_length": 557.0,
"completions/mean_length": 249.1328125,
"completions/mean_terminated_length": 249.1328125,
"completions/min_length": 116.0,
"completions/min_terminated_length": 116.0,
"epoch": 0.6144,
"grad_norm": 0.029847221449017525,
"learning_rate": 4.288256227758007e-07,
"loss": 0.0013,
"num_tokens": 188446527.0,
"reward": 1.3999953269958496,
"reward_std": 0.09057177603244781,
"rewards/accuracy_reward_long_step": 0.5078125,
"rewards/final_brier_reward_long_step": 0.7741625308990479,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.794569194316864,
"step": 384
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 580.0,
"completions/max_terminated_length": 580.0,
"completions/mean_length": 264.25390625,
"completions/mean_terminated_length": 264.25390625,
"completions/min_length": 110.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.616,
"grad_norm": 0.031437478959560394,
"learning_rate": 4.2704626334519573e-07,
"loss": 0.0145,
"num_tokens": 188934968.0,
"reward": 1.4329785108566284,
"reward_std": 0.14642232656478882,
"rewards/accuracy_reward_long_step": 0.54296875,
"rewards/final_brier_reward_long_step": 0.7519199252128601,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8081189393997192,
"step": 385
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 450.0,
"completions/max_terminated_length": 450.0,
"completions/mean_length": 246.9140625,
"completions/mean_terminated_length": 246.9140625,
"completions/min_length": 139.0,
"completions/min_terminated_length": 139.0,
"epoch": 0.6176,
"grad_norm": 0.04302318021655083,
"learning_rate": 4.2526690391459074e-07,
"loss": -0.0079,
"num_tokens": 189434826.0,
"reward": 1.3921318054199219,
"reward_std": 0.17010337114334106,
"rewards/accuracy_reward_long_step": 0.54296875,
"rewards/final_brier_reward_long_step": 0.7150309085845947,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.6816216111183167,
"step": 386
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 590.0,
"completions/max_terminated_length": 590.0,
"completions/mean_length": 254.37890625,
"completions/mean_terminated_length": 254.37890625,
"completions/min_length": 151.0,
"completions/min_terminated_length": 151.0,
"epoch": 0.6192,
"grad_norm": 0.03536539152264595,
"learning_rate": 4.2348754448398576e-07,
"loss": -0.0079,
"num_tokens": 189913979.0,
"reward": 1.4037388563156128,
"reward_std": 0.11961972713470459,
"rewards/accuracy_reward_long_step": 0.515625,
"rewards/final_brier_reward_long_step": 0.738335907459259,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8141195178031921,
"step": 387
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 506.0,
"completions/max_terminated_length": 506.0,
"completions/mean_length": 266.3203125,
"completions/mean_terminated_length": 266.3203125,
"completions/min_length": 141.0,
"completions/min_terminated_length": 141.0,
"epoch": 0.6208,
"grad_norm": 0.03417327627539635,
"learning_rate": 4.217081850533807e-07,
"loss": -0.0023,
"num_tokens": 190421693.0,
"reward": 1.4262065887451172,
"reward_std": 0.10677627474069595,
"rewards/accuracy_reward_long_step": 0.53515625,
"rewards/final_brier_reward_long_step": 0.7934492230415344,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7707524299621582,
"step": 388
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 442.0,
"completions/max_terminated_length": 442.0,
"completions/mean_length": 267.72265625,
"completions/mean_terminated_length": 267.72265625,
"completions/min_length": 102.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.6224,
"grad_norm": 0.03453484922647476,
"learning_rate": 4.199288256227758e-07,
"loss": 0.0004,
"num_tokens": 190935686.0,
"reward": 1.2190345525741577,
"reward_std": 0.14494457840919495,
"rewards/accuracy_reward_long_step": 0.33984375,
"rewards/final_brier_reward_long_step": 0.7603257894515991,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.756437361240387,
"step": 389
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 447.0,
"completions/max_terminated_length": 447.0,
"completions/mean_length": 248.13671875,
"completions/mean_terminated_length": 248.13671875,
"completions/min_length": 105.0,
"completions/min_terminated_length": 105.0,
"epoch": 0.624,
"grad_norm": 0.033618371933698654,
"learning_rate": 4.181494661921708e-07,
"loss": 0.0001,
"num_tokens": 191423305.0,
"reward": 1.367628812789917,
"reward_std": 0.13067224621772766,
"rewards/accuracy_reward_long_step": 0.4765625,
"rewards/final_brier_reward_long_step": 0.7947276830673218,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7695374488830566,
"step": 390
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 564.0,
"completions/max_terminated_length": 564.0,
"completions/mean_length": 263.71875,
"completions/mean_terminated_length": 263.71875,
"completions/min_length": 142.0,
"completions/min_terminated_length": 142.0,
"epoch": 0.6256,
"grad_norm": 0.0348045788705349,
"learning_rate": 4.163701067615658e-07,
"loss": 0.0067,
"num_tokens": 191928945.0,
"reward": 1.6183103322982788,
"reward_std": 0.16025137901306152,
"rewards/accuracy_reward_long_step": 0.71875,
"rewards/final_brier_reward_long_step": 0.8521628379821777,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7538907527923584,
"step": 391
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 599.0,
"completions/max_terminated_length": 599.0,
"completions/mean_length": 265.98828125,
"completions/mean_terminated_length": 265.98828125,
"completions/min_length": 164.0,
"completions/min_terminated_length": 164.0,
"epoch": 0.6272,
"grad_norm": 0.028777770698070526,
"learning_rate": 4.1459074733096083e-07,
"loss": 0.0073,
"num_tokens": 192431814.0,
"reward": 1.4733824729919434,
"reward_std": 0.14600692689418793,
"rewards/accuracy_reward_long_step": 0.5859375,
"rewards/final_brier_reward_long_step": 0.7584691047668457,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7913106679916382,
"step": 392
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 462.0,
"completions/max_terminated_length": 462.0,
"completions/mean_length": 263.95703125,
"completions/mean_terminated_length": 263.95703125,
"completions/min_length": 161.0,
"completions/min_terminated_length": 161.0,
"epoch": 0.6288,
"grad_norm": 0.03086771070957184,
"learning_rate": 4.1281138790035585e-07,
"loss": -0.014,
"num_tokens": 192932539.0,
"reward": 1.2810169458389282,
"reward_std": 0.08168387413024902,
"rewards/accuracy_reward_long_step": 0.43359375,
"rewards/final_brier_reward_long_step": 0.6735238432884216,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7239813804626465,
"step": 393
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 504.0,
"completions/max_terminated_length": 504.0,
"completions/mean_length": 252.90625,
"completions/mean_terminated_length": 253.89805603027344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 139.0,
"epoch": 0.6304,
"grad_norm": 0.033842138946056366,
"learning_rate": 4.1103202846975086e-07,
"loss": -0.0147,
"num_tokens": 193433083.0,
"reward": 1.4617502689361572,
"reward_std": 0.16944709420204163,
"rewards/accuracy_reward_long_step": 0.5703125,
"rewards/final_brier_reward_long_step": 0.8041574358940125,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7694060206413269,
"step": 394
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 527.0,
"completions/max_terminated_length": 527.0,
"completions/mean_length": 252.5546875,
"completions/mean_terminated_length": 252.5546875,
"completions/min_length": 132.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.632,
"grad_norm": 0.035002097487449646,
"learning_rate": 4.0925266903914593e-07,
"loss": 0.0012,
"num_tokens": 193924961.0,
"reward": 1.5998687744140625,
"reward_std": 0.14062157273292542,
"rewards/accuracy_reward_long_step": 0.69140625,
"rewards/final_brier_reward_long_step": 0.7885218858718872,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8453285694122314,
"step": 395
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 537.0,
"completions/max_terminated_length": 537.0,
"completions/mean_length": 264.9765625,
"completions/mean_terminated_length": 264.9765625,
"completions/min_length": 88.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.6336,
"grad_norm": 0.0325743593275547,
"learning_rate": 4.0747330960854094e-07,
"loss": 0.0099,
"num_tokens": 194417523.0,
"reward": 1.5281362533569336,
"reward_std": 0.14003178477287292,
"rewards/accuracy_reward_long_step": 0.6171875,
"rewards/final_brier_reward_long_step": 0.816343367099762,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8274516463279724,
"step": 396
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 524.0,
"completions/max_terminated_length": 524.0,
"completions/mean_length": 266.8125,
"completions/mean_terminated_length": 266.8125,
"completions/min_length": 174.0,
"completions/min_terminated_length": 174.0,
"epoch": 0.6352,
"grad_norm": 0.044015269726514816,
"learning_rate": 4.0569395017793596e-07,
"loss": -0.0169,
"num_tokens": 194911235.0,
"reward": 1.1511602401733398,
"reward_std": 0.0747460424900055,
"rewards/accuracy_reward_long_step": 0.27734375,
"rewards/final_brier_reward_long_step": 0.7376371622085571,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7576285004615784,
"step": 397
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 502.0,
"completions/max_terminated_length": 502.0,
"completions/mean_length": 258.0625,
"completions/mean_terminated_length": 258.0625,
"completions/min_length": 128.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.6368,
"grad_norm": 0.035533856600522995,
"learning_rate": 4.039145907473309e-07,
"loss": 0.0121,
"num_tokens": 195414379.0,
"reward": 1.45721435546875,
"reward_std": 0.18415778875350952,
"rewards/accuracy_reward_long_step": 0.56640625,
"rewards/final_brier_reward_long_step": 0.7342562675476074,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8289762735366821,
"step": 398
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 487.0,
"completions/max_terminated_length": 487.0,
"completions/mean_length": 260.1484375,
"completions/mean_terminated_length": 260.1484375,
"completions/min_length": 131.0,
"completions/min_terminated_length": 131.0,
"epoch": 0.6384,
"grad_norm": 0.04100572690367699,
"learning_rate": 4.0213523131672593e-07,
"loss": 0.0142,
"num_tokens": 195921505.0,
"reward": 1.4241199493408203,
"reward_std": 0.12187729775905609,
"rewards/accuracy_reward_long_step": 0.53515625,
"rewards/final_brier_reward_long_step": 0.7385929822921753,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8172620534896851,
"step": 399
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 545.0,
"completions/max_terminated_length": 545.0,
"completions/mean_length": 260.5234375,
"completions/mean_terminated_length": 260.5234375,
"completions/min_length": 122.0,
"completions/min_terminated_length": 122.0,
"epoch": 0.64,
"grad_norm": 0.03168834373354912,
"learning_rate": 4.0035587188612095e-07,
"loss": 0.015,
"num_tokens": 196418567.0,
"reward": 1.468137264251709,
"reward_std": 0.12736788392066956,
"rewards/accuracy_reward_long_step": 0.546875,
"rewards/final_brier_reward_long_step": 0.796457052230835,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8885919451713562,
"step": 400
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 451.0,
"completions/max_terminated_length": 451.0,
"completions/mean_length": 259.34765625,
"completions/mean_terminated_length": 259.34765625,
"completions/min_length": 151.0,
"completions/min_terminated_length": 151.0,
"epoch": 0.6416,
"grad_norm": 0.07957521826028824,
"learning_rate": 3.98576512455516e-07,
"loss": 0.0156,
"num_tokens": 196899368.0,
"reward": 1.3013485670089722,
"reward_std": 0.1320360153913498,
"rewards/accuracy_reward_long_step": 0.41796875,
"rewards/final_brier_reward_long_step": 0.7306581735610962,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8028608560562134,
"step": 401
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 417.0,
"completions/max_terminated_length": 417.0,
"completions/mean_length": 258.40234375,
"completions/mean_terminated_length": 258.40234375,
"completions/min_length": 172.0,
"completions/min_terminated_length": 172.0,
"epoch": 0.6432,
"grad_norm": 0.04176841303706169,
"learning_rate": 3.9679715302491103e-07,
"loss": 0.0149,
"num_tokens": 197393599.0,
"reward": 1.4583325386047363,
"reward_std": 0.17372997105121613,
"rewards/accuracy_reward_long_step": 0.5625,
"rewards/final_brier_reward_long_step": 0.7547035217285156,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8286267518997192,
"step": 402
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 430.0,
"completions/max_terminated_length": 430.0,
"completions/mean_length": 262.3828125,
"completions/mean_terminated_length": 262.3828125,
"completions/min_length": 111.0,
"completions/min_terminated_length": 111.0,
"epoch": 0.6448,
"grad_norm": 0.031080788001418114,
"learning_rate": 3.9501779359430604e-07,
"loss": -0.008,
"num_tokens": 197880809.0,
"reward": 1.3304669857025146,
"reward_std": 0.11997392773628235,
"rewards/accuracy_reward_long_step": 0.4453125,
"rewards/final_brier_reward_long_step": 0.7705242037773132,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7700934410095215,
"step": 403
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 539.0,
"completions/max_terminated_length": 539.0,
"completions/mean_length": 257.9296875,
"completions/mean_terminated_length": 257.9296875,
"completions/min_length": 106.0,
"completions/min_terminated_length": 106.0,
"epoch": 0.6464,
"grad_norm": 0.0383228063583374,
"learning_rate": 3.9323843416370106e-07,
"loss": 0.0168,
"num_tokens": 198364775.0,
"reward": 1.4343302249908447,
"reward_std": 0.13839168846607208,
"rewards/accuracy_reward_long_step": 0.5625,
"rewards/final_brier_reward_long_step": 0.7040433883666992,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7832778692245483,
"step": 404
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 505.0,
"completions/max_terminated_length": 505.0,
"completions/mean_length": 245.3828125,
"completions/mean_terminated_length": 245.3828125,
"completions/min_length": 103.0,
"completions/min_terminated_length": 103.0,
"epoch": 0.648,
"grad_norm": 0.04407874867320061,
"learning_rate": 3.9145907473309607e-07,
"loss": -0.0001,
"num_tokens": 198837905.0,
"reward": 1.4450054168701172,
"reward_std": 0.15000846982002258,
"rewards/accuracy_reward_long_step": 0.5703125,
"rewards/final_brier_reward_long_step": 0.7329072952270508,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.765864372253418,
"step": 405
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 488.0,
"completions/max_terminated_length": 488.0,
"completions/mean_length": 248.64453125,
"completions/mean_terminated_length": 248.64453125,
"completions/min_length": 91.0,
"completions/min_terminated_length": 91.0,
"epoch": 0.6496,
"grad_norm": 0.035016220062971115,
"learning_rate": 3.896797153024911e-07,
"loss": -0.0174,
"num_tokens": 199319566.0,
"reward": 1.3514586687088013,
"reward_std": 0.09576141834259033,
"rewards/accuracy_reward_long_step": 0.47265625,
"rewards/final_brier_reward_long_step": 0.7985478639602661,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7166616916656494,
"step": 406
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 486.0,
"completions/max_terminated_length": 486.0,
"completions/mean_length": 270.6796875,
"completions/mean_terminated_length": 270.6796875,
"completions/min_length": 98.0,
"completions/min_terminated_length": 98.0,
"epoch": 0.6512,
"grad_norm": 0.03493834286928177,
"learning_rate": 3.879003558718861e-07,
"loss": 0.005,
"num_tokens": 199798012.0,
"reward": 1.2670437097549438,
"reward_std": 0.1575869619846344,
"rewards/accuracy_reward_long_step": 0.375,
"rewards/final_brier_reward_long_step": 0.7614452242851257,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8067296743392944,
"step": 407
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 603.0,
"completions/max_terminated_length": 603.0,
"completions/mean_length": 257.234375,
"completions/mean_terminated_length": 257.234375,
"completions/min_length": 123.0,
"completions/min_terminated_length": 123.0,
"epoch": 0.6528,
"grad_norm": 0.17604607343673706,
"learning_rate": 3.861209964412811e-07,
"loss": -0.0137,
"num_tokens": 200299744.0,
"reward": 1.2874679565429688,
"reward_std": 0.11275781691074371,
"rewards/accuracy_reward_long_step": 0.3984375,
"rewards/final_brier_reward_long_step": 0.7716461420059204,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7844761610031128,
"step": 408
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 490.0,
"completions/max_terminated_length": 490.0,
"completions/mean_length": 245.10546875,
"completions/mean_terminated_length": 245.10546875,
"completions/min_length": 118.0,
"completions/min_terminated_length": 118.0,
"epoch": 0.6544,
"grad_norm": 0.03474588319659233,
"learning_rate": 3.8434163701067613e-07,
"loss": 0.0102,
"num_tokens": 200789587.0,
"reward": 1.3615117073059082,
"reward_std": 0.13387925922870636,
"rewards/accuracy_reward_long_step": 0.51171875,
"rewards/final_brier_reward_long_step": 0.6371433734893799,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7620280385017395,
"step": 409
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 431.0,
"completions/max_terminated_length": 431.0,
"completions/mean_length": 238.515625,
"completions/mean_terminated_length": 238.515625,
"completions/min_length": 108.0,
"completions/min_terminated_length": 108.0,
"epoch": 0.656,
"grad_norm": 0.036233462393283844,
"learning_rate": 3.8256227758007115e-07,
"loss": 0.0044,
"num_tokens": 201275391.0,
"reward": 1.3896780014038086,
"reward_std": 0.13047534227371216,
"rewards/accuracy_reward_long_step": 0.515625,
"rewards/final_brier_reward_long_step": 0.6849026679992676,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8113091588020325,
"step": 410
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 504.0,
"completions/max_terminated_length": 504.0,
"completions/mean_length": 241.09375,
"completions/mean_terminated_length": 241.09375,
"completions/min_length": 94.0,
"completions/min_terminated_length": 94.0,
"epoch": 0.6576,
"grad_norm": 0.03837813064455986,
"learning_rate": 3.8078291814946616e-07,
"loss": -0.0032,
"num_tokens": 201756703.0,
"reward": 1.3157711029052734,
"reward_std": 0.10341217368841171,
"rewards/accuracy_reward_long_step": 0.45703125,
"rewards/final_brier_reward_long_step": 0.7142456769943237,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7207139134407043,
"step": 411
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 591.0,
"completions/max_terminated_length": 591.0,
"completions/mean_length": 238.15234375,
"completions/mean_terminated_length": 238.15234375,
"completions/min_length": 89.0,
"completions/min_terminated_length": 89.0,
"epoch": 0.6592,
"grad_norm": 0.0354132242500782,
"learning_rate": 3.790035587188612e-07,
"loss": 0.0055,
"num_tokens": 202225950.0,
"reward": 1.4732050895690918,
"reward_std": 0.1469092220067978,
"rewards/accuracy_reward_long_step": 0.58984375,
"rewards/final_brier_reward_long_step": 0.7194160223007202,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8218415975570679,
"step": 412
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 491.0,
"completions/max_terminated_length": 491.0,
"completions/mean_length": 247.93359375,
"completions/mean_terminated_length": 247.93359375,
"completions/min_length": 84.0,
"completions/min_terminated_length": 84.0,
"epoch": 0.6608,
"grad_norm": 0.046107884496450424,
"learning_rate": 3.7722419928825624e-07,
"loss": -0.0093,
"num_tokens": 202709477.0,
"reward": 1.3391033411026,
"reward_std": 0.08157768845558167,
"rewards/accuracy_reward_long_step": 0.46484375,
"rewards/final_brier_reward_long_step": 0.6804527044296265,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8165856599807739,
"step": 413
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 441.0,
"completions/max_terminated_length": 441.0,
"completions/mean_length": 225.625,
"completions/mean_terminated_length": 225.625,
"completions/min_length": 89.0,
"completions/min_terminated_length": 89.0,
"epoch": 0.6624,
"grad_norm": 0.04497173801064491,
"learning_rate": 3.7544483985765126e-07,
"loss": 0.0046,
"num_tokens": 203200421.0,
"reward": 1.4665465354919434,
"reward_std": 0.10675959289073944,
"rewards/accuracy_reward_long_step": 0.6015625,
"rewards/final_brier_reward_long_step": 0.7531781196594238,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7067579030990601,
"step": 414
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 511.0,
"completions/max_terminated_length": 511.0,
"completions/mean_length": 254.66015625,
"completions/mean_terminated_length": 254.66015625,
"completions/min_length": 91.0,
"completions/min_terminated_length": 91.0,
"epoch": 0.664,
"grad_norm": 0.037333909422159195,
"learning_rate": 3.7366548042704627e-07,
"loss": -0.005,
"num_tokens": 203701798.0,
"reward": 1.374776840209961,
"reward_std": 0.12094822525978088,
"rewards/accuracy_reward_long_step": 0.5,
"rewards/final_brier_reward_long_step": 0.7455586194992065,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.753548264503479,
"step": 415
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 482.0,
"completions/max_terminated_length": 482.0,
"completions/mean_length": 226.84375,
"completions/mean_terminated_length": 226.84375,
"completions/min_length": 98.0,
"completions/min_terminated_length": 98.0,
"epoch": 0.6656,
"grad_norm": 0.039919789880514145,
"learning_rate": 3.718861209964413e-07,
"loss": 0.025,
"num_tokens": 204175030.0,
"reward": 1.562011480331421,
"reward_std": 0.06771315634250641,
"rewards/accuracy_reward_long_step": 0.64453125,
"rewards/final_brier_reward_long_step": 0.8121625185012817,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8577582836151123,
"step": 416
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 409.0,
"completions/max_terminated_length": 409.0,
"completions/mean_length": 243.203125,
"completions/mean_terminated_length": 243.203125,
"completions/min_length": 112.0,
"completions/min_terminated_length": 112.0,
"epoch": 0.6672,
"grad_norm": 0.04114522784948349,
"learning_rate": 3.7010676156583625e-07,
"loss": 0.0153,
"num_tokens": 204655778.0,
"reward": 1.4633723497390747,
"reward_std": 0.0920601338148117,
"rewards/accuracy_reward_long_step": 0.54296875,
"rewards/final_brier_reward_long_step": 0.8287187218666077,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8528955578804016,
"step": 417
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 606.0,
"completions/max_terminated_length": 606.0,
"completions/mean_length": 244.66015625,
"completions/mean_terminated_length": 244.66015625,
"completions/min_length": 74.0,
"completions/min_terminated_length": 74.0,
"epoch": 0.6688,
"grad_norm": 0.0328848697245121,
"learning_rate": 3.6832740213523126e-07,
"loss": 0.005,
"num_tokens": 205149811.0,
"reward": 1.3774842023849487,
"reward_std": 0.13209398090839386,
"rewards/accuracy_reward_long_step": 0.48828125,
"rewards/final_brier_reward_long_step": 0.805209755897522,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.751602292060852,
"step": 418
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 447.0,
"completions/max_terminated_length": 447.0,
"completions/mean_length": 235.80859375,
"completions/mean_terminated_length": 235.80859375,
"completions/min_length": 95.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.6704,
"grad_norm": 0.04273487254977226,
"learning_rate": 3.6654804270462633e-07,
"loss": 0.0065,
"num_tokens": 205628714.0,
"reward": 1.306333303451538,
"reward_std": 0.1178286001086235,
"rewards/accuracy_reward_long_step": 0.41796875,
"rewards/final_brier_reward_long_step": 0.7348085641860962,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8186495304107666,
"step": 419
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 581.0,
"completions/max_terminated_length": 581.0,
"completions/mean_length": 244.96484375,
"completions/mean_terminated_length": 244.96484375,
"completions/min_length": 102.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.672,
"grad_norm": 0.04026668146252632,
"learning_rate": 3.6476868327402134e-07,
"loss": 0.0007,
"num_tokens": 206110425.0,
"reward": 1.3591866493225098,
"reward_std": 0.13395720720291138,
"rewards/accuracy_reward_long_step": 0.47265625,
"rewards/final_brier_reward_long_step": 0.7895034551620483,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7566181421279907,
"step": 420
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 590.0,
"completions/max_terminated_length": 590.0,
"completions/mean_length": 244.67578125,
"completions/mean_terminated_length": 244.67578125,
"completions/min_length": 99.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.6736,
"grad_norm": 0.040121398866176605,
"learning_rate": 3.6298932384341636e-07,
"loss": -0.0165,
"num_tokens": 206586750.0,
"reward": 1.3462982177734375,
"reward_std": 0.140256866812706,
"rewards/accuracy_reward_long_step": 0.4765625,
"rewards/final_brier_reward_long_step": 0.6518319845199585,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8271111249923706,
"step": 421
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 382.0,
"completions/max_terminated_length": 382.0,
"completions/mean_length": 234.125,
"completions/mean_terminated_length": 234.125,
"completions/min_length": 86.0,
"completions/min_terminated_length": 86.0,
"epoch": 0.6752,
"grad_norm": 0.04478037729859352,
"learning_rate": 3.6120996441281137e-07,
"loss": 0.0001,
"num_tokens": 207062502.0,
"reward": 1.5341248512268066,
"reward_std": 0.12208271771669388,
"rewards/accuracy_reward_long_step": 0.63671875,
"rewards/final_brier_reward_long_step": 0.7614644765853882,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8281601071357727,
"step": 422
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 460.0,
"completions/max_terminated_length": 460.0,
"completions/mean_length": 246.85546875,
"completions/mean_terminated_length": 246.85546875,
"completions/min_length": 91.0,
"completions/min_terminated_length": 91.0,
"epoch": 0.6768,
"grad_norm": 0.041672661900520325,
"learning_rate": 3.594306049822064e-07,
"loss": 0.0102,
"num_tokens": 207558433.0,
"reward": 1.2478289604187012,
"reward_std": 0.09173645079135895,
"rewards/accuracy_reward_long_step": 0.34375,
"rewards/final_brier_reward_long_step": 0.8001093864440918,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8162060976028442,
"step": 423
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 406.0,
"completions/max_terminated_length": 406.0,
"completions/mean_length": 235.1484375,
"completions/mean_terminated_length": 235.1484375,
"completions/min_length": 92.0,
"completions/min_terminated_length": 92.0,
"epoch": 0.6784,
"grad_norm": 0.04561910033226013,
"learning_rate": 3.576512455516014e-07,
"loss": -0.0036,
"num_tokens": 208031183.0,
"reward": 1.43558931350708,
"reward_std": 0.09829960763454437,
"rewards/accuracy_reward_long_step": 0.51171875,
"rewards/final_brier_reward_long_step": 0.8206312656402588,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8748506903648376,
"step": 424
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 493.0,
"completions/max_terminated_length": 493.0,
"completions/mean_length": 252.4140625,
"completions/mean_terminated_length": 252.4140625,
"completions/min_length": 99.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.68,
"grad_norm": 0.039236631244421005,
"learning_rate": 3.5587188612099647e-07,
"loss": -0.0036,
"num_tokens": 208527737.0,
"reward": 1.433516263961792,
"reward_std": 0.17410725355148315,
"rewards/accuracy_reward_long_step": 0.53515625,
"rewards/final_brier_reward_long_step": 0.7744226455688477,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8268297910690308,
"step": 425
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 464.0,
"completions/max_terminated_length": 464.0,
"completions/mean_length": 223.62890625,
"completions/mean_terminated_length": 223.62890625,
"completions/min_length": 86.0,
"completions/min_terminated_length": 86.0,
"epoch": 0.6816,
"grad_norm": 0.034077707678079605,
"learning_rate": 3.540925266903915e-07,
"loss": -0.0108,
"num_tokens": 209000154.0,
"reward": 1.3932843208312988,
"reward_std": 0.08521192520856857,
"rewards/accuracy_reward_long_step": 0.453125,
"rewards/final_brier_reward_long_step": 0.9108027219772339,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8498345017433167,
"step": 426
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 882.0,
"completions/max_terminated_length": 882.0,
"completions/mean_length": 240.484375,
"completions/mean_terminated_length": 240.484375,
"completions/min_length": 119.0,
"completions/min_terminated_length": 119.0,
"epoch": 0.6832,
"grad_norm": 0.04128441587090492,
"learning_rate": 3.5231316725978644e-07,
"loss": -0.0074,
"num_tokens": 209489150.0,
"reward": 1.565014123916626,
"reward_std": 0.15992087125778198,
"rewards/accuracy_reward_long_step": 0.671875,
"rewards/final_brier_reward_long_step": 0.7421960830688477,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8381730318069458,
"step": 427
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 471.0,
"completions/max_terminated_length": 471.0,
"completions/mean_length": 244.76953125,
"completions/mean_terminated_length": 244.76953125,
"completions/min_length": 97.0,
"completions/min_terminated_length": 97.0,
"epoch": 0.6848,
"grad_norm": 0.043866030871868134,
"learning_rate": 3.5053380782918146e-07,
"loss": -0.0145,
"num_tokens": 209985339.0,
"reward": 1.3970236778259277,
"reward_std": 0.16603073477745056,
"rewards/accuracy_reward_long_step": 0.5,
"rewards/final_brier_reward_long_step": 0.7696589827537537,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8262485861778259,
"step": 428
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 520.0,
"completions/max_terminated_length": 520.0,
"completions/mean_length": 234.33984375,
"completions/mean_terminated_length": 234.33984375,
"completions/min_length": 91.0,
"completions/min_terminated_length": 91.0,
"epoch": 0.6864,
"grad_norm": 0.042403049767017365,
"learning_rate": 3.4875444839857647e-07,
"loss": 0.0113,
"num_tokens": 210472786.0,
"reward": 1.5308232307434082,
"reward_std": 0.12255808711051941,
"rewards/accuracy_reward_long_step": 0.63671875,
"rewards/final_brier_reward_long_step": 0.7184535264968872,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8579643964767456,
"step": 429
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 550.0,
"completions/max_terminated_length": 550.0,
"completions/mean_length": 230.375,
"completions/mean_terminated_length": 230.375,
"completions/min_length": 76.0,
"completions/min_terminated_length": 76.0,
"epoch": 0.688,
"grad_norm": 0.040890295058488846,
"learning_rate": 3.469750889679715e-07,
"loss": 0.0102,
"num_tokens": 210951450.0,
"reward": 1.3568546772003174,
"reward_std": 0.20398034155368805,
"rewards/accuracy_reward_long_step": 0.47265625,
"rewards/final_brier_reward_long_step": 0.7493456602096558,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7952607274055481,
"step": 430
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 497.0,
"completions/max_terminated_length": 497.0,
"completions/mean_length": 240.0,
"completions/mean_terminated_length": 240.0,
"completions/min_length": 87.0,
"completions/min_terminated_length": 87.0,
"epoch": 0.6896,
"grad_norm": 0.03938218578696251,
"learning_rate": 3.4519572953736656e-07,
"loss": 0.0071,
"num_tokens": 211431050.0,
"reward": 1.369812250137329,
"reward_std": 0.16209545731544495,
"rewards/accuracy_reward_long_step": 0.4921875,
"rewards/final_brier_reward_long_step": 0.7736667990684509,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7368321418762207,
"step": 431
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 464.0,
"completions/max_terminated_length": 464.0,
"completions/mean_length": 238.2109375,
"completions/mean_terminated_length": 238.2109375,
"completions/min_length": 124.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.6912,
"grad_norm": 0.04615631699562073,
"learning_rate": 3.4341637010676157e-07,
"loss": 0.0087,
"num_tokens": 211919552.0,
"reward": 1.415741205215454,
"reward_std": 0.0865730568766594,
"rewards/accuracy_reward_long_step": 0.5078125,
"rewards/final_brier_reward_long_step": 0.7965086102485657,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8352065682411194,
"step": 432
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 500.0,
"completions/max_terminated_length": 500.0,
"completions/mean_length": 230.30078125,
"completions/mean_terminated_length": 230.30078125,
"completions/min_length": 68.0,
"completions/min_terminated_length": 68.0,
"epoch": 0.6928,
"grad_norm": 0.045928288251161575,
"learning_rate": 3.416370106761566e-07,
"loss": 0.0084,
"num_tokens": 212409581.0,
"reward": 1.4080349206924438,
"reward_std": 0.10854038596153259,
"rewards/accuracy_reward_long_step": 0.49609375,
"rewards/final_brier_reward_long_step": 0.8253128528594971,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8224518895149231,
"step": 433
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 513.0,
"completions/max_terminated_length": 513.0,
"completions/mean_length": 243.75390625,
"completions/mean_terminated_length": 243.75390625,
"completions/min_length": 103.0,
"completions/min_terminated_length": 103.0,
"epoch": 0.6944,
"grad_norm": 0.036884017288684845,
"learning_rate": 3.398576512455516e-07,
"loss": -0.01,
"num_tokens": 212874262.0,
"reward": 1.3945372104644775,
"reward_std": 0.1757126748561859,
"rewards/accuracy_reward_long_step": 0.50390625,
"rewards/final_brier_reward_long_step": 0.8027616143226624,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7597622275352478,
"step": 434
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 457.0,
"completions/max_terminated_length": 457.0,
"completions/mean_length": 226.0,
"completions/mean_terminated_length": 226.0,
"completions/min_length": 66.0,
"completions/min_terminated_length": 66.0,
"epoch": 0.696,
"grad_norm": 0.03966812789440155,
"learning_rate": 3.380782918149466e-07,
"loss": 0.0137,
"num_tokens": 213355310.0,
"reward": 1.436761736869812,
"reward_std": 0.10320307314395905,
"rewards/accuracy_reward_long_step": 0.55078125,
"rewards/final_brier_reward_long_step": 0.7525194883346558,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7914024591445923,
"step": 435
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 600.0,
"completions/max_terminated_length": 600.0,
"completions/mean_length": 240.4921875,
"completions/mean_terminated_length": 240.4921875,
"completions/min_length": 91.0,
"completions/min_terminated_length": 91.0,
"epoch": 0.6976,
"grad_norm": 0.04616158828139305,
"learning_rate": 3.3629893238434163e-07,
"loss": -0.0162,
"num_tokens": 213835492.0,
"reward": 1.345383882522583,
"reward_std": 0.14034321904182434,
"rewards/accuracy_reward_long_step": 0.4453125,
"rewards/final_brier_reward_long_step": 0.783481240272522,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8168047666549683,
"step": 436
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 565.0,
"completions/max_terminated_length": 565.0,
"completions/mean_length": 225.7578125,
"completions/mean_terminated_length": 225.7578125,
"completions/min_length": 100.0,
"completions/min_terminated_length": 100.0,
"epoch": 0.6992,
"grad_norm": 0.04164176806807518,
"learning_rate": 3.3451957295373664e-07,
"loss": 0.0035,
"num_tokens": 214322966.0,
"reward": 1.3548498153686523,
"reward_std": 0.14276200532913208,
"rewards/accuracy_reward_long_step": 0.453125,
"rewards/final_brier_reward_long_step": 0.8077800273895264,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7991191744804382,
"step": 437
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 502.0,
"completions/max_terminated_length": 502.0,
"completions/mean_length": 226.65625,
"completions/mean_terminated_length": 226.65625,
"completions/min_length": 102.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.7008,
"grad_norm": 0.03757995367050171,
"learning_rate": 3.3274021352313166e-07,
"loss": 0.0176,
"num_tokens": 214794966.0,
"reward": 1.5220391750335693,
"reward_std": 0.1383122354745865,
"rewards/accuracy_reward_long_step": 0.62109375,
"rewards/final_brier_reward_long_step": 0.7596441507339478,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8441376686096191,
"step": 438
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 592.0,
"completions/max_terminated_length": 592.0,
"completions/mean_length": 237.671875,
"completions/mean_terminated_length": 237.671875,
"completions/min_length": 74.0,
"completions/min_terminated_length": 74.0,
"epoch": 0.7024,
"grad_norm": 0.04206470027565956,
"learning_rate": 3.3096085409252667e-07,
"loss": 0.0228,
"num_tokens": 215274386.0,
"reward": 1.4969501495361328,
"reward_std": 0.1166161373257637,
"rewards/accuracy_reward_long_step": 0.5859375,
"rewards/final_brier_reward_long_step": 0.8239851593971252,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.820065438747406,
"step": 439
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 520.0,
"completions/max_terminated_length": 520.0,
"completions/mean_length": 241.5390625,
"completions/mean_terminated_length": 241.5390625,
"completions/min_length": 99.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.704,
"grad_norm": 0.03900426998734474,
"learning_rate": 3.291814946619217e-07,
"loss": -0.0161,
"num_tokens": 215761204.0,
"reward": 1.4888627529144287,
"reward_std": 0.07008583098649979,
"rewards/accuracy_reward_long_step": 0.61328125,
"rewards/final_brier_reward_long_step": 0.7496683597564697,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7526575326919556,
"step": 440
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 416.0,
"completions/max_terminated_length": 416.0,
"completions/mean_length": 226.73828125,
"completions/mean_terminated_length": 226.73828125,
"completions/min_length": 95.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.7056,
"grad_norm": 0.04039409011602402,
"learning_rate": 3.274021352313167e-07,
"loss": 0.0156,
"num_tokens": 216251281.0,
"reward": 1.4942941665649414,
"reward_std": 0.14357957243919373,
"rewards/accuracy_reward_long_step": 0.60546875,
"rewards/final_brier_reward_long_step": 0.7177531123161316,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8375486135482788,
"step": 441
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 486.0,
"completions/max_terminated_length": 486.0,
"completions/mean_length": 224.7421875,
"completions/mean_terminated_length": 225.62353515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 65.0,
"epoch": 0.7072,
"grad_norm": 0.03879899904131889,
"learning_rate": 3.256227758007117e-07,
"loss": -0.0177,
"num_tokens": 216745127.0,
"reward": 1.3038554191589355,
"reward_std": 0.1679355949163437,
"rewards/accuracy_reward_long_step": 0.421875,
"rewards/final_brier_reward_long_step": 0.7172562479972839,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8184775114059448,
"step": 442
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 528.0,
"completions/max_terminated_length": 528.0,
"completions/mean_length": 225.91796875,
"completions/mean_terminated_length": 225.91796875,
"completions/min_length": 92.0,
"completions/min_terminated_length": 92.0,
"epoch": 0.7088,
"grad_norm": 0.04107962176203728,
"learning_rate": 3.238434163701068e-07,
"loss": 0.0022,
"num_tokens": 217241754.0,
"reward": 1.3336093425750732,
"reward_std": 0.11196212470531464,
"rewards/accuracy_reward_long_step": 0.42578125,
"rewards/final_brier_reward_long_step": 0.7749031186103821,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.85640949010849,
"step": 443
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 585.0,
"completions/max_terminated_length": 585.0,
"completions/mean_length": 226.2890625,
"completions/mean_terminated_length": 226.2890625,
"completions/min_length": 95.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.7104,
"grad_norm": 0.04981570690870285,
"learning_rate": 3.220640569395018e-07,
"loss": -0.0113,
"num_tokens": 217723420.0,
"reward": 1.4336767196655273,
"reward_std": 0.12888742983341217,
"rewards/accuracy_reward_long_step": 0.56640625,
"rewards/final_brier_reward_long_step": 0.7152671813964844,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7538148760795593,
"step": 444
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 520.0,
"completions/max_terminated_length": 520.0,
"completions/mean_length": 220.1328125,
"completions/mean_terminated_length": 220.1328125,
"completions/min_length": 79.0,
"completions/min_terminated_length": 79.0,
"epoch": 0.712,
"grad_norm": 0.043584324419498444,
"learning_rate": 3.202846975088968e-07,
"loss": 0.0119,
"num_tokens": 218212494.0,
"reward": 1.4643619060516357,
"reward_std": 0.12725988030433655,
"rewards/accuracy_reward_long_step": 0.56640625,
"rewards/final_brier_reward_long_step": 0.7841925621032715,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8076297640800476,
"step": 445
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 395.0,
"completions/max_terminated_length": 395.0,
"completions/mean_length": 230.3515625,
"completions/mean_terminated_length": 230.3515625,
"completions/min_length": 66.0,
"completions/min_terminated_length": 66.0,
"epoch": 0.7136,
"grad_norm": 0.03610699251294136,
"learning_rate": 3.1850533807829177e-07,
"loss": -0.0169,
"num_tokens": 218694184.0,
"reward": 1.559215784072876,
"reward_std": 0.07399096339941025,
"rewards/accuracy_reward_long_step": 0.66015625,
"rewards/final_brier_reward_long_step": 0.790971040725708,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8052672147750854,
"step": 446
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 466.0,
"completions/max_terminated_length": 466.0,
"completions/mean_length": 217.97265625,
"completions/mean_terminated_length": 217.97265625,
"completions/min_length": 81.0,
"completions/min_terminated_length": 81.0,
"epoch": 0.7152,
"grad_norm": 0.0351036936044693,
"learning_rate": 3.167259786476868e-07,
"loss": -0.0161,
"num_tokens": 219155313.0,
"reward": 1.5995757579803467,
"reward_std": 0.08523780107498169,
"rewards/accuracy_reward_long_step": 0.7109375,
"rewards/final_brier_reward_long_step": 0.7231503129005432,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8314027786254883,
"step": 447
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 589.0,
"completions/max_terminated_length": 589.0,
"completions/mean_length": 248.65234375,
"completions/mean_terminated_length": 248.65234375,
"completions/min_length": 99.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.7168,
"grad_norm": 0.04068749397993088,
"learning_rate": 3.149466192170818e-07,
"loss": 0.0197,
"num_tokens": 219654384.0,
"reward": 1.4963853359222412,
"reward_std": 0.1300518661737442,
"rewards/accuracy_reward_long_step": 0.578125,
"rewards/final_brier_reward_long_step": 0.794231653213501,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8788096904754639,
"step": 448
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 595.0,
"completions/max_terminated_length": 595.0,
"completions/mean_length": 225.171875,
"completions/mean_terminated_length": 225.171875,
"completions/min_length": 72.0,
"completions/min_terminated_length": 72.0,
"epoch": 0.7184,
"grad_norm": 0.037970248609781265,
"learning_rate": 3.1316725978647687e-07,
"loss": 0.0184,
"num_tokens": 220128044.0,
"reward": 1.2982159852981567,
"reward_std": 0.18391726911067963,
"rewards/accuracy_reward_long_step": 0.421875,
"rewards/final_brier_reward_long_step": 0.7201941013336182,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7851696014404297,
"step": 449
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 462.0,
"completions/max_terminated_length": 462.0,
"completions/mean_length": 233.64453125,
"completions/mean_terminated_length": 233.64453125,
"completions/min_length": 79.0,
"completions/min_terminated_length": 79.0,
"epoch": 0.72,
"grad_norm": 0.040078479796648026,
"learning_rate": 3.113879003558719e-07,
"loss": -0.0011,
"num_tokens": 220588769.0,
"reward": 1.5124320983886719,
"reward_std": 0.10843676328659058,
"rewards/accuracy_reward_long_step": 0.6015625,
"rewards/final_brier_reward_long_step": 0.7919909954071045,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8593000173568726,
"step": 450
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 494.0,
"completions/max_terminated_length": 494.0,
"completions/mean_length": 217.15234375,
"completions/mean_terminated_length": 217.15234375,
"completions/min_length": 74.0,
"completions/min_terminated_length": 74.0,
"epoch": 0.7216,
"grad_norm": 0.03554774448275566,
"learning_rate": 3.096085409252669e-07,
"loss": 0.007,
"num_tokens": 221075272.0,
"reward": 1.4489461183547974,
"reward_std": 0.10056018829345703,
"rewards/accuracy_reward_long_step": 0.53125,
"rewards/final_brier_reward_long_step": 0.811571478843689,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.85921311378479,
"step": 451
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 508.0,
"completions/max_terminated_length": 508.0,
"completions/mean_length": 220.69140625,
"completions/mean_terminated_length": 220.69140625,
"completions/min_length": 88.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.7232,
"grad_norm": 0.03730124980211258,
"learning_rate": 3.078291814946619e-07,
"loss": -0.0023,
"num_tokens": 221558689.0,
"reward": 1.3989410400390625,
"reward_std": 0.10487143695354462,
"rewards/accuracy_reward_long_step": 0.55078125,
"rewards/final_brier_reward_long_step": 0.6452429294586182,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7473963499069214,
"step": 452
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 518.0,
"completions/max_terminated_length": 518.0,
"completions/mean_length": 217.04296875,
"completions/mean_terminated_length": 217.04296875,
"completions/min_length": 88.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.7248,
"grad_norm": 0.038142986595630646,
"learning_rate": 3.0604982206405693e-07,
"loss": -0.025,
"num_tokens": 222026756.0,
"reward": 1.5077660083770752,
"reward_std": 0.146861732006073,
"rewards/accuracy_reward_long_step": 0.625,
"rewards/final_brier_reward_long_step": 0.6923167705535889,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8387469053268433,
"step": 453
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 453.0,
"completions/max_terminated_length": 453.0,
"completions/mean_length": 219.828125,
"completions/mean_terminated_length": 219.828125,
"completions/min_length": 74.0,
"completions/min_terminated_length": 74.0,
"epoch": 0.7264,
"grad_norm": 0.039899833500385284,
"learning_rate": 3.0427046263345194e-07,
"loss": -0.0029,
"num_tokens": 222498648.0,
"reward": 1.468416452407837,
"reward_std": 0.08466193825006485,
"rewards/accuracy_reward_long_step": 0.54296875,
"rewards/final_brier_reward_long_step": 0.8156249523162842,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8861656188964844,
"step": 454
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 492.0,
"completions/max_terminated_length": 492.0,
"completions/mean_length": 226.6796875,
"completions/mean_terminated_length": 226.6796875,
"completions/min_length": 82.0,
"completions/min_terminated_length": 82.0,
"epoch": 0.728,
"grad_norm": 0.04437141865491867,
"learning_rate": 3.02491103202847e-07,
"loss": 0.009,
"num_tokens": 222986366.0,
"reward": 1.3412034511566162,
"reward_std": 0.17921078205108643,
"rewards/accuracy_reward_long_step": 0.44921875,
"rewards/final_brier_reward_long_step": 0.7214792966842651,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8464598655700684,
"step": 455
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 463.0,
"completions/max_terminated_length": 463.0,
"completions/mean_length": 231.34765625,
"completions/mean_terminated_length": 231.34765625,
"completions/min_length": 114.0,
"completions/min_terminated_length": 114.0,
"epoch": 0.7296,
"grad_norm": 0.03869875520467758,
"learning_rate": 3.0071174377224197e-07,
"loss": 0.0084,
"num_tokens": 223455831.0,
"reward": 1.5209224224090576,
"reward_std": 0.1157989650964737,
"rewards/accuracy_reward_long_step": 0.6328125,
"rewards/final_brier_reward_long_step": 0.7708175778388977,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7816216945648193,
"step": 456
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 430.0,
"completions/max_terminated_length": 430.0,
"completions/mean_length": 227.9765625,
"completions/mean_terminated_length": 227.9765625,
"completions/min_length": 98.0,
"completions/min_terminated_length": 98.0,
"epoch": 0.7312,
"grad_norm": 0.04939800873398781,
"learning_rate": 2.98932384341637e-07,
"loss": -0.0007,
"num_tokens": 223944633.0,
"reward": 1.2755203247070312,
"reward_std": 0.16737382113933563,
"rewards/accuracy_reward_long_step": 0.4140625,
"rewards/final_brier_reward_long_step": 0.694128155708313,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7517032623291016,
"step": 457
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 434.0,
"completions/max_terminated_length": 434.0,
"completions/mean_length": 209.98046875,
"completions/mean_terminated_length": 209.98046875,
"completions/min_length": 85.0,
"completions/min_terminated_length": 85.0,
"epoch": 0.7328,
"grad_norm": 0.0375138595700264,
"learning_rate": 2.97153024911032e-07,
"loss": 0.0098,
"num_tokens": 224409132.0,
"reward": 1.4265563488006592,
"reward_std": 0.17411382496356964,
"rewards/accuracy_reward_long_step": 0.53515625,
"rewards/final_brier_reward_long_step": 0.7519593834877014,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8214536309242249,
"step": 458
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 536.0,
"completions/max_terminated_length": 536.0,
"completions/mean_length": 220.8828125,
"completions/mean_terminated_length": 220.8828125,
"completions/min_length": 66.0,
"completions/min_terminated_length": 66.0,
"epoch": 0.7344,
"grad_norm": 0.03728632628917694,
"learning_rate": 2.95373665480427e-07,
"loss": -0.0191,
"num_tokens": 224884646.0,
"reward": 1.4635272026062012,
"reward_std": 0.08029723912477493,
"rewards/accuracy_reward_long_step": 0.5625,
"rewards/final_brier_reward_long_step": 0.7463421821594238,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8577666282653809,
"step": 459
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 674.0,
"completions/max_terminated_length": 674.0,
"completions/mean_length": 234.73828125,
"completions/mean_terminated_length": 234.73828125,
"completions/min_length": 93.0,
"completions/min_terminated_length": 93.0,
"epoch": 0.736,
"grad_norm": 0.032615575939416885,
"learning_rate": 2.9359430604982203e-07,
"loss": 0.0027,
"num_tokens": 225385123.0,
"reward": 1.5013632774353027,
"reward_std": 0.09813569486141205,
"rewards/accuracy_reward_long_step": 0.58984375,
"rewards/final_brier_reward_long_step": 0.8148428201675415,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8312351703643799,
"step": 460
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 461.0,
"completions/max_terminated_length": 461.0,
"completions/mean_length": 231.265625,
"completions/mean_terminated_length": 231.265625,
"completions/min_length": 104.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.7376,
"grad_norm": 0.038810133934020996,
"learning_rate": 2.918149466192171e-07,
"loss": 0.0063,
"num_tokens": 225872791.0,
"reward": 1.4602744579315186,
"reward_std": 0.15246982872486115,
"rewards/accuracy_reward_long_step": 0.5390625,
"rewards/final_brier_reward_long_step": 0.8496265411376953,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8352211713790894,
"step": 461
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 461.0,
"completions/max_terminated_length": 461.0,
"completions/mean_length": 226.99609375,
"completions/mean_terminated_length": 226.99609375,
"completions/min_length": 104.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.7392,
"grad_norm": 0.037601787596940994,
"learning_rate": 2.900355871886121e-07,
"loss": -0.0017,
"num_tokens": 226377910.0,
"reward": 1.4851224422454834,
"reward_std": 0.12114303559064865,
"rewards/accuracy_reward_long_step": 0.6015625,
"rewards/final_brier_reward_long_step": 0.691071093082428,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8431686162948608,
"step": 462
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 422.0,
"completions/max_terminated_length": 422.0,
"completions/mean_length": 222.1015625,
"completions/mean_terminated_length": 222.1015625,
"completions/min_length": 99.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.7408,
"grad_norm": 0.03438973426818848,
"learning_rate": 2.882562277580071e-07,
"loss": 0.0082,
"num_tokens": 226869616.0,
"reward": 1.5410441160202026,
"reward_std": 0.10235248506069183,
"rewards/accuracy_reward_long_step": 0.65625,
"rewards/final_brier_reward_long_step": 0.7193183302879333,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8198581337928772,
"step": 463
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 383.0,
"completions/max_terminated_length": 383.0,
"completions/mean_length": 220.09375,
"completions/mean_terminated_length": 220.09375,
"completions/min_length": 101.0,
"completions/min_terminated_length": 101.0,
"epoch": 0.7424,
"grad_norm": 0.055105436593294144,
"learning_rate": 2.8647686832740214e-07,
"loss": -0.0036,
"num_tokens": 227356184.0,
"reward": 1.388896107673645,
"reward_std": 0.10595919191837311,
"rewards/accuracy_reward_long_step": 0.5546875,
"rewards/final_brier_reward_long_step": 0.5826694965362549,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7541650533676147,
"step": 464
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 575.0,
"completions/max_terminated_length": 575.0,
"completions/mean_length": 223.9765625,
"completions/mean_terminated_length": 223.9765625,
"completions/min_length": 101.0,
"completions/min_terminated_length": 101.0,
"epoch": 0.744,
"grad_norm": 0.033698540180921555,
"learning_rate": 2.8469750889679715e-07,
"loss": 0.0016,
"num_tokens": 227845482.0,
"reward": 1.5462651252746582,
"reward_std": 0.10527972877025604,
"rewards/accuracy_reward_long_step": 0.62890625,
"rewards/final_brier_reward_long_step": 0.8236533403396606,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8457825183868408,
"step": 465
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 639.0,
"completions/max_terminated_length": 639.0,
"completions/mean_length": 229.96484375,
"completions/mean_terminated_length": 229.96484375,
"completions/min_length": 91.0,
"completions/min_terminated_length": 91.0,
"epoch": 0.7456,
"grad_norm": 0.05342903360724449,
"learning_rate": 2.829181494661921e-07,
"loss": 0.0089,
"num_tokens": 228346801.0,
"reward": 1.3685379028320312,
"reward_std": 0.1639987826347351,
"rewards/accuracy_reward_long_step": 0.48046875,
"rewards/final_brier_reward_long_step": 0.7799558639526367,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7723206281661987,
"step": 466
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 508.0,
"completions/max_terminated_length": 508.0,
"completions/mean_length": 230.171875,
"completions/mean_terminated_length": 230.171875,
"completions/min_length": 123.0,
"completions/min_terminated_length": 123.0,
"epoch": 0.7472,
"grad_norm": 0.037923168390989304,
"learning_rate": 2.811387900355872e-07,
"loss": -0.0086,
"num_tokens": 228834093.0,
"reward": 1.4359982013702393,
"reward_std": 0.12845033407211304,
"rewards/accuracy_reward_long_step": 0.53125,
"rewards/final_brier_reward_long_step": 0.7942116856575012,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.824781060218811,
"step": 467
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 505.0,
"completions/max_terminated_length": 505.0,
"completions/mean_length": 226.984375,
"completions/mean_terminated_length": 226.984375,
"completions/min_length": 139.0,
"completions/min_terminated_length": 139.0,
"epoch": 0.7488,
"grad_norm": 0.040297914296388626,
"learning_rate": 2.793594306049822e-07,
"loss": -0.012,
"num_tokens": 229311481.0,
"reward": 1.357725977897644,
"reward_std": 0.11814339458942413,
"rewards/accuracy_reward_long_step": 0.46484375,
"rewards/final_brier_reward_long_step": 0.7514722347259521,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.820056676864624,
"step": 468
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 468.0,
"completions/max_terminated_length": 468.0,
"completions/mean_length": 220.39453125,
"completions/mean_terminated_length": 220.39453125,
"completions/min_length": 96.0,
"completions/min_terminated_length": 96.0,
"epoch": 0.7504,
"grad_norm": 0.048747822642326355,
"learning_rate": 2.775800711743772e-07,
"loss": 0.0175,
"num_tokens": 229784622.0,
"reward": 1.3947033882141113,
"reward_std": 0.13809074461460114,
"rewards/accuracy_reward_long_step": 0.50390625,
"rewards/final_brier_reward_long_step": 0.7879499793052673,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.775239109992981,
"step": 469
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 457.0,
"completions/max_terminated_length": 457.0,
"completions/mean_length": 220.22265625,
"completions/mean_terminated_length": 220.22265625,
"completions/min_length": 96.0,
"completions/min_terminated_length": 96.0,
"epoch": 0.752,
"grad_norm": 0.05068999528884888,
"learning_rate": 2.758007117437722e-07,
"loss": -0.0182,
"num_tokens": 230259999.0,
"reward": 1.2983078956604004,
"reward_std": 0.10855422168970108,
"rewards/accuracy_reward_long_step": 0.42578125,
"rewards/final_brier_reward_long_step": 0.7253687381744385,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7647379040718079,
"step": 470
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 498.0,
"completions/max_terminated_length": 498.0,
"completions/mean_length": 228.24609375,
"completions/mean_terminated_length": 228.24609375,
"completions/min_length": 124.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.7536,
"grad_norm": 0.036536820232868195,
"learning_rate": 2.7402135231316724e-07,
"loss": -0.0054,
"num_tokens": 230736478.0,
"reward": 1.6196913719177246,
"reward_std": 0.10245537757873535,
"rewards/accuracy_reward_long_step": 0.7109375,
"rewards/final_brier_reward_long_step": 0.8031273484230042,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8318880796432495,
"step": 471
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 459.0,
"completions/max_terminated_length": 459.0,
"completions/mean_length": 230.26171875,
"completions/mean_terminated_length": 230.26171875,
"completions/min_length": 114.0,
"completions/min_terminated_length": 114.0,
"epoch": 0.7552,
"grad_norm": 0.040038324892520905,
"learning_rate": 2.7224199288256225e-07,
"loss": 0.0134,
"num_tokens": 231215745.0,
"reward": 1.253148078918457,
"reward_std": 0.10847177356481552,
"rewards/accuracy_reward_long_step": 0.3359375,
"rewards/final_brier_reward_long_step": 0.8157835602760315,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8530591130256653,
"step": 472
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 459.0,
"completions/max_terminated_length": 459.0,
"completions/mean_length": 228.953125,
"completions/mean_terminated_length": 228.953125,
"completions/min_length": 128.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.7568,
"grad_norm": 0.03876092657446861,
"learning_rate": 2.704626334519573e-07,
"loss": -0.002,
"num_tokens": 231688509.0,
"reward": 1.2908313274383545,
"reward_std": 0.11586640775203705,
"rewards/accuracy_reward_long_step": 0.38671875,
"rewards/final_brier_reward_long_step": 0.7823525667190552,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8340977430343628,
"step": 473
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 468.0,
"completions/max_terminated_length": 468.0,
"completions/mean_length": 224.69921875,
"completions/mean_terminated_length": 224.69921875,
"completions/min_length": 132.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.7584,
"grad_norm": 0.03896433115005493,
"learning_rate": 2.6868327402135234e-07,
"loss": 0.0022,
"num_tokens": 232176304.0,
"reward": 1.4751133918762207,
"reward_std": 0.09986962378025055,
"rewards/accuracy_reward_long_step": 0.56640625,
"rewards/final_brier_reward_long_step": 0.8110538721084595,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8237748742103577,
"step": 474
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 586.0,
"completions/max_terminated_length": 586.0,
"completions/mean_length": 241.28125,
"completions/mean_terminated_length": 241.28125,
"completions/min_length": 125.0,
"completions/min_terminated_length": 125.0,
"epoch": 0.76,
"grad_norm": 0.04798499867320061,
"learning_rate": 2.669039145907473e-07,
"loss": 0.0133,
"num_tokens": 232659224.0,
"reward": 1.4235737323760986,
"reward_std": 0.14305740594863892,
"rewards/accuracy_reward_long_step": 0.55859375,
"rewards/final_brier_reward_long_step": 0.6940581798553467,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7736741304397583,
"step": 475
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 580.0,
"completions/max_terminated_length": 580.0,
"completions/mean_length": 248.6875,
"completions/mean_terminated_length": 248.6875,
"completions/min_length": 110.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.7616,
"grad_norm": 0.0414130873978138,
"learning_rate": 2.651245551601423e-07,
"loss": 0.0079,
"num_tokens": 233153904.0,
"reward": 1.4499199390411377,
"reward_std": 0.13342790305614471,
"rewards/accuracy_reward_long_step": 0.53515625,
"rewards/final_brier_reward_long_step": 0.8312417268753052,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8278130292892456,
"step": 476
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 578.0,
"completions/max_terminated_length": 578.0,
"completions/mean_length": 244.85546875,
"completions/mean_terminated_length": 244.85546875,
"completions/min_length": 123.0,
"completions/min_terminated_length": 123.0,
"epoch": 0.7632,
"grad_norm": 0.03957832232117653,
"learning_rate": 2.6334519572953733e-07,
"loss": -0.0045,
"num_tokens": 233630819.0,
"reward": 1.3480762243270874,
"reward_std": 0.1540793478488922,
"rewards/accuracy_reward_long_step": 0.47265625,
"rewards/final_brier_reward_long_step": 0.7323105335235596,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7693694233894348,
"step": 477
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 429.0,
"completions/max_terminated_length": 429.0,
"completions/mean_length": 217.80078125,
"completions/mean_terminated_length": 217.80078125,
"completions/min_length": 78.0,
"completions/min_terminated_length": 78.0,
"epoch": 0.7648,
"grad_norm": 0.040704309940338135,
"learning_rate": 2.6156583629893234e-07,
"loss": 0.002,
"num_tokens": 234112184.0,
"reward": 1.4834253787994385,
"reward_std": 0.176324263215065,
"rewards/accuracy_reward_long_step": 0.58984375,
"rewards/final_brier_reward_long_step": 0.7984238266944885,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7759029865264893,
"step": 478
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 468.0,
"completions/max_terminated_length": 468.0,
"completions/mean_length": 225.7578125,
"completions/mean_terminated_length": 225.7578125,
"completions/min_length": 124.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.7664,
"grad_norm": 0.039327558130025864,
"learning_rate": 2.597864768683274e-07,
"loss": 0.0018,
"num_tokens": 234606322.0,
"reward": 1.312846064567566,
"reward_std": 0.09807312488555908,
"rewards/accuracy_reward_long_step": 0.41796875,
"rewards/final_brier_reward_long_step": 0.7961425185203552,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7833666205406189,
"step": 479
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 603.0,
"completions/max_terminated_length": 603.0,
"completions/mean_length": 224.43359375,
"completions/mean_terminated_length": 224.43359375,
"completions/min_length": 112.0,
"completions/min_terminated_length": 112.0,
"epoch": 0.768,
"grad_norm": 0.05755281448364258,
"learning_rate": 2.580071174377224e-07,
"loss": 0.0044,
"num_tokens": 235077329.0,
"reward": 1.3882198333740234,
"reward_std": 0.12557154893875122,
"rewards/accuracy_reward_long_step": 0.49609375,
"rewards/final_brier_reward_long_step": 0.7875798344612122,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.780924916267395,
"step": 480
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 630.0,
"completions/max_terminated_length": 630.0,
"completions/mean_length": 230.8125,
"completions/mean_terminated_length": 231.71766662597656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.7696,
"grad_norm": 0.04049382731318474,
"learning_rate": 2.5622775800711744e-07,
"loss": -0.0087,
"num_tokens": 235557009.0,
"reward": 1.3864805698394775,
"reward_std": 0.1668914556503296,
"rewards/accuracy_reward_long_step": 0.49609375,
"rewards/final_brier_reward_long_step": 0.7072670459747314,
"rewards/format_reward_long_step": 0.98828125,
"rewards/stepwise_brier_reward_long_step": 0.8777177929878235,
"step": 481
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 487.0,
"completions/max_terminated_length": 487.0,
"completions/mean_length": 232.35546875,
"completions/mean_terminated_length": 232.35546875,
"completions/min_length": 133.0,
"completions/min_terminated_length": 133.0,
"epoch": 0.7712,
"grad_norm": 0.04888352006673813,
"learning_rate": 2.5444839857651245e-07,
"loss": 0.0026,
"num_tokens": 236049124.0,
"reward": 1.5799849033355713,
"reward_std": 0.10926343500614166,
"rewards/accuracy_reward_long_step": 0.68359375,
"rewards/final_brier_reward_long_step": 0.763283908367157,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8222807049751282,
"step": 482
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 469.0,
"completions/max_terminated_length": 469.0,
"completions/mean_length": 233.8671875,
"completions/mean_terminated_length": 233.8671875,
"completions/min_length": 120.0,
"completions/min_terminated_length": 120.0,
"epoch": 0.7728,
"grad_norm": 0.035025861114263535,
"learning_rate": 2.5266903914590747e-07,
"loss": -0.0001,
"num_tokens": 236535578.0,
"reward": 1.404916524887085,
"reward_std": 0.10989418625831604,
"rewards/accuracy_reward_long_step": 0.48828125,
"rewards/final_brier_reward_long_step": 0.8273136615753174,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.839227557182312,
"step": 483
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 463.0,
"completions/max_terminated_length": 463.0,
"completions/mean_length": 229.078125,
"completions/mean_terminated_length": 229.078125,
"completions/min_length": 128.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.7744,
"grad_norm": 0.03963632136583328,
"learning_rate": 2.508896797153025e-07,
"loss": -0.0033,
"num_tokens": 237017478.0,
"reward": 1.5163967609405518,
"reward_std": 0.15413016080856323,
"rewards/accuracy_reward_long_step": 0.64453125,
"rewards/final_brier_reward_long_step": 0.6957645416259766,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7916977405548096,
"step": 484
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 445.0,
"completions/max_terminated_length": 445.0,
"completions/mean_length": 233.1484375,
"completions/mean_terminated_length": 233.1484375,
"completions/min_length": 140.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.776,
"grad_norm": 0.0386706106364727,
"learning_rate": 2.491103202846975e-07,
"loss": 0.0037,
"num_tokens": 237493188.0,
"reward": 1.385927677154541,
"reward_std": 0.19464275240898132,
"rewards/accuracy_reward_long_step": 0.4921875,
"rewards/final_brier_reward_long_step": 0.7536214590072632,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8213395476341248,
"step": 485
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 522.0,
"completions/max_terminated_length": 522.0,
"completions/mean_length": 238.83984375,
"completions/mean_terminated_length": 238.83984375,
"completions/min_length": 130.0,
"completions/min_terminated_length": 130.0,
"epoch": 0.7776,
"grad_norm": 0.040338389575481415,
"learning_rate": 2.473309608540925e-07,
"loss": 0.0038,
"num_tokens": 237987779.0,
"reward": 1.5493258237838745,
"reward_std": 0.15063825249671936,
"rewards/accuracy_reward_long_step": 0.66015625,
"rewards/final_brier_reward_long_step": 0.7311683893203735,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8255099058151245,
"step": 486
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 467.0,
"completions/max_terminated_length": 467.0,
"completions/mean_length": 219.1171875,
"completions/mean_terminated_length": 219.1171875,
"completions/min_length": 138.0,
"completions/min_terminated_length": 138.0,
"epoch": 0.7792,
"grad_norm": 0.059350065886974335,
"learning_rate": 2.455516014234875e-07,
"loss": 0.0032,
"num_tokens": 238479841.0,
"reward": 1.5190156698226929,
"reward_std": 0.1180032342672348,
"rewards/accuracy_reward_long_step": 0.625,
"rewards/final_brier_reward_long_step": 0.7632279396057129,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8128350973129272,
"step": 487
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 595.0,
"completions/max_terminated_length": 595.0,
"completions/mean_length": 239.76953125,
"completions/mean_terminated_length": 239.76953125,
"completions/min_length": 92.0,
"completions/min_terminated_length": 92.0,
"epoch": 0.7808,
"grad_norm": 0.04275665804743767,
"learning_rate": 2.4377224199288254e-07,
"loss": 0.0087,
"num_tokens": 238974614.0,
"reward": 1.480884313583374,
"reward_std": 0.11418268084526062,
"rewards/accuracy_reward_long_step": 0.58984375,
"rewards/final_brier_reward_long_step": 0.7228184938430786,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8413438200950623,
"step": 488
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 570.0,
"completions/max_terminated_length": 570.0,
"completions/mean_length": 235.77734375,
"completions/mean_terminated_length": 235.77734375,
"completions/min_length": 143.0,
"completions/min_terminated_length": 143.0,
"epoch": 0.7824,
"grad_norm": 0.04414826259016991,
"learning_rate": 2.4199288256227755e-07,
"loss": -0.0019,
"num_tokens": 239466805.0,
"reward": 1.2463829517364502,
"reward_std": 0.14157749712467194,
"rewards/accuracy_reward_long_step": 0.3984375,
"rewards/final_brier_reward_long_step": 0.6166784763336182,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7751035094261169,
"step": 489
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 510.0,
"completions/max_terminated_length": 510.0,
"completions/mean_length": 246.86328125,
"completions/mean_terminated_length": 246.86328125,
"completions/min_length": 87.0,
"completions/min_terminated_length": 87.0,
"epoch": 0.784,
"grad_norm": 0.035570476204156876,
"learning_rate": 2.4021352313167257e-07,
"loss": 0.0011,
"num_tokens": 239961386.0,
"reward": 1.3018330335617065,
"reward_std": 0.10914282500743866,
"rewards/accuracy_reward_long_step": 0.40625,
"rewards/final_brier_reward_long_step": 0.7352542877197266,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8470777869224548,
"step": 490
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 553.0,
"completions/max_terminated_length": 553.0,
"completions/mean_length": 238.81640625,
"completions/mean_terminated_length": 238.81640625,
"completions/min_length": 133.0,
"completions/min_terminated_length": 133.0,
"epoch": 0.7856,
"grad_norm": 0.04079211875796318,
"learning_rate": 2.3843416370106764e-07,
"loss": 0.0092,
"num_tokens": 240441995.0,
"reward": 1.3198940753936768,
"reward_std": 0.1449519544839859,
"rewards/accuracy_reward_long_step": 0.45703125,
"rewards/final_brier_reward_long_step": 0.6919292211532593,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7595219612121582,
"step": 491
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 551.0,
"completions/max_terminated_length": 551.0,
"completions/mean_length": 239.6015625,
"completions/mean_terminated_length": 239.6015625,
"completions/min_length": 125.0,
"completions/min_terminated_length": 125.0,
"epoch": 0.7872,
"grad_norm": 0.051125992089509964,
"learning_rate": 2.366548042704626e-07,
"loss": 0.0046,
"num_tokens": 240913021.0,
"reward": 1.5111445188522339,
"reward_std": 0.1414456069469452,
"rewards/accuracy_reward_long_step": 0.609375,
"rewards/final_brier_reward_long_step": 0.7685543298721313,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.838523805141449,
"step": 492
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 408.0,
"completions/max_terminated_length": 408.0,
"completions/mean_length": 224.24609375,
"completions/mean_terminated_length": 224.24609375,
"completions/min_length": 118.0,
"completions/min_terminated_length": 118.0,
"epoch": 0.7888,
"grad_norm": 0.0370771661400795,
"learning_rate": 2.3487544483985764e-07,
"loss": 0.011,
"num_tokens": 241386988.0,
"reward": 1.56308913230896,
"reward_std": 0.10970332473516464,
"rewards/accuracy_reward_long_step": 0.66015625,
"rewards/final_brier_reward_long_step": 0.8099468946456909,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8017843961715698,
"step": 493
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 472.0,
"completions/max_terminated_length": 472.0,
"completions/mean_length": 240.046875,
"completions/mean_terminated_length": 240.046875,
"completions/min_length": 109.0,
"completions/min_terminated_length": 109.0,
"epoch": 0.7904,
"grad_norm": 0.0435328409075737,
"learning_rate": 2.3309608540925265e-07,
"loss": 0.0084,
"num_tokens": 241883192.0,
"reward": 1.4550728797912598,
"reward_std": 0.056624144315719604,
"rewards/accuracy_reward_long_step": 0.54296875,
"rewards/final_brier_reward_long_step": 0.8555335998535156,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7928834557533264,
"step": 494
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 718.0,
"completions/max_terminated_length": 718.0,
"completions/mean_length": 237.8515625,
"completions/mean_terminated_length": 237.8515625,
"completions/min_length": 136.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.792,
"grad_norm": 0.03587677702307701,
"learning_rate": 2.313167259786477e-07,
"loss": 0.0089,
"num_tokens": 242375578.0,
"reward": 1.5103130340576172,
"reward_std": 0.10776931047439575,
"rewards/accuracy_reward_long_step": 0.59765625,
"rewards/final_brier_reward_long_step": 0.8439902067184448,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8066369295120239,
"step": 495
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 945.0,
"completions/max_terminated_length": 945.0,
"completions/mean_length": 239.59375,
"completions/mean_terminated_length": 239.59375,
"completions/min_length": 132.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.7936,
"grad_norm": 0.041137006133794785,
"learning_rate": 2.295373665480427e-07,
"loss": 0.0062,
"num_tokens": 242859330.0,
"reward": 1.4777976274490356,
"reward_std": 0.14235195517539978,
"rewards/accuracy_reward_long_step": 0.5703125,
"rewards/final_brier_reward_long_step": 0.8539682030677795,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7759724259376526,
"step": 496
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 544.0,
"completions/max_terminated_length": 544.0,
"completions/mean_length": 230.46484375,
"completions/mean_terminated_length": 230.46484375,
"completions/min_length": 128.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.7952,
"grad_norm": 0.03613395616412163,
"learning_rate": 2.277580071174377e-07,
"loss": -0.0055,
"num_tokens": 243345321.0,
"reward": 1.5040514469146729,
"reward_std": 0.1266600787639618,
"rewards/accuracy_reward_long_step": 0.60546875,
"rewards/final_brier_reward_long_step": 0.8192323446273804,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7750980257987976,
"step": 497
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 378.0,
"completions/max_terminated_length": 378.0,
"completions/mean_length": 226.73828125,
"completions/mean_terminated_length": 226.73828125,
"completions/min_length": 127.0,
"completions/min_terminated_length": 127.0,
"epoch": 0.7968,
"grad_norm": 0.04258696362376213,
"learning_rate": 2.2597864768683274e-07,
"loss": -0.0045,
"num_tokens": 243827942.0,
"reward": 1.3869428634643555,
"reward_std": 0.1397380828857422,
"rewards/accuracy_reward_long_step": 0.4921875,
"rewards/final_brier_reward_long_step": 0.7650054693222046,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8140161037445068,
"step": 498
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 530.0,
"completions/max_terminated_length": 530.0,
"completions/mean_length": 240.43359375,
"completions/mean_terminated_length": 240.43359375,
"completions/min_length": 104.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.7984,
"grad_norm": 0.03320414200425148,
"learning_rate": 2.2419928825622775e-07,
"loss": -0.0118,
"num_tokens": 244325933.0,
"reward": 1.4908723831176758,
"reward_std": 0.16621750593185425,
"rewards/accuracy_reward_long_step": 0.60546875,
"rewards/final_brier_reward_long_step": 0.7513167858123779,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7902982234954834,
"step": 499
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 454.0,
"completions/max_terminated_length": 454.0,
"completions/mean_length": 226.42578125,
"completions/mean_terminated_length": 226.42578125,
"completions/min_length": 123.0,
"completions/min_terminated_length": 123.0,
"epoch": 0.8,
"grad_norm": 0.03856438770890236,
"learning_rate": 2.2241992882562277e-07,
"loss": 0.0033,
"num_tokens": 244805666.0,
"reward": 1.5031077861785889,
"reward_std": 0.12228081375360489,
"rewards/accuracy_reward_long_step": 0.59375,
"rewards/final_brier_reward_long_step": 0.8210663795471191,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8163642883300781,
"step": 500
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 433.0,
"completions/max_terminated_length": 433.0,
"completions/mean_length": 220.3984375,
"completions/mean_terminated_length": 220.3984375,
"completions/min_length": 120.0,
"completions/min_terminated_length": 120.0,
"epoch": 0.8016,
"grad_norm": 0.0355200357735157,
"learning_rate": 2.206405693950178e-07,
"loss": -0.0003,
"num_tokens": 245298544.0,
"reward": 1.5138522386550903,
"reward_std": 0.15135133266448975,
"rewards/accuracy_reward_long_step": 0.578125,
"rewards/final_brier_reward_long_step": 0.8448459506034851,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.9058756828308105,
"step": 501
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 425.0,
"completions/max_terminated_length": 425.0,
"completions/mean_length": 237.34765625,
"completions/mean_terminated_length": 237.34765625,
"completions/min_length": 130.0,
"completions/min_terminated_length": 130.0,
"epoch": 0.8032,
"grad_norm": 0.037190381437540054,
"learning_rate": 2.188612099644128e-07,
"loss": 0.0034,
"num_tokens": 245784137.0,
"reward": 1.5037150382995605,
"reward_std": 0.15791866183280945,
"rewards/accuracy_reward_long_step": 0.609375,
"rewards/final_brier_reward_long_step": 0.7559190988540649,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8214409947395325,
"step": 502
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 555.0,
"completions/max_terminated_length": 555.0,
"completions/mean_length": 224.734375,
"completions/mean_terminated_length": 224.734375,
"completions/min_length": 103.0,
"completions/min_terminated_length": 103.0,
"epoch": 0.8048,
"grad_norm": 0.0395224392414093,
"learning_rate": 2.170818505338078e-07,
"loss": 0.0042,
"num_tokens": 246249421.0,
"reward": 1.5526416301727295,
"reward_std": 0.11045798659324646,
"rewards/accuracy_reward_long_step": 0.6484375,
"rewards/final_brier_reward_long_step": 0.818356990814209,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7984594702720642,
"step": 503
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 514.0,
"completions/max_terminated_length": 514.0,
"completions/mean_length": 236.96484375,
"completions/mean_terminated_length": 236.96484375,
"completions/min_length": 135.0,
"completions/min_terminated_length": 135.0,
"epoch": 0.8064,
"grad_norm": 0.03467653691768646,
"learning_rate": 2.1530249110320285e-07,
"loss": 0.0056,
"num_tokens": 246741180.0,
"reward": 1.4294129610061646,
"reward_std": 0.15054050087928772,
"rewards/accuracy_reward_long_step": 0.55859375,
"rewards/final_brier_reward_long_step": 0.730989396572113,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7522871494293213,
"step": 504
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 491.0,
"completions/max_terminated_length": 491.0,
"completions/mean_length": 244.40234375,
"completions/mean_terminated_length": 244.40234375,
"completions/min_length": 119.0,
"completions/min_terminated_length": 119.0,
"epoch": 0.808,
"grad_norm": 0.03365206718444824,
"learning_rate": 2.1352313167259786e-07,
"loss": -0.0079,
"num_tokens": 247235891.0,
"reward": 1.3486175537109375,
"reward_std": 0.10975901782512665,
"rewards/accuracy_reward_long_step": 0.44140625,
"rewards/final_brier_reward_long_step": 0.8065632581710815,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8222817182540894,
"step": 505
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 522.0,
"completions/max_terminated_length": 522.0,
"completions/mean_length": 230.9921875,
"completions/mean_terminated_length": 230.9921875,
"completions/min_length": 125.0,
"completions/min_terminated_length": 125.0,
"epoch": 0.8096,
"grad_norm": 0.041645560413599014,
"learning_rate": 2.1174377224199288e-07,
"loss": 0.0051,
"num_tokens": 247714529.0,
"reward": 1.614863395690918,
"reward_std": 0.1503724455833435,
"rewards/accuracy_reward_long_step": 0.703125,
"rewards/final_brier_reward_long_step": 0.7402952909469604,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.9066582918167114,
"step": 506
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 606.0,
"completions/max_terminated_length": 606.0,
"completions/mean_length": 236.5625,
"completions/mean_terminated_length": 236.5625,
"completions/min_length": 133.0,
"completions/min_terminated_length": 133.0,
"epoch": 0.8112,
"grad_norm": 0.045049868524074554,
"learning_rate": 2.099644128113879e-07,
"loss": -0.0034,
"num_tokens": 248209161.0,
"reward": 1.312518835067749,
"reward_std": 0.1855650395154953,
"rewards/accuracy_reward_long_step": 0.44140625,
"rewards/final_brier_reward_long_step": 0.7521023154258728,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7323479056358337,
"step": 507
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 455.0,
"completions/max_terminated_length": 455.0,
"completions/mean_length": 229.140625,
"completions/mean_terminated_length": 229.140625,
"completions/min_length": 116.0,
"completions/min_terminated_length": 116.0,
"epoch": 0.8128,
"grad_norm": 0.047168269753456116,
"learning_rate": 2.081850533807829e-07,
"loss": 0.0165,
"num_tokens": 248688005.0,
"reward": 1.4646296501159668,
"reward_std": 0.09372396022081375,
"rewards/accuracy_reward_long_step": 0.57421875,
"rewards/final_brier_reward_long_step": 0.774226188659668,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7874171733856201,
"step": 508
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 486.0,
"completions/max_terminated_length": 486.0,
"completions/mean_length": 234.80078125,
"completions/mean_terminated_length": 234.80078125,
"completions/min_length": 131.0,
"completions/min_terminated_length": 131.0,
"epoch": 0.8144,
"grad_norm": 0.04063938930630684,
"learning_rate": 2.0640569395017792e-07,
"loss": -0.015,
"num_tokens": 249176586.0,
"reward": 1.3976056575775146,
"reward_std": 0.1380692720413208,
"rewards/accuracy_reward_long_step": 0.546875,
"rewards/final_brier_reward_long_step": 0.6868070363998413,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7161159515380859,
"step": 509
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 457.0,
"completions/max_terminated_length": 457.0,
"completions/mean_length": 234.921875,
"completions/mean_terminated_length": 234.921875,
"completions/min_length": 100.0,
"completions/min_terminated_length": 100.0,
"epoch": 0.816,
"grad_norm": 0.04220227152109146,
"learning_rate": 2.0462633451957296e-07,
"loss": -0.0111,
"num_tokens": 249668526.0,
"reward": 1.4461275339126587,
"reward_std": 0.17553001642227173,
"rewards/accuracy_reward_long_step": 0.5859375,
"rewards/final_brier_reward_long_step": 0.7025785446166992,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7381817102432251,
"step": 510
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 435.0,
"completions/max_terminated_length": 435.0,
"completions/mean_length": 223.1015625,
"completions/mean_terminated_length": 223.1015625,
"completions/min_length": 127.0,
"completions/min_terminated_length": 127.0,
"epoch": 0.8176,
"grad_norm": 0.04910369962453842,
"learning_rate": 2.0284697508896798e-07,
"loss": 0.013,
"num_tokens": 250138040.0,
"reward": 1.480068325996399,
"reward_std": 0.17966505885124207,
"rewards/accuracy_reward_long_step": 0.5390625,
"rewards/final_brier_reward_long_step": 0.8528038263320923,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.9112191200256348,
"step": 511
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 389.0,
"completions/max_terminated_length": 389.0,
"completions/mean_length": 235.9375,
"completions/mean_terminated_length": 235.9375,
"completions/min_length": 111.0,
"completions/min_terminated_length": 111.0,
"epoch": 0.8192,
"grad_norm": 0.03783193230628967,
"learning_rate": 2.0106761565836297e-07,
"loss": 0.0023,
"num_tokens": 250629424.0,
"reward": 1.5700932741165161,
"reward_std": 0.12437284737825394,
"rewards/accuracy_reward_long_step": 0.66015625,
"rewards/final_brier_reward_long_step": 0.8810421824455261,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7587060928344727,
"step": 512
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 510.0,
"completions/max_terminated_length": 510.0,
"completions/mean_length": 255.25390625,
"completions/mean_terminated_length": 255.25390625,
"completions/min_length": 126.0,
"completions/min_terminated_length": 126.0,
"epoch": 0.8208,
"grad_norm": 0.052237872034311295,
"learning_rate": 1.99288256227758e-07,
"loss": -0.0014,
"num_tokens": 251129969.0,
"reward": 1.3335564136505127,
"reward_std": 0.13581520318984985,
"rewards/accuracy_reward_long_step": 0.453125,
"rewards/final_brier_reward_long_step": 0.7354176044464111,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7863079905509949,
"step": 513
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 520.0,
"completions/max_terminated_length": 520.0,
"completions/mean_length": 227.5546875,
"completions/mean_terminated_length": 227.5546875,
"completions/min_length": 122.0,
"completions/min_terminated_length": 122.0,
"epoch": 0.8224,
"grad_norm": 0.03678586706519127,
"learning_rate": 1.9750889679715302e-07,
"loss": -0.0089,
"num_tokens": 251612919.0,
"reward": 1.4214198589324951,
"reward_std": 0.1188969761133194,
"rewards/accuracy_reward_long_step": 0.52734375,
"rewards/final_brier_reward_long_step": 0.7470394372940063,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8292652368545532,
"step": 514
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 514.0,
"completions/max_terminated_length": 514.0,
"completions/mean_length": 245.015625,
"completions/mean_terminated_length": 245.015625,
"completions/min_length": 133.0,
"completions/min_terminated_length": 133.0,
"epoch": 0.824,
"grad_norm": 0.05070950463414192,
"learning_rate": 1.9572953736654804e-07,
"loss": 0.0063,
"num_tokens": 252120547.0,
"reward": 1.5003015995025635,
"reward_std": 0.14131146669387817,
"rewards/accuracy_reward_long_step": 0.609375,
"rewards/final_brier_reward_long_step": 0.7343014478683472,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8294050693511963,
"step": 515
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 586.0,
"completions/max_terminated_length": 586.0,
"completions/mean_length": 234.69921875,
"completions/mean_terminated_length": 234.69921875,
"completions/min_length": 131.0,
"completions/min_terminated_length": 131.0,
"epoch": 0.8256,
"grad_norm": 0.04203731194138527,
"learning_rate": 1.9395017793594305e-07,
"loss": 0.0013,
"num_tokens": 252610774.0,
"reward": 1.4129828214645386,
"reward_std": 0.09008841961622238,
"rewards/accuracy_reward_long_step": 0.51171875,
"rewards/final_brier_reward_long_step": 0.7625414133071899,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8425147533416748,
"step": 516
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 516.0,
"completions/max_terminated_length": 516.0,
"completions/mean_length": 244.05078125,
"completions/mean_terminated_length": 244.05078125,
"completions/min_length": 102.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.8272,
"grad_norm": 0.04504576325416565,
"learning_rate": 1.9217081850533807e-07,
"loss": 0.0092,
"num_tokens": 253084835.0,
"reward": 1.3626664876937866,
"reward_std": 0.13317476212978363,
"rewards/accuracy_reward_long_step": 0.49609375,
"rewards/final_brier_reward_long_step": 0.7290824055671692,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7372086048126221,
"step": 517
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 535.0,
"completions/max_terminated_length": 535.0,
"completions/mean_length": 235.015625,
"completions/mean_terminated_length": 235.015625,
"completions/min_length": 103.0,
"completions/min_terminated_length": 103.0,
"epoch": 0.8288,
"grad_norm": 0.04393794387578964,
"learning_rate": 1.9039145907473308e-07,
"loss": 0.014,
"num_tokens": 253571311.0,
"reward": 1.5000429153442383,
"reward_std": 0.13177230954170227,
"rewards/accuracy_reward_long_step": 0.5859375,
"rewards/final_brier_reward_long_step": 0.7909968495368958,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8654246926307678,
"step": 518
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 596.0,
"completions/max_terminated_length": 596.0,
"completions/mean_length": 247.33203125,
"completions/mean_terminated_length": 247.33203125,
"completions/min_length": 132.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.8304,
"grad_norm": 0.03598223626613617,
"learning_rate": 1.8861209964412812e-07,
"loss": 0.0185,
"num_tokens": 254043356.0,
"reward": 1.3758127689361572,
"reward_std": 0.17646163702011108,
"rewards/accuracy_reward_long_step": 0.47265625,
"rewards/final_brier_reward_long_step": 0.7502039074897766,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8624222278594971,
"step": 519
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 550.0,
"completions/max_terminated_length": 550.0,
"completions/mean_length": 236.67578125,
"completions/mean_terminated_length": 236.67578125,
"completions/min_length": 105.0,
"completions/min_terminated_length": 105.0,
"epoch": 0.832,
"grad_norm": 0.03368176519870758,
"learning_rate": 1.8683274021352314e-07,
"loss": 0.0061,
"num_tokens": 254522601.0,
"reward": 1.4897700548171997,
"reward_std": 0.10578904300928116,
"rewards/accuracy_reward_long_step": 0.58984375,
"rewards/final_brier_reward_long_step": 0.7538363337516785,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8458687663078308,
"step": 520
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 458.0,
"completions/max_terminated_length": 458.0,
"completions/mean_length": 230.203125,
"completions/mean_terminated_length": 230.203125,
"completions/min_length": 127.0,
"completions/min_terminated_length": 127.0,
"epoch": 0.8336,
"grad_norm": 0.05814650282263756,
"learning_rate": 1.8505338078291812e-07,
"loss": 0.0063,
"num_tokens": 254993893.0,
"reward": 1.419898271560669,
"reward_std": 0.11159418523311615,
"rewards/accuracy_reward_long_step": 0.5390625,
"rewards/final_brier_reward_long_step": 0.7294105291366577,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7939323782920837,
"step": 521
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 633.0,
"completions/max_terminated_length": 633.0,
"completions/mean_length": 240.46875,
"completions/mean_terminated_length": 240.46875,
"completions/min_length": 136.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.8352,
"grad_norm": 0.03419061005115509,
"learning_rate": 1.8327402135231316e-07,
"loss": -0.0096,
"num_tokens": 255490549.0,
"reward": 1.4004169702529907,
"reward_std": 0.12270954251289368,
"rewards/accuracy_reward_long_step": 0.4765625,
"rewards/final_brier_reward_long_step": 0.8064777851104736,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8889400959014893,
"step": 522
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 499.0,
"completions/max_terminated_length": 499.0,
"completions/mean_length": 242.21875,
"completions/mean_terminated_length": 242.21875,
"completions/min_length": 130.0,
"completions/min_terminated_length": 130.0,
"epoch": 0.8368,
"grad_norm": 0.036356884986162186,
"learning_rate": 1.8149466192170818e-07,
"loss": -0.0055,
"num_tokens": 255991109.0,
"reward": 1.4097753763198853,
"reward_std": 0.10558044910430908,
"rewards/accuracy_reward_long_step": 0.51953125,
"rewards/final_brier_reward_long_step": 0.7266496419906616,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8343270421028137,
"step": 523
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 494.0,
"completions/max_terminated_length": 494.0,
"completions/mean_length": 235.08203125,
"completions/mean_terminated_length": 235.08203125,
"completions/min_length": 117.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.8384,
"grad_norm": 0.04891684278845787,
"learning_rate": 1.797153024911032e-07,
"loss": 0.0014,
"num_tokens": 256465850.0,
"reward": 1.4318642616271973,
"reward_std": 0.12251758575439453,
"rewards/accuracy_reward_long_step": 0.5390625,
"rewards/final_brier_reward_long_step": 0.768867552280426,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8023396730422974,
"step": 524
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 477.0,
"completions/max_terminated_length": 477.0,
"completions/mean_length": 231.28125,
"completions/mean_terminated_length": 231.28125,
"completions/min_length": 119.0,
"completions/min_terminated_length": 119.0,
"epoch": 0.84,
"grad_norm": 0.04874948784708977,
"learning_rate": 1.7793594306049823e-07,
"loss": -0.0015,
"num_tokens": 256950562.0,
"reward": 1.454651117324829,
"reward_std": 0.15635967254638672,
"rewards/accuracy_reward_long_step": 0.578125,
"rewards/final_brier_reward_long_step": 0.686775803565979,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8193286657333374,
"step": 525
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 503.0,
"completions/max_terminated_length": 503.0,
"completions/mean_length": 247.87109375,
"completions/mean_terminated_length": 247.87109375,
"completions/min_length": 106.0,
"completions/min_terminated_length": 106.0,
"epoch": 0.8416,
"grad_norm": 0.05547276511788368,
"learning_rate": 1.7615658362989322e-07,
"loss": 0.0132,
"num_tokens": 257429993.0,
"reward": 1.3650845289230347,
"reward_std": 0.16451743245124817,
"rewards/accuracy_reward_long_step": 0.46875,
"rewards/final_brier_reward_long_step": 0.779395341873169,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8059428930282593,
"step": 526
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 786.0,
"completions/max_terminated_length": 786.0,
"completions/mean_length": 250.98828125,
"completions/mean_terminated_length": 250.98828125,
"completions/min_length": 129.0,
"completions/min_terminated_length": 129.0,
"epoch": 0.8432,
"grad_norm": 0.035361409187316895,
"learning_rate": 1.7437722419928824e-07,
"loss": 0.0091,
"num_tokens": 257921134.0,
"reward": 1.50462007522583,
"reward_std": 0.12116993218660355,
"rewards/accuracy_reward_long_step": 0.60546875,
"rewards/final_brier_reward_long_step": 0.7998992204666138,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7967061400413513,
"step": 527
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 548.0,
"completions/max_terminated_length": 548.0,
"completions/mean_length": 253.18359375,
"completions/mean_terminated_length": 253.18359375,
"completions/min_length": 124.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.8448,
"grad_norm": 0.03830602765083313,
"learning_rate": 1.7259786476868328e-07,
"loss": -0.0055,
"num_tokens": 258402709.0,
"reward": 1.3311948776245117,
"reward_std": 0.11117161065340042,
"rewards/accuracy_reward_long_step": 0.46484375,
"rewards/final_brier_reward_long_step": 0.7396460771560669,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7257586717605591,
"step": 528
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 427.0,
"completions/max_terminated_length": 427.0,
"completions/mean_length": 249.9921875,
"completions/mean_terminated_length": 249.9921875,
"completions/min_length": 112.0,
"completions/min_terminated_length": 112.0,
"epoch": 0.8464,
"grad_norm": 0.04274242743849754,
"learning_rate": 1.708185053380783e-07,
"loss": 0.0085,
"num_tokens": 258892547.0,
"reward": 1.5267962217330933,
"reward_std": 0.10823096334934235,
"rewards/accuracy_reward_long_step": 0.609375,
"rewards/final_brier_reward_long_step": 0.8286827802658081,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8410018086433411,
"step": 529
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 533.0,
"completions/max_terminated_length": 533.0,
"completions/mean_length": 247.6875,
"completions/mean_terminated_length": 247.6875,
"completions/min_length": 139.0,
"completions/min_terminated_length": 139.0,
"epoch": 0.848,
"grad_norm": 0.046045657247304916,
"learning_rate": 1.690391459074733e-07,
"loss": -0.0093,
"num_tokens": 259389531.0,
"reward": 1.3362867832183838,
"reward_std": 0.10417808592319489,
"rewards/accuracy_reward_long_step": 0.4296875,
"rewards/final_brier_reward_long_step": 0.8625573515892029,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7638399600982666,
"step": 530
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 503.0,
"completions/max_terminated_length": 503.0,
"completions/mean_length": 237.73828125,
"completions/mean_terminated_length": 237.73828125,
"completions/min_length": 132.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.8496,
"grad_norm": 0.055190231651067734,
"learning_rate": 1.6725978647686832e-07,
"loss": -0.0043,
"num_tokens": 259886432.0,
"reward": 1.4131598472595215,
"reward_std": 0.12312982231378555,
"rewards/accuracy_reward_long_step": 0.54296875,
"rewards/final_brier_reward_long_step": 0.6990100145339966,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7817543745040894,
"step": 531
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 386.0,
"completions/max_terminated_length": 386.0,
"completions/mean_length": 232.515625,
"completions/mean_terminated_length": 232.515625,
"completions/min_length": 108.0,
"completions/min_terminated_length": 108.0,
"epoch": 0.8512,
"grad_norm": 0.03550275042653084,
"learning_rate": 1.6548042704626334e-07,
"loss": -0.0068,
"num_tokens": 260358252.0,
"reward": 1.4527807235717773,
"reward_std": 0.13987179100513458,
"rewards/accuracy_reward_long_step": 0.578125,
"rewards/final_brier_reward_long_step": 0.7386081218719482,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7678276300430298,
"step": 532
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 606.0,
"completions/max_terminated_length": 606.0,
"completions/mean_length": 237.8671875,
"completions/mean_terminated_length": 238.80001831054688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 94.0,
"epoch": 0.8528,
"grad_norm": 0.040301430970430374,
"learning_rate": 1.6370106761565835e-07,
"loss": -0.024,
"num_tokens": 260832714.0,
"reward": 1.428901195526123,
"reward_std": 0.09275516867637634,
"rewards/accuracy_reward_long_step": 0.5234375,
"rewards/final_brier_reward_long_step": 0.776361346244812,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8533056974411011,
"step": 533
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 462.0,
"completions/max_terminated_length": 462.0,
"completions/mean_length": 238.7578125,
"completions/mean_terminated_length": 238.7578125,
"completions/min_length": 133.0,
"completions/min_terminated_length": 133.0,
"epoch": 0.8544,
"grad_norm": 0.04402696341276169,
"learning_rate": 1.619217081850534e-07,
"loss": 0.0134,
"num_tokens": 261320468.0,
"reward": 1.4897812604904175,
"reward_std": 0.09281490743160248,
"rewards/accuracy_reward_long_step": 0.59765625,
"rewards/final_brier_reward_long_step": 0.7256316542625427,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.842868447303772,
"step": 534
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 517.0,
"completions/max_terminated_length": 517.0,
"completions/mean_length": 227.56640625,
"completions/mean_terminated_length": 228.45883178710938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.856,
"grad_norm": 0.04461900517344475,
"learning_rate": 1.601423487544484e-07,
"loss": -0.0036,
"num_tokens": 261802045.0,
"reward": 1.5405223369598389,
"reward_std": 0.1693045049905777,
"rewards/accuracy_reward_long_step": 0.66015625,
"rewards/final_brier_reward_long_step": 0.6963077783584595,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8329689502716064,
"step": 535
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 630.0,
"completions/max_terminated_length": 630.0,
"completions/mean_length": 243.41015625,
"completions/mean_terminated_length": 243.41015625,
"completions/min_length": 140.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.8576,
"grad_norm": 0.0438154935836792,
"learning_rate": 1.583629893238434e-07,
"loss": 0.0014,
"num_tokens": 262282190.0,
"reward": 1.3707561492919922,
"reward_std": 0.16878756880760193,
"rewards/accuracy_reward_long_step": 0.4609375,
"rewards/final_brier_reward_long_step": 0.7976784706115723,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8415963649749756,
"step": 536
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 528.0,
"completions/max_terminated_length": 528.0,
"completions/mean_length": 233.390625,
"completions/mean_terminated_length": 233.390625,
"completions/min_length": 105.0,
"completions/min_terminated_length": 105.0,
"epoch": 0.8592,
"grad_norm": 0.04220299795269966,
"learning_rate": 1.5658362989323843e-07,
"loss": 0.0062,
"num_tokens": 262769162.0,
"reward": 1.5402344465255737,
"reward_std": 0.12223983556032181,
"rewards/accuracy_reward_long_step": 0.609375,
"rewards/final_brier_reward_long_step": 0.8557167053222656,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8677208423614502,
"step": 537
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 601.0,
"completions/max_terminated_length": 601.0,
"completions/mean_length": 235.33203125,
"completions/mean_terminated_length": 235.33203125,
"completions/min_length": 140.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.8608,
"grad_norm": 0.03791709989309311,
"learning_rate": 1.5480427046263345e-07,
"loss": -0.0013,
"num_tokens": 263247303.0,
"reward": 1.5486342906951904,
"reward_std": 0.11805526912212372,
"rewards/accuracy_reward_long_step": 0.65625,
"rewards/final_brier_reward_long_step": 0.7473232746124268,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8222134709358215,
"step": 538
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 540.0,
"completions/max_terminated_length": 540.0,
"completions/mean_length": 238.59375,
"completions/mean_terminated_length": 238.59375,
"completions/min_length": 143.0,
"completions/min_terminated_length": 143.0,
"epoch": 0.8624,
"grad_norm": 0.04007653519511223,
"learning_rate": 1.5302491103202846e-07,
"loss": -0.0057,
"num_tokens": 263739847.0,
"reward": 1.5600248575210571,
"reward_std": 0.14350242912769318,
"rewards/accuracy_reward_long_step": 0.67578125,
"rewards/final_brier_reward_long_step": 0.7258714437484741,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8111032247543335,
"step": 539
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 541.0,
"completions/max_terminated_length": 541.0,
"completions/mean_length": 250.05859375,
"completions/mean_terminated_length": 250.05859375,
"completions/min_length": 151.0,
"completions/min_terminated_length": 151.0,
"epoch": 0.864,
"grad_norm": 0.03347768262028694,
"learning_rate": 1.512455516014235e-07,
"loss": -0.0126,
"num_tokens": 264235814.0,
"reward": 1.495011568069458,
"reward_std": 0.08270839601755142,
"rewards/accuracy_reward_long_step": 0.5625,
"rewards/final_brier_reward_long_step": 0.8477886915206909,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8822580575942993,
"step": 540
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 613.0,
"completions/max_terminated_length": 613.0,
"completions/mean_length": 261.34375,
"completions/mean_terminated_length": 261.34375,
"completions/min_length": 147.0,
"completions/min_terminated_length": 147.0,
"epoch": 0.8656,
"grad_norm": 0.03869554027915001,
"learning_rate": 1.494661921708185e-07,
"loss": -0.0073,
"num_tokens": 264741902.0,
"reward": 1.3477532863616943,
"reward_std": 0.1298760026693344,
"rewards/accuracy_reward_long_step": 0.46875,
"rewards/final_brier_reward_long_step": 0.7019554376602173,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8140577077865601,
"step": 541
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 724.0,
"completions/max_terminated_length": 724.0,
"completions/mean_length": 240.37890625,
"completions/mean_terminated_length": 240.37890625,
"completions/min_length": 89.0,
"completions/min_terminated_length": 89.0,
"epoch": 0.8672,
"grad_norm": 0.036735206842422485,
"learning_rate": 1.476868327402135e-07,
"loss": 0.0053,
"num_tokens": 265240223.0,
"reward": 1.2603323459625244,
"reward_std": 0.05455077812075615,
"rewards/accuracy_reward_long_step": 0.375,
"rewards/final_brier_reward_long_step": 0.7330679893493652,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8082613945007324,
"step": 542
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 865.0,
"completions/max_terminated_length": 865.0,
"completions/mean_length": 226.53515625,
"completions/mean_terminated_length": 226.53515625,
"completions/min_length": 105.0,
"completions/min_terminated_length": 105.0,
"epoch": 0.8688,
"grad_norm": 0.04960142448544502,
"learning_rate": 1.4590747330960855e-07,
"loss": 0.0033,
"num_tokens": 265742248.0,
"reward": 1.5600826740264893,
"reward_std": 0.10487354546785355,
"rewards/accuracy_reward_long_step": 0.65625,
"rewards/final_brier_reward_long_step": 0.7979686260223389,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8173620700836182,
"step": 543
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 665.0,
"completions/max_terminated_length": 665.0,
"completions/mean_length": 251.0546875,
"completions/mean_terminated_length": 251.0546875,
"completions/min_length": 102.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.8704,
"grad_norm": 0.03681975230574608,
"learning_rate": 1.4412811387900356e-07,
"loss": 0.0118,
"num_tokens": 266235870.0,
"reward": 1.3697454929351807,
"reward_std": 0.14522971212863922,
"rewards/accuracy_reward_long_step": 0.4765625,
"rewards/final_brier_reward_long_step": 0.7746487855911255,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.798083484172821,
"step": 544
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 459.0,
"completions/max_terminated_length": 459.0,
"completions/mean_length": 229.875,
"completions/mean_terminated_length": 229.875,
"completions/min_length": 110.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.872,
"grad_norm": 0.03889426216483116,
"learning_rate": 1.4234875444839858e-07,
"loss": -0.0055,
"num_tokens": 266730838.0,
"reward": 1.3409353494644165,
"reward_std": 0.18033601343631744,
"rewards/accuracy_reward_long_step": 0.46484375,
"rewards/final_brier_reward_long_step": 0.7062370777130127,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7981289625167847,
"step": 545
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 483.0,
"completions/max_terminated_length": 483.0,
"completions/mean_length": 254.52734375,
"completions/mean_terminated_length": 254.52734375,
"completions/min_length": 124.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.8736,
"grad_norm": 0.04387963190674782,
"learning_rate": 1.405693950177936e-07,
"loss": 0.0065,
"num_tokens": 267218405.0,
"reward": 1.3384535312652588,
"reward_std": 0.112638920545578,
"rewards/accuracy_reward_long_step": 0.44921875,
"rewards/final_brier_reward_long_step": 0.725222647190094,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8317165374755859,
"step": 546
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 579.0,
"completions/max_terminated_length": 579.0,
"completions/mean_length": 239.703125,
"completions/mean_terminated_length": 239.703125,
"completions/min_length": 126.0,
"completions/min_terminated_length": 126.0,
"epoch": 0.8752,
"grad_norm": 0.03998541459441185,
"learning_rate": 1.387900355871886e-07,
"loss": -0.0061,
"num_tokens": 267706177.0,
"reward": 1.5569477081298828,
"reward_std": 0.1203605979681015,
"rewards/accuracy_reward_long_step": 0.6484375,
"rewards/final_brier_reward_long_step": 0.8150613307952881,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8189792633056641,
"step": 547
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 505.0,
"completions/max_terminated_length": 505.0,
"completions/mean_length": 247.90625,
"completions/mean_terminated_length": 247.90625,
"completions/min_length": 137.0,
"completions/min_terminated_length": 137.0,
"epoch": 0.8768,
"grad_norm": 0.040950994938611984,
"learning_rate": 1.3701067615658362e-07,
"loss": -0.0104,
"num_tokens": 268198305.0,
"reward": 1.3657258749008179,
"reward_std": 0.1606340855360031,
"rewards/accuracy_reward_long_step": 0.44140625,
"rewards/final_brier_reward_long_step": 0.8146769404411316,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8826013803482056,
"step": 548
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 416.0,
"completions/max_terminated_length": 416.0,
"completions/mean_length": 231.60546875,
"completions/mean_terminated_length": 231.60546875,
"completions/min_length": 135.0,
"completions/min_terminated_length": 135.0,
"epoch": 0.8784,
"grad_norm": 0.050138987600803375,
"learning_rate": 1.3523131672597866e-07,
"loss": -0.0009,
"num_tokens": 268659020.0,
"reward": 1.3868610858917236,
"reward_std": 0.1097867488861084,
"rewards/accuracy_reward_long_step": 0.49609375,
"rewards/final_brier_reward_long_step": 0.715602695941925,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8474666476249695,
"step": 549
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 551.0,
"completions/max_terminated_length": 551.0,
"completions/mean_length": 224.76171875,
"completions/mean_terminated_length": 224.76171875,
"completions/min_length": 108.0,
"completions/min_terminated_length": 108.0,
"epoch": 0.88,
"grad_norm": 0.058884453028440475,
"learning_rate": 1.3345195729537365e-07,
"loss": 0.0061,
"num_tokens": 269130471.0,
"reward": 1.539604902267456,
"reward_std": 0.15575310587882996,
"rewards/accuracy_reward_long_step": 0.61328125,
"rewards/final_brier_reward_long_step": 0.8004753589630127,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.904819130897522,
"step": 550
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 574.0,
"completions/max_terminated_length": 574.0,
"completions/mean_length": 232.984375,
"completions/mean_terminated_length": 232.984375,
"completions/min_length": 135.0,
"completions/min_terminated_length": 135.0,
"epoch": 0.8816,
"grad_norm": 0.04245281219482422,
"learning_rate": 1.3167259786476866e-07,
"loss": -0.0042,
"num_tokens": 269601147.0,
"reward": 1.1472269296646118,
"reward_std": 0.12594836950302124,
"rewards/accuracy_reward_long_step": 0.28125,
"rewards/final_brier_reward_long_step": 0.6631394624710083,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.800768256187439,
"step": 551
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 400.0,
"completions/max_terminated_length": 400.0,
"completions/mean_length": 230.94140625,
"completions/mean_terminated_length": 230.94140625,
"completions/min_length": 131.0,
"completions/min_terminated_length": 131.0,
"epoch": 0.8832,
"grad_norm": 0.0368904173374176,
"learning_rate": 1.298932384341637e-07,
"loss": 0.0065,
"num_tokens": 270091988.0,
"reward": 1.429011344909668,
"reward_std": 0.12329679727554321,
"rewards/accuracy_reward_long_step": 0.546875,
"rewards/final_brier_reward_long_step": 0.7112011313438416,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8173440098762512,
"step": 552
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 435.0,
"completions/max_terminated_length": 435.0,
"completions/mean_length": 228.328125,
"completions/mean_terminated_length": 228.328125,
"completions/min_length": 137.0,
"completions/min_terminated_length": 137.0,
"epoch": 0.8848,
"grad_norm": 0.03731711953878403,
"learning_rate": 1.2811387900355872e-07,
"loss": 0.0103,
"num_tokens": 270583416.0,
"reward": 1.5979522466659546,
"reward_std": 0.10820707678794861,
"rewards/accuracy_reward_long_step": 0.68359375,
"rewards/final_brier_reward_long_step": 0.8128556609153748,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8445781469345093,
"step": 553
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 521.0,
"completions/max_terminated_length": 521.0,
"completions/mean_length": 237.7421875,
"completions/mean_terminated_length": 237.7421875,
"completions/min_length": 122.0,
"completions/min_terminated_length": 122.0,
"epoch": 0.8864,
"grad_norm": 0.03969436511397362,
"learning_rate": 1.2633451957295373e-07,
"loss": -0.0071,
"num_tokens": 271076430.0,
"reward": 1.3755825757980347,
"reward_std": 0.11075370013713837,
"rewards/accuracy_reward_long_step": 0.48046875,
"rewards/final_brier_reward_long_step": 0.7792448997497559,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8012101054191589,
"step": 554
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 494.0,
"completions/max_terminated_length": 494.0,
"completions/mean_length": 246.546875,
"completions/mean_terminated_length": 246.546875,
"completions/min_length": 131.0,
"completions/min_terminated_length": 131.0,
"epoch": 0.888,
"grad_norm": 0.03638119623064995,
"learning_rate": 1.2455516014234875e-07,
"loss": 0.0094,
"num_tokens": 271573154.0,
"reward": 1.4438494443893433,
"reward_std": 0.14020705223083496,
"rewards/accuracy_reward_long_step": 0.5390625,
"rewards/final_brier_reward_long_step": 0.7870507836341858,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8320969343185425,
"step": 555
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 503.0,
"completions/max_terminated_length": 503.0,
"completions/mean_length": 234.90234375,
"completions/mean_terminated_length": 234.90234375,
"completions/min_length": 132.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.8896,
"grad_norm": 0.045414846390485764,
"learning_rate": 1.2277580071174376e-07,
"loss": 0.0053,
"num_tokens": 272051201.0,
"reward": 1.4294031858444214,
"reward_std": 0.07926599681377411,
"rewards/accuracy_reward_long_step": 0.52734375,
"rewards/final_brier_reward_long_step": 0.8273754119873047,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7808624505996704,
"step": 556
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 735.0,
"completions/max_terminated_length": 735.0,
"completions/mean_length": 248.7890625,
"completions/mean_terminated_length": 248.7890625,
"completions/min_length": 146.0,
"completions/min_terminated_length": 146.0,
"epoch": 0.8912,
"grad_norm": 0.042798083275556564,
"learning_rate": 1.2099644128113878e-07,
"loss": -0.0078,
"num_tokens": 272544787.0,
"reward": 1.5465130805969238,
"reward_std": 0.09747041761875153,
"rewards/accuracy_reward_long_step": 0.65234375,
"rewards/final_brier_reward_long_step": 0.7800741791725159,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7966029644012451,
"step": 557
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 536.0,
"completions/max_terminated_length": 536.0,
"completions/mean_length": 231.265625,
"completions/mean_terminated_length": 231.265625,
"completions/min_length": 132.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.8928,
"grad_norm": 0.03589711710810661,
"learning_rate": 1.1921708185053382e-07,
"loss": -0.0023,
"num_tokens": 273043191.0,
"reward": 1.419055461883545,
"reward_std": 0.12412445992231369,
"rewards/accuracy_reward_long_step": 0.5,
"rewards/final_brier_reward_long_step": 0.8063081502914429,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8699132204055786,
"step": 558
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 465.0,
"completions/max_terminated_length": 465.0,
"completions/mean_length": 234.390625,
"completions/mean_terminated_length": 234.390625,
"completions/min_length": 125.0,
"completions/min_terminated_length": 125.0,
"epoch": 0.8944,
"grad_norm": 0.0373535081744194,
"learning_rate": 1.1743772241992882e-07,
"loss": -0.0117,
"num_tokens": 273519939.0,
"reward": 1.3120028972625732,
"reward_std": 0.15166430175304413,
"rewards/accuracy_reward_long_step": 0.44140625,
"rewards/final_brier_reward_long_step": 0.6639257669448853,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8184609413146973,
"step": 559
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 421.0,
"completions/max_terminated_length": 421.0,
"completions/mean_length": 229.53515625,
"completions/mean_terminated_length": 229.53515625,
"completions/min_length": 105.0,
"completions/min_terminated_length": 105.0,
"epoch": 0.896,
"grad_norm": 0.043007586151361465,
"learning_rate": 1.1565836298932385e-07,
"loss": 0.0086,
"num_tokens": 274002692.0,
"reward": 1.4279296398162842,
"reward_std": 0.13204392790794373,
"rewards/accuracy_reward_long_step": 0.546875,
"rewards/final_brier_reward_long_step": 0.721155047416687,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8030637502670288,
"step": 560
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 500.0,
"completions/max_terminated_length": 500.0,
"completions/mean_length": 241.80078125,
"completions/mean_terminated_length": 241.80078125,
"completions/min_length": 111.0,
"completions/min_terminated_length": 111.0,
"epoch": 0.8976,
"grad_norm": 0.04521722346544266,
"learning_rate": 1.1387900355871885e-07,
"loss": 0.0018,
"num_tokens": 274482809.0,
"reward": 1.4112411737442017,
"reward_std": 0.08869924396276474,
"rewards/accuracy_reward_long_step": 0.50390625,
"rewards/final_brier_reward_long_step": 0.8020182847976685,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8273211717605591,
"step": 561
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 436.0,
"completions/max_terminated_length": 436.0,
"completions/mean_length": 237.63671875,
"completions/mean_terminated_length": 237.63671875,
"completions/min_length": 118.0,
"completions/min_terminated_length": 118.0,
"epoch": 0.8992,
"grad_norm": 0.04603974521160126,
"learning_rate": 1.1209964412811388e-07,
"loss": 0.011,
"num_tokens": 274960236.0,
"reward": 1.3271350860595703,
"reward_std": 0.0897248238325119,
"rewards/accuracy_reward_long_step": 0.43359375,
"rewards/final_brier_reward_long_step": 0.7432428598403931,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8309223055839539,
"step": 562
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 481.0,
"completions/max_terminated_length": 481.0,
"completions/mean_length": 244.44140625,
"completions/mean_terminated_length": 244.44140625,
"completions/min_length": 126.0,
"completions/min_terminated_length": 126.0,
"epoch": 0.9008,
"grad_norm": 0.04642568156123161,
"learning_rate": 1.103202846975089e-07,
"loss": -0.0195,
"num_tokens": 275447389.0,
"reward": 1.245979905128479,
"reward_std": 0.11592195183038712,
"rewards/accuracy_reward_long_step": 0.3828125,
"rewards/final_brier_reward_long_step": 0.720180869102478,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7324886322021484,
"step": 563
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 421.0,
"completions/max_terminated_length": 421.0,
"completions/mean_length": 218.67578125,
"completions/mean_terminated_length": 218.67578125,
"completions/min_length": 123.0,
"completions/min_terminated_length": 123.0,
"epoch": 0.9024,
"grad_norm": 0.043522898107767105,
"learning_rate": 1.085409252669039e-07,
"loss": 0.0223,
"num_tokens": 275928954.0,
"reward": 1.5771305561065674,
"reward_std": 0.11938679218292236,
"rewards/accuracy_reward_long_step": 0.671875,
"rewards/final_brier_reward_long_step": 0.7716304063796997,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8493919372558594,
"step": 564
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 623.0,
"completions/max_terminated_length": 623.0,
"completions/mean_length": 227.44921875,
"completions/mean_terminated_length": 227.44921875,
"completions/min_length": 129.0,
"completions/min_terminated_length": 129.0,
"epoch": 0.904,
"grad_norm": 0.037754617631435394,
"learning_rate": 1.0676156583629893e-07,
"loss": 0.0003,
"num_tokens": 276409509.0,
"reward": 1.6633574962615967,
"reward_std": 0.13154950737953186,
"rewards/accuracy_reward_long_step": 0.75390625,
"rewards/final_brier_reward_long_step": 0.8070245981216431,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.8385924696922302,
"step": 565
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 627.0,
"completions/max_terminated_length": 627.0,
"completions/mean_length": 245.3046875,
"completions/mean_terminated_length": 245.3046875,
"completions/min_length": 126.0,
"completions/min_terminated_length": 126.0,
"epoch": 0.9056,
"grad_norm": 0.04103442654013634,
"learning_rate": 1.0498220640569395e-07,
"loss": 0.0011,
"num_tokens": 276899091.0,
"reward": 1.378248691558838,
"reward_std": 0.1689617931842804,
"rewards/accuracy_reward_long_step": 0.48828125,
"rewards/final_brier_reward_long_step": 0.7711043357849121,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.788765549659729,
"step": 566
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 593.0,
"completions/max_terminated_length": 593.0,
"completions/mean_length": 254.94140625,
"completions/mean_terminated_length": 254.94140625,
"completions/min_length": 92.0,
"completions/min_terminated_length": 92.0,
"epoch": 0.9072,
"grad_norm": 0.03693830594420433,
"learning_rate": 1.0320284697508896e-07,
"loss": -0.0089,
"num_tokens": 277401420.0,
"reward": 1.2199474573135376,
"reward_std": 0.18783383071422577,
"rewards/accuracy_reward_long_step": 0.3515625,
"rewards/final_brier_reward_long_step": 0.6998242139816284,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7737154960632324,
"step": 567
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 618.0,
"completions/max_terminated_length": 618.0,
"completions/mean_length": 240.95703125,
"completions/mean_terminated_length": 240.95703125,
"completions/min_length": 101.0,
"completions/min_terminated_length": 101.0,
"epoch": 0.9088,
"grad_norm": 0.04158253222703934,
"learning_rate": 1.0142348754448399e-07,
"loss": 0.0082,
"num_tokens": 277887441.0,
"reward": 1.4272714853286743,
"reward_std": 0.17625702917575836,
"rewards/accuracy_reward_long_step": 0.5390625,
"rewards/final_brier_reward_long_step": 0.776642918586731,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7761929035186768,
"step": 568
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 536.0,
"completions/max_terminated_length": 536.0,
"completions/mean_length": 231.8125,
"completions/mean_terminated_length": 231.8125,
"completions/min_length": 124.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.9104,
"grad_norm": 0.0454368069767952,
"learning_rate": 9.9644128113879e-08,
"loss": 0.0031,
"num_tokens": 278370153.0,
"reward": 1.3535585403442383,
"reward_std": 0.09445783495903015,
"rewards/accuracy_reward_long_step": 0.43359375,
"rewards/final_brier_reward_long_step": 0.843848466873169,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8360108137130737,
"step": 569
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 452.0,
"completions/max_terminated_length": 452.0,
"completions/mean_length": 228.94921875,
"completions/mean_terminated_length": 228.94921875,
"completions/min_length": 131.0,
"completions/min_terminated_length": 131.0,
"epoch": 0.912,
"grad_norm": 0.03921419754624367,
"learning_rate": 9.786476868327402e-08,
"loss": 0.0004,
"num_tokens": 278864260.0,
"reward": 1.426851749420166,
"reward_std": 0.18421000242233276,
"rewards/accuracy_reward_long_step": 0.54296875,
"rewards/final_brier_reward_long_step": 0.7497539520263672,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7857784032821655,
"step": 570
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 418.0,
"completions/max_terminated_length": 418.0,
"completions/mean_length": 236.28125,
"completions/mean_terminated_length": 236.28125,
"completions/min_length": 137.0,
"completions/min_terminated_length": 137.0,
"epoch": 0.9136,
"grad_norm": 0.03796105086803436,
"learning_rate": 9.608540925266903e-08,
"loss": -0.0075,
"num_tokens": 279345732.0,
"reward": 1.2733724117279053,
"reward_std": 0.17839360237121582,
"rewards/accuracy_reward_long_step": 0.41015625,
"rewards/final_brier_reward_long_step": 0.6511929631233215,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8016713857650757,
"step": 571
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 525.0,
"completions/max_terminated_length": 525.0,
"completions/mean_length": 243.11328125,
"completions/mean_terminated_length": 243.11328125,
"completions/min_length": 116.0,
"completions/min_terminated_length": 116.0,
"epoch": 0.9152,
"grad_norm": 0.03715438395738602,
"learning_rate": 9.430604982206406e-08,
"loss": 0.0104,
"num_tokens": 279841417.0,
"reward": 1.3816213607788086,
"reward_std": 0.12980535626411438,
"rewards/accuracy_reward_long_step": 0.48828125,
"rewards/final_brier_reward_long_step": 0.7918597459793091,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7815006971359253,
"step": 572
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 532.0,
"completions/max_terminated_length": 532.0,
"completions/mean_length": 242.07421875,
"completions/mean_terminated_length": 242.07421875,
"completions/min_length": 115.0,
"completions/min_terminated_length": 115.0,
"epoch": 0.9168,
"grad_norm": 0.04261266440153122,
"learning_rate": 9.252669039145906e-08,
"loss": -0.0052,
"num_tokens": 280341788.0,
"reward": 1.4158134460449219,
"reward_std": 0.09869112074375153,
"rewards/accuracy_reward_long_step": 0.50390625,
"rewards/final_brier_reward_long_step": 0.8082069754600525,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8394216299057007,
"step": 573
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 439.0,
"completions/max_terminated_length": 439.0,
"completions/mean_length": 226.296875,
"completions/mean_terminated_length": 226.296875,
"completions/min_length": 128.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.9184,
"grad_norm": 0.07661443948745728,
"learning_rate": 9.074733096085409e-08,
"loss": -0.0077,
"num_tokens": 280823224.0,
"reward": 1.532405138015747,
"reward_std": 0.08156967163085938,
"rewards/accuracy_reward_long_step": 0.64453125,
"rewards/final_brier_reward_long_step": 0.7845557928085327,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.766939640045166,
"step": 574
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 708.0,
"completions/max_terminated_length": 708.0,
"completions/mean_length": 240.23046875,
"completions/mean_terminated_length": 240.23046875,
"completions/min_length": 138.0,
"completions/min_terminated_length": 138.0,
"epoch": 0.92,
"grad_norm": 0.05118957906961441,
"learning_rate": 8.896797153024912e-08,
"loss": -0.01,
"num_tokens": 281322563.0,
"reward": 1.353409767150879,
"reward_std": 0.12275659292936325,
"rewards/accuracy_reward_long_step": 0.44921875,
"rewards/final_brier_reward_long_step": 0.7173187732696533,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8994452357292175,
"step": 575
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 577.0,
"completions/max_terminated_length": 577.0,
"completions/mean_length": 236.58984375,
"completions/mean_terminated_length": 236.58984375,
"completions/min_length": 151.0,
"completions/min_terminated_length": 151.0,
"epoch": 0.9216,
"grad_norm": 0.044467244297266006,
"learning_rate": 8.718861209964412e-08,
"loss": 0.0093,
"num_tokens": 281803666.0,
"reward": 1.394946575164795,
"reward_std": 0.09921001642942429,
"rewards/accuracy_reward_long_step": 0.515625,
"rewards/final_brier_reward_long_step": 0.771274209022522,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7460117340087891,
"step": 576
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 374.0,
"completions/max_terminated_length": 374.0,
"completions/mean_length": 219.89453125,
"completions/mean_terminated_length": 219.89453125,
"completions/min_length": 135.0,
"completions/min_terminated_length": 135.0,
"epoch": 0.9232,
"grad_norm": 0.04329831525683403,
"learning_rate": 8.540925266903915e-08,
"loss": -0.0061,
"num_tokens": 282247735.0,
"reward": 1.3368597030639648,
"reward_std": 0.12454381585121155,
"rewards/accuracy_reward_long_step": 0.46484375,
"rewards/final_brier_reward_long_step": 0.7124074101448059,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7756561636924744,
"step": 577
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 447.0,
"completions/max_terminated_length": 447.0,
"completions/mean_length": 232.28515625,
"completions/mean_terminated_length": 232.28515625,
"completions/min_length": 117.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.9248,
"grad_norm": 0.043502867221832275,
"learning_rate": 8.362989323843416e-08,
"loss": -0.0013,
"num_tokens": 282739784.0,
"reward": 1.2974281311035156,
"reward_std": 0.17717978358268738,
"rewards/accuracy_reward_long_step": 0.4453125,
"rewards/final_brier_reward_long_step": 0.6866112947463989,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7218515872955322,
"step": 578
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 520.0,
"completions/max_terminated_length": 520.0,
"completions/mean_length": 229.37890625,
"completions/mean_terminated_length": 229.37890625,
"completions/min_length": 118.0,
"completions/min_terminated_length": 118.0,
"epoch": 0.9264,
"grad_norm": 0.038043197244405746,
"learning_rate": 8.185053380782917e-08,
"loss": 0.0001,
"num_tokens": 283228401.0,
"reward": 1.4870903491973877,
"reward_std": 0.11480262130498886,
"rewards/accuracy_reward_long_step": 0.58203125,
"rewards/final_brier_reward_long_step": 0.7964640259742737,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.823772668838501,
"step": 579
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 448.0,
"completions/max_terminated_length": 448.0,
"completions/mean_length": 231.578125,
"completions/mean_terminated_length": 231.578125,
"completions/min_length": 92.0,
"completions/min_terminated_length": 92.0,
"epoch": 0.928,
"grad_norm": 0.04364067688584328,
"learning_rate": 8.00711743772242e-08,
"loss": 0.0038,
"num_tokens": 283710941.0,
"reward": 1.3739776611328125,
"reward_std": 0.0760713666677475,
"rewards/accuracy_reward_long_step": 0.453125,
"rewards/final_brier_reward_long_step": 0.8134101629257202,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8700001239776611,
"step": 580
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 444.0,
"completions/max_terminated_length": 444.0,
"completions/mean_length": 241.80078125,
"completions/mean_terminated_length": 241.80078125,
"completions/min_length": 100.0,
"completions/min_terminated_length": 100.0,
"epoch": 0.9296,
"grad_norm": 0.04623554274439812,
"learning_rate": 7.829181494661922e-08,
"loss": -0.0163,
"num_tokens": 284203962.0,
"reward": 1.3043211698532104,
"reward_std": 0.10367835313081741,
"rewards/accuracy_reward_long_step": 0.42578125,
"rewards/final_brier_reward_long_step": 0.7652456760406494,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7489140033721924,
"step": 581
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 665.0,
"completions/max_terminated_length": 665.0,
"completions/mean_length": 248.40234375,
"completions/mean_terminated_length": 248.40234375,
"completions/min_length": 142.0,
"completions/min_terminated_length": 142.0,
"epoch": 0.9312,
"grad_norm": 0.046936266124248505,
"learning_rate": 7.651245551601423e-08,
"loss": 0.0062,
"num_tokens": 284689161.0,
"reward": 1.2649556398391724,
"reward_std": 0.13359886407852173,
"rewards/accuracy_reward_long_step": 0.41015625,
"rewards/final_brier_reward_long_step": 0.6901007294654846,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7290970087051392,
"step": 582
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 409.0,
"completions/max_terminated_length": 409.0,
"completions/mean_length": 224.82421875,
"completions/mean_terminated_length": 224.82421875,
"completions/min_length": 118.0,
"completions/min_terminated_length": 118.0,
"epoch": 0.9328,
"grad_norm": 0.048148345202207565,
"learning_rate": 7.473309608540925e-08,
"loss": 0.0091,
"num_tokens": 285176220.0,
"reward": 1.545983076095581,
"reward_std": 0.10164255648851395,
"rewards/accuracy_reward_long_step": 0.64453125,
"rewards/final_brier_reward_long_step": 0.7637656331062317,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8420416116714478,
"step": 583
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 444.0,
"completions/max_terminated_length": 444.0,
"completions/mean_length": 234.75390625,
"completions/mean_terminated_length": 234.75390625,
"completions/min_length": 143.0,
"completions/min_terminated_length": 143.0,
"epoch": 0.9344,
"grad_norm": 0.0379473976790905,
"learning_rate": 7.295373665480427e-08,
"loss": -0.0063,
"num_tokens": 285682757.0,
"reward": 1.407658338546753,
"reward_std": 0.1226111352443695,
"rewards/accuracy_reward_long_step": 0.51953125,
"rewards/final_brier_reward_long_step": 0.7624057531356812,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7901023626327515,
"step": 584
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 509.0,
"completions/max_terminated_length": 509.0,
"completions/mean_length": 235.9140625,
"completions/mean_terminated_length": 235.9140625,
"completions/min_length": 127.0,
"completions/min_terminated_length": 127.0,
"epoch": 0.936,
"grad_norm": 0.04213576763868332,
"learning_rate": 7.117437722419929e-08,
"loss": 0.0006,
"num_tokens": 286148327.0,
"reward": 1.3678431510925293,
"reward_std": 0.13030381500720978,
"rewards/accuracy_reward_long_step": 0.484375,
"rewards/final_brier_reward_long_step": 0.7432855367660522,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7905872464179993,
"step": 585
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 571.0,
"completions/max_terminated_length": 571.0,
"completions/mean_length": 239.28125,
"completions/mean_terminated_length": 239.28125,
"completions/min_length": 135.0,
"completions/min_terminated_length": 135.0,
"epoch": 0.9376,
"grad_norm": 0.039584312587976456,
"learning_rate": 6.93950177935943e-08,
"loss": -0.0023,
"num_tokens": 286619447.0,
"reward": 1.4525396823883057,
"reward_std": 0.1256856620311737,
"rewards/accuracy_reward_long_step": 0.5625,
"rewards/final_brier_reward_long_step": 0.7838070392608643,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.776351809501648,
"step": 586
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 472.0,
"completions/max_terminated_length": 472.0,
"completions/mean_length": 245.26953125,
"completions/mean_terminated_length": 245.26953125,
"completions/min_length": 86.0,
"completions/min_terminated_length": 86.0,
"epoch": 0.9392,
"grad_norm": 0.039651911705732346,
"learning_rate": 6.761565836298933e-08,
"loss": 0.0029,
"num_tokens": 287116388.0,
"reward": 1.378925085067749,
"reward_std": 0.12979546189308167,
"rewards/accuracy_reward_long_step": 0.49609375,
"rewards/final_brier_reward_long_step": 0.7602671980857849,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7710578441619873,
"step": 587
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 519.0,
"completions/max_terminated_length": 519.0,
"completions/mean_length": 232.0,
"completions/mean_terminated_length": 232.0,
"completions/min_length": 114.0,
"completions/min_terminated_length": 114.0,
"epoch": 0.9408,
"grad_norm": 0.05170515552163124,
"learning_rate": 6.583629893238433e-08,
"loss": -0.0055,
"num_tokens": 287604604.0,
"reward": 1.4521524906158447,
"reward_std": 0.19186797738075256,
"rewards/accuracy_reward_long_step": 0.55859375,
"rewards/final_brier_reward_long_step": 0.796981692314148,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.777253270149231,
"step": 588
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 446.0,
"completions/max_terminated_length": 446.0,
"completions/mean_length": 230.9609375,
"completions/mean_terminated_length": 230.9609375,
"completions/min_length": 128.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.9424,
"grad_norm": 0.043422844260931015,
"learning_rate": 6.405693950177936e-08,
"loss": -0.0021,
"num_tokens": 288080906.0,
"reward": 1.5288910865783691,
"reward_std": 0.10271154344081879,
"rewards/accuracy_reward_long_step": 0.61328125,
"rewards/final_brier_reward_long_step": 0.816925048828125,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8455142974853516,
"step": 589
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 621.0,
"completions/max_terminated_length": 621.0,
"completions/mean_length": 236.06640625,
"completions/mean_terminated_length": 236.06640625,
"completions/min_length": 135.0,
"completions/min_terminated_length": 135.0,
"epoch": 0.944,
"grad_norm": 0.036451030522584915,
"learning_rate": 6.227758007117437e-08,
"loss": 0.0043,
"num_tokens": 288552851.0,
"reward": 1.5121817588806152,
"reward_std": 0.1267908215522766,
"rewards/accuracy_reward_long_step": 0.62109375,
"rewards/final_brier_reward_long_step": 0.7251984477043152,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8391537070274353,
"step": 590
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 439.0,
"completions/max_terminated_length": 439.0,
"completions/mean_length": 232.703125,
"completions/mean_terminated_length": 232.703125,
"completions/min_length": 134.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.9456,
"grad_norm": 0.04694944620132446,
"learning_rate": 6.049822064056939e-08,
"loss": -0.0057,
"num_tokens": 289027695.0,
"reward": 1.4363982677459717,
"reward_std": 0.15798181295394897,
"rewards/accuracy_reward_long_step": 0.55078125,
"rewards/final_brier_reward_long_step": 0.7334683537483215,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.80899977684021,
"step": 591
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 516.0,
"completions/max_terminated_length": 516.0,
"completions/mean_length": 244.984375,
"completions/mean_terminated_length": 244.984375,
"completions/min_length": 140.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.9472,
"grad_norm": 0.0597044937312603,
"learning_rate": 5.871886120996441e-08,
"loss": -0.016,
"num_tokens": 289507067.0,
"reward": 1.3889837265014648,
"reward_std": 0.08240145444869995,
"rewards/accuracy_reward_long_step": 0.45703125,
"rewards/final_brier_reward_long_step": 0.8605644702911377,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8672455549240112,
"step": 592
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 554.0,
"completions/max_terminated_length": 554.0,
"completions/mean_length": 233.28515625,
"completions/mean_terminated_length": 233.28515625,
"completions/min_length": 108.0,
"completions/min_terminated_length": 108.0,
"epoch": 0.9488,
"grad_norm": 0.036467116326093674,
"learning_rate": 5.6939501779359424e-08,
"loss": 0.0026,
"num_tokens": 289991372.0,
"reward": 1.28902006149292,
"reward_std": 0.11750981956720352,
"rewards/accuracy_reward_long_step": 0.4375,
"rewards/final_brier_reward_long_step": 0.6445460915565491,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7615340352058411,
"step": 593
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 563.0,
"completions/max_terminated_length": 563.0,
"completions/mean_length": 227.6171875,
"completions/mean_terminated_length": 227.6171875,
"completions/min_length": 123.0,
"completions/min_terminated_length": 123.0,
"epoch": 0.9504,
"grad_norm": 0.09449837356805801,
"learning_rate": 5.516014234875445e-08,
"loss": 0.0093,
"num_tokens": 290484434.0,
"reward": 1.574216604232788,
"reward_std": 0.11178240180015564,
"rewards/accuracy_reward_long_step": 0.69140625,
"rewards/final_brier_reward_long_step": 0.7768968343734741,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7543442845344543,
"step": 594
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 527.0,
"completions/max_terminated_length": 527.0,
"completions/mean_length": 244.6015625,
"completions/mean_terminated_length": 244.6015625,
"completions/min_length": 127.0,
"completions/min_terminated_length": 127.0,
"epoch": 0.952,
"grad_norm": 0.03990272060036659,
"learning_rate": 5.3380782918149466e-08,
"loss": 0.0085,
"num_tokens": 290989700.0,
"reward": 1.508046269416809,
"reward_std": 0.10411694645881653,
"rewards/accuracy_reward_long_step": 0.6015625,
"rewards/final_brier_reward_long_step": 0.8094656467437744,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.824282169342041,
"step": 595
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 498.0,
"completions/max_terminated_length": 498.0,
"completions/mean_length": 258.6171875,
"completions/mean_terminated_length": 258.6171875,
"completions/min_length": 144.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.9536,
"grad_norm": 0.035313017666339874,
"learning_rate": 5.160142348754448e-08,
"loss": 0.0136,
"num_tokens": 291475994.0,
"reward": 1.4012870788574219,
"reward_std": 0.130996972322464,
"rewards/accuracy_reward_long_step": 0.5,
"rewards/final_brier_reward_long_step": 0.8204870820045471,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7846609950065613,
"step": 596
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 506.0,
"completions/max_terminated_length": 506.0,
"completions/mean_length": 244.3515625,
"completions/mean_terminated_length": 244.3515625,
"completions/min_length": 132.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.9552,
"grad_norm": 0.04008813574910164,
"learning_rate": 4.98220640569395e-08,
"loss": 0.002,
"num_tokens": 291959996.0,
"reward": 1.3849501609802246,
"reward_std": 0.12151844799518585,
"rewards/accuracy_reward_long_step": 0.5234375,
"rewards/final_brier_reward_long_step": 0.7529085874557495,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.6931424140930176,
"step": 597
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 492.0,
"completions/max_terminated_length": 492.0,
"completions/mean_length": 226.16015625,
"completions/mean_terminated_length": 226.16015625,
"completions/min_length": 109.0,
"completions/min_terminated_length": 109.0,
"epoch": 0.9568,
"grad_norm": 0.04041390120983124,
"learning_rate": 4.8042704626334516e-08,
"loss": -0.0106,
"num_tokens": 292435517.0,
"reward": 1.3907995223999023,
"reward_std": 0.08410888910293579,
"rewards/accuracy_reward_long_step": 0.5,
"rewards/final_brier_reward_long_step": 0.7949777245521545,
"rewards/format_reward_long_step": 0.99609375,
"rewards/stepwise_brier_reward_long_step": 0.7760331034660339,
"step": 598
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 580.0,
"completions/max_terminated_length": 580.0,
"completions/mean_length": 244.42578125,
"completions/mean_terminated_length": 244.42578125,
"completions/min_length": 132.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.9584,
"grad_norm": 0.042032286524772644,
"learning_rate": 4.626334519572953e-08,
"loss": 0.0048,
"num_tokens": 292911202.0,
"reward": 1.2448697090148926,
"reward_std": 0.10349094122648239,
"rewards/accuracy_reward_long_step": 0.375,
"rewards/final_brier_reward_long_step": 0.7020269632339478,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7774521112442017,
"step": 599
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 530.0,
"completions/max_terminated_length": 530.0,
"completions/mean_length": 230.69921875,
"completions/mean_terminated_length": 230.69921875,
"completions/min_length": 134.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.96,
"grad_norm": 0.04338167607784271,
"learning_rate": 4.448398576512456e-08,
"loss": 0.0117,
"num_tokens": 293390637.0,
"reward": 1.550196886062622,
"reward_std": 0.1076919287443161,
"rewards/accuracy_reward_long_step": 0.6484375,
"rewards/final_brier_reward_long_step": 0.8495925664901733,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7574446201324463,
"step": 600
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 555.0,
"completions/max_terminated_length": 555.0,
"completions/mean_length": 242.64453125,
"completions/mean_terminated_length": 242.64453125,
"completions/min_length": 124.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.9616,
"grad_norm": 0.044204358011484146,
"learning_rate": 4.270462633451957e-08,
"loss": 0.0193,
"num_tokens": 293870786.0,
"reward": 1.3222875595092773,
"reward_std": 0.09255368262529373,
"rewards/accuracy_reward_long_step": 0.3984375,
"rewards/final_brier_reward_long_step": 0.8258058428764343,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8695942163467407,
"step": 601
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 463.0,
"completions/max_terminated_length": 463.0,
"completions/mean_length": 238.5390625,
"completions/mean_terminated_length": 238.5390625,
"completions/min_length": 124.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.9632,
"grad_norm": 0.04069282487034798,
"learning_rate": 4.092526690391459e-08,
"loss": -0.0003,
"num_tokens": 294354396.0,
"reward": 1.4837543964385986,
"reward_std": 0.1288076937198639,
"rewards/accuracy_reward_long_step": 0.5703125,
"rewards/final_brier_reward_long_step": 0.8007269501686096,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8530406355857849,
"step": 602
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 557.0,
"completions/max_terminated_length": 557.0,
"completions/mean_length": 241.36328125,
"completions/mean_terminated_length": 241.36328125,
"completions/min_length": 108.0,
"completions/min_terminated_length": 108.0,
"epoch": 0.9648,
"grad_norm": 0.0698588415980339,
"learning_rate": 3.914590747330961e-08,
"loss": -0.0062,
"num_tokens": 294831593.0,
"reward": 1.3811883926391602,
"reward_std": 0.12175668030977249,
"rewards/accuracy_reward_long_step": 0.46875,
"rewards/final_brier_reward_long_step": 0.9002180099487305,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7495359182357788,
"step": 603
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 455.0,
"completions/max_terminated_length": 455.0,
"completions/mean_length": 228.00390625,
"completions/mean_terminated_length": 228.00390625,
"completions/min_length": 139.0,
"completions/min_terminated_length": 139.0,
"epoch": 0.9664,
"grad_norm": 0.04091455414891243,
"learning_rate": 3.736654804270462e-08,
"loss": 0.0007,
"num_tokens": 295312322.0,
"reward": 1.3806979656219482,
"reward_std": 0.1109173595905304,
"rewards/accuracy_reward_long_step": 0.5,
"rewards/final_brier_reward_long_step": 0.746747612953186,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7760441303253174,
"step": 604
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 456.0,
"completions/max_terminated_length": 456.0,
"completions/mean_length": 241.90234375,
"completions/mean_terminated_length": 241.90234375,
"completions/min_length": 117.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.968,
"grad_norm": 0.054420799016952515,
"learning_rate": 3.5587188612099644e-08,
"loss": 0.0025,
"num_tokens": 295800753.0,
"reward": 1.385452389717102,
"reward_std": 0.18200629949569702,
"rewards/accuracy_reward_long_step": 0.5234375,
"rewards/final_brier_reward_long_step": 0.6535894870758057,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7944704294204712,
"step": 605
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 474.0,
"completions/max_terminated_length": 474.0,
"completions/mean_length": 237.26171875,
"completions/mean_terminated_length": 237.26171875,
"completions/min_length": 125.0,
"completions/min_terminated_length": 125.0,
"epoch": 0.9696,
"grad_norm": 0.04651153087615967,
"learning_rate": 3.3807829181494665e-08,
"loss": 0.0115,
"num_tokens": 296282412.0,
"reward": 1.462805151939392,
"reward_std": 0.12919017672538757,
"rewards/accuracy_reward_long_step": 0.6015625,
"rewards/final_brier_reward_long_step": 0.685794472694397,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7591761350631714,
"step": 606
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 517.0,
"completions/max_terminated_length": 517.0,
"completions/mean_length": 231.671875,
"completions/mean_terminated_length": 231.671875,
"completions/min_length": 123.0,
"completions/min_terminated_length": 123.0,
"epoch": 0.9712,
"grad_norm": 0.046628162264823914,
"learning_rate": 3.202846975088968e-08,
"loss": 0.0057,
"num_tokens": 296766752.0,
"reward": 1.5526325702667236,
"reward_std": 0.08376991003751755,
"rewards/accuracy_reward_long_step": 0.62890625,
"rewards/final_brier_reward_long_step": 0.8181566596031189,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8767487406730652,
"step": 607
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 499.0,
"completions/max_terminated_length": 499.0,
"completions/mean_length": 244.3359375,
"completions/mean_terminated_length": 244.3359375,
"completions/min_length": 117.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.9728,
"grad_norm": 0.045422233641147614,
"learning_rate": 3.0249110320284694e-08,
"loss": 0.0073,
"num_tokens": 297252086.0,
"reward": 1.3045486211776733,
"reward_std": 0.10245119035243988,
"rewards/accuracy_reward_long_step": 0.4140625,
"rewards/final_brier_reward_long_step": 0.7651957273483276,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7967486381530762,
"step": 608
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 624.0,
"completions/max_terminated_length": 624.0,
"completions/mean_length": 243.4375,
"completions/mean_terminated_length": 243.4375,
"completions/min_length": 140.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.9744,
"grad_norm": 0.040375716984272,
"learning_rate": 2.8469750889679712e-08,
"loss": 0.013,
"num_tokens": 297745582.0,
"reward": 1.4966931343078613,
"reward_std": 0.092777319252491,
"rewards/accuracy_reward_long_step": 0.57421875,
"rewards/final_brier_reward_long_step": 0.837005078792572,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8528923988342285,
"step": 609
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 625.0,
"completions/max_terminated_length": 625.0,
"completions/mean_length": 242.9765625,
"completions/mean_terminated_length": 242.9765625,
"completions/min_length": 137.0,
"completions/min_terminated_length": 137.0,
"epoch": 0.976,
"grad_norm": 0.043781962245702744,
"learning_rate": 2.6690391459074733e-08,
"loss": 0.006,
"num_tokens": 298209824.0,
"reward": 1.462713360786438,
"reward_std": 0.14046858251094818,
"rewards/accuracy_reward_long_step": 0.56640625,
"rewards/final_brier_reward_long_step": 0.7839847803115845,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8012436628341675,
"step": 610
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 439.0,
"completions/max_terminated_length": 439.0,
"completions/mean_length": 224.9609375,
"completions/mean_terminated_length": 224.9609375,
"completions/min_length": 122.0,
"completions/min_terminated_length": 122.0,
"epoch": 0.9776,
"grad_norm": 0.03858632594347,
"learning_rate": 2.491103202846975e-08,
"loss": 0.0032,
"num_tokens": 298700686.0,
"reward": 1.3872606754302979,
"reward_std": 0.07525929063558578,
"rewards/accuracy_reward_long_step": 0.5,
"rewards/final_brier_reward_long_step": 0.740240216255188,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8088023662567139,
"step": 611
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 446.0,
"completions/max_terminated_length": 446.0,
"completions/mean_length": 233.19140625,
"completions/mean_terminated_length": 233.19140625,
"completions/min_length": 100.0,
"completions/min_terminated_length": 100.0,
"epoch": 0.9792,
"grad_norm": 0.04854563623666763,
"learning_rate": 2.3131672597864765e-08,
"loss": -0.007,
"num_tokens": 299182631.0,
"reward": 1.2626285552978516,
"reward_std": 0.09427875280380249,
"rewards/accuracy_reward_long_step": 0.37890625,
"rewards/final_brier_reward_long_step": 0.7738581895828247,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7610312700271606,
"step": 612
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 438.0,
"completions/max_terminated_length": 438.0,
"completions/mean_length": 232.41796875,
"completions/mean_terminated_length": 232.41796875,
"completions/min_length": 92.0,
"completions/min_terminated_length": 92.0,
"epoch": 0.9808,
"grad_norm": 0.04861883446574211,
"learning_rate": 2.1352313167259786e-08,
"loss": -0.0034,
"num_tokens": 299679130.0,
"reward": 1.4753010272979736,
"reward_std": 0.07958254218101501,
"rewards/accuracy_reward_long_step": 0.5546875,
"rewards/final_brier_reward_long_step": 0.8655683994293213,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8168855905532837,
"step": 613
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 385.0,
"completions/max_terminated_length": 385.0,
"completions/mean_length": 222.76953125,
"completions/mean_terminated_length": 222.76953125,
"completions/min_length": 131.0,
"completions/min_terminated_length": 131.0,
"epoch": 0.9824,
"grad_norm": 0.04186774417757988,
"learning_rate": 1.9572953736654804e-08,
"loss": -0.0007,
"num_tokens": 300167079.0,
"reward": 1.3874082565307617,
"reward_std": 0.0872558057308197,
"rewards/accuracy_reward_long_step": 0.5234375,
"rewards/final_brier_reward_long_step": 0.6611804366111755,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7947026491165161,
"step": 614
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 704.0,
"completions/max_terminated_length": 704.0,
"completions/mean_length": 254.33984375,
"completions/mean_terminated_length": 254.33984375,
"completions/min_length": 157.0,
"completions/min_terminated_length": 157.0,
"epoch": 0.984,
"grad_norm": 0.0385391041636467,
"learning_rate": 1.7793594306049822e-08,
"loss": 0.0161,
"num_tokens": 300662414.0,
"reward": 1.3372551202774048,
"reward_std": 0.15426021814346313,
"rewards/accuracy_reward_long_step": 0.4296875,
"rewards/final_brier_reward_long_step": 0.7895093560218811,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8407611846923828,
"step": 615
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 507.0,
"completions/max_terminated_length": 507.0,
"completions/mean_length": 250.22265625,
"completions/mean_terminated_length": 250.22265625,
"completions/min_length": 124.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.9856,
"grad_norm": 0.03464104235172272,
"learning_rate": 1.601423487544484e-08,
"loss": 0.0092,
"num_tokens": 301154455.0,
"reward": 1.3746635913848877,
"reward_std": 0.11654820293188095,
"rewards/accuracy_reward_long_step": 0.46875,
"rewards/final_brier_reward_long_step": 0.7886804342269897,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8349737524986267,
"step": 616
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 416.0,
"completions/max_terminated_length": 416.0,
"completions/mean_length": 237.05078125,
"completions/mean_terminated_length": 237.05078125,
"completions/min_length": 121.0,
"completions/min_terminated_length": 121.0,
"epoch": 0.9872,
"grad_norm": 0.05452824756503105,
"learning_rate": 1.4234875444839856e-08,
"loss": 0.0012,
"num_tokens": 301632876.0,
"reward": 1.377845287322998,
"reward_std": 0.12956568598747253,
"rewards/accuracy_reward_long_step": 0.49609375,
"rewards/final_brier_reward_long_step": 0.7508812546730042,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7761249542236328,
"step": 617
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 467.0,
"completions/max_terminated_length": 467.0,
"completions/mean_length": 234.171875,
"completions/mean_terminated_length": 234.171875,
"completions/min_length": 126.0,
"completions/min_terminated_length": 126.0,
"epoch": 0.9888,
"grad_norm": 0.04497021064162254,
"learning_rate": 1.2455516014234875e-08,
"loss": -0.003,
"num_tokens": 302116440.0,
"reward": 1.360137701034546,
"reward_std": 0.2063150405883789,
"rewards/accuracy_reward_long_step": 0.5,
"rewards/final_brier_reward_long_step": 0.7225565910339355,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7179945111274719,
"step": 618
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 425.0,
"completions/max_terminated_length": 425.0,
"completions/mean_length": 239.23828125,
"completions/mean_terminated_length": 239.23828125,
"completions/min_length": 131.0,
"completions/min_terminated_length": 131.0,
"epoch": 0.9904,
"grad_norm": 0.038744006305933,
"learning_rate": 1.0676156583629893e-08,
"loss": -0.0001,
"num_tokens": 302594637.0,
"reward": 1.3021314144134521,
"reward_std": 0.11754617094993591,
"rewards/accuracy_reward_long_step": 0.41796875,
"rewards/final_brier_reward_long_step": 0.7479242086410522,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7887262105941772,
"step": 619
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 417.0,
"completions/max_terminated_length": 417.0,
"completions/mean_length": 228.94140625,
"completions/mean_terminated_length": 228.94140625,
"completions/min_length": 132.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.992,
"grad_norm": 0.035442836582660675,
"learning_rate": 8.896797153024911e-09,
"loss": 0.0013,
"num_tokens": 303086934.0,
"reward": 1.61018705368042,
"reward_std": 0.09774182736873627,
"rewards/accuracy_reward_long_step": 0.71484375,
"rewards/final_brier_reward_long_step": 0.7831144332885742,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7982592582702637,
"step": 620
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 371.0,
"completions/max_terminated_length": 371.0,
"completions/mean_length": 224.3203125,
"completions/mean_terminated_length": 224.3203125,
"completions/min_length": 137.0,
"completions/min_terminated_length": 137.0,
"epoch": 0.9936,
"grad_norm": 0.05612906068563461,
"learning_rate": 7.117437722419928e-09,
"loss": 0.0006,
"num_tokens": 303565288.0,
"reward": 1.5199366807937622,
"reward_std": 0.0858568549156189,
"rewards/accuracy_reward_long_step": 0.62109375,
"rewards/final_brier_reward_long_step": 0.7536445260047913,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8417270183563232,
"step": 621
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 556.0,
"completions/max_terminated_length": 556.0,
"completions/mean_length": 244.44921875,
"completions/mean_terminated_length": 244.44921875,
"completions/min_length": 159.0,
"completions/min_terminated_length": 159.0,
"epoch": 0.9952,
"grad_norm": 0.05516738444566727,
"learning_rate": 5.338078291814947e-09,
"loss": 0.0138,
"num_tokens": 304051859.0,
"reward": 1.3782299757003784,
"reward_std": 0.11134977638721466,
"rewards/accuracy_reward_long_step": 0.4765625,
"rewards/final_brier_reward_long_step": 0.770743727684021,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8359257578849792,
"step": 622
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 415.0,
"completions/max_terminated_length": 415.0,
"completions/mean_length": 227.07421875,
"completions/mean_terminated_length": 227.07421875,
"completions/min_length": 112.0,
"completions/min_terminated_length": 112.0,
"epoch": 0.9968,
"grad_norm": 0.04374608024954796,
"learning_rate": 3.558718861209964e-09,
"loss": -0.0019,
"num_tokens": 304545614.0,
"reward": 1.507850170135498,
"reward_std": 0.16186021268367767,
"rewards/accuracy_reward_long_step": 0.6171875,
"rewards/final_brier_reward_long_step": 0.7282167673110962,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8344339728355408,
"step": 623
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 519.0,
"completions/max_terminated_length": 519.0,
"completions/mean_length": 238.9609375,
"completions/mean_terminated_length": 238.9609375,
"completions/min_length": 84.0,
"completions/min_terminated_length": 84.0,
"epoch": 0.9984,
"grad_norm": 0.042679328471422195,
"learning_rate": 1.779359430604982e-09,
"loss": 0.0036,
"num_tokens": 305024668.0,
"reward": 1.3625683784484863,
"reward_std": 0.12260974198579788,
"rewards/accuracy_reward_long_step": 0.48828125,
"rewards/final_brier_reward_long_step": 0.7129184007644653,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.7842304706573486,
"step": 624
},
{
"calib/answer_extract_rate": 0.0,
"calib/avg_num_step_conf": 0.0,
"calib/final_conf_rate": 0.0,
"calib/format_rate": 0.0,
"calib/nonempty_final_conf_rate": 0.0,
"calib/nonempty_reasoning_rate": 0.0,
"calib/nonempty_step_conf_rate": 0.0,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 465.0,
"completions/max_terminated_length": 465.0,
"completions/mean_length": 234.5,
"completions/mean_terminated_length": 234.5,
"completions/min_length": 159.0,
"completions/min_terminated_length": 159.0,
"epoch": 1.0,
"grad_norm": 0.04114016145467758,
"learning_rate": 0.0,
"loss": 0.017,
"num_tokens": 305506462.0,
"reward": 1.5832818746566772,
"reward_std": 0.10286815464496613,
"rewards/accuracy_reward_long_step": 0.6875,
"rewards/final_brier_reward_long_step": 0.7612390518188477,
"rewards/format_reward_long_step": 1.0,
"rewards/stepwise_brier_reward_long_step": 0.8218887448310852,
"step": 625
},
{
"epoch": 1.0,
"step": 625,
"total_flos": 0.0,
"train_loss": -0.007067593541555107,
"train_runtime": 30966.4861,
"train_samples_per_second": 0.646,
"train_steps_per_second": 0.02
}
],
"logging_steps": 1,
"max_steps": 625,
"num_input_tokens_seen": 305506462,
"num_train_epochs": 1,
"save_steps": 20,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}