{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 625, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1640625, "completions/max_length": 1000.0, "completions/max_terminated_length": 1000.0, "completions/mean_length": 341.4609375, "completions/mean_terminated_length": 408.47662353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0016, "grad_norm": 0.17134852707386017, "learning_rate": 1.5873015873015872e-08, "loss": -0.116, "num_tokens": 486582.0, "reward": 0.41310209035873413, "reward_std": 0.4805126190185547, "rewards/accuracy_reward_long_step": 0.2265625, "rewards/final_brier_reward_long_step": 0.11814829707145691, "rewards/format_reward_long_step": 0.23046875, "rewards/stepwise_brier_reward_long_step": 0.1670725792646408, "step": 1 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.19140625, "completions/max_length": 1019.0, "completions/max_terminated_length": 1019.0, "completions/mean_length": 303.75, "completions/mean_terminated_length": 375.65216064453125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.6887706518173218, "learning_rate": 3.1746031746031744e-08, "loss": -0.1486, "num_tokens": 985630.0, "reward": 0.4098304212093353, "reward_std": 0.5015645623207092, "rewards/accuracy_reward_long_step": 0.1875, "rewards/final_brier_reward_long_step": 0.1355031430721283, "rewards/format_reward_long_step": 0.27734375, "rewards/stepwise_brier_reward_long_step": 0.1991310715675354, "step": 2 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15234375, "completions/max_length": 1022.0, "completions/max_terminated_length": 1022.0, "completions/mean_length": 353.46484375, "completions/mean_terminated_length": 416.99078369140625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0048, "grad_norm": 0.411814421415329, "learning_rate": 4.7619047619047613e-08, "loss": -0.1027, "num_tokens": 1490821.0, "reward": 0.41081345081329346, "reward_std": 0.5538315773010254, "rewards/accuracy_reward_long_step": 0.19921875, "rewards/final_brier_reward_long_step": 0.13149982690811157, "rewards/format_reward_long_step": 0.25390625, "rewards/stepwise_brier_reward_long_step": 0.20706646144390106, "step": 3 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15625, "completions/max_length": 944.0, "completions/max_terminated_length": 944.0, "completions/mean_length": 340.40625, "completions/mean_terminated_length": 403.4444580078125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.40814000368118286, "learning_rate": 6.349206349206349e-08, "loss": -0.0835, "num_tokens": 2004965.0, "reward": 0.3751431107521057, "reward_std": 0.48189181089401245, "rewards/accuracy_reward_long_step": 0.1875, "rewards/final_brier_reward_long_step": 0.11413241922855377, "rewards/format_reward_long_step": 0.23046875, "rewards/stepwise_brier_reward_long_step": 0.17550255358219147, "step": 4 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15234375, "completions/max_length": 1001.0, "completions/max_terminated_length": 1001.0, "completions/mean_length": 343.70703125, "completions/mean_terminated_length": 405.479248046875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008, "grad_norm": 0.37365081906318665, "learning_rate": 7.936507936507936e-08, "loss": -0.0435, "num_tokens": 2528514.0, "reward": 0.34497031569480896, "reward_std": 0.45299145579338074, "rewards/accuracy_reward_long_step": 0.14453125, "rewards/final_brier_reward_long_step": 0.10058828443288803, "rewards/format_reward_long_step": 0.26953125, "rewards/stepwise_brier_reward_long_step": 0.1621055006980896, "step": 5 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1640625, "completions/max_length": 996.0, "completions/max_terminated_length": 996.0, "completions/mean_length": 310.33203125, "completions/mean_terminated_length": 371.2383117675781, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 2.2686784267425537, "learning_rate": 9.523809523809523e-08, "loss": -0.1196, "num_tokens": 3035623.0, "reward": 0.39475879073143005, "reward_std": 0.5006267428398132, "rewards/accuracy_reward_long_step": 0.1953125, "rewards/final_brier_reward_long_step": 0.11408085376024246, "rewards/format_reward_long_step": 0.2421875, "rewards/stepwise_brier_reward_long_step": 0.19932931661605835, "step": 6 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1640625, "completions/max_length": 1011.0, "completions/max_terminated_length": 1011.0, "completions/mean_length": 349.5703125, "completions/mean_terminated_length": 418.17755126953125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0112, "grad_norm": 0.5209683179855347, "learning_rate": 1.111111111111111e-07, "loss": -0.1158, "num_tokens": 3544593.0, "reward": 0.36087095737457275, "reward_std": 0.5047852993011475, "rewards/accuracy_reward_long_step": 0.16015625, "rewards/final_brier_reward_long_step": 0.11222599446773529, "rewards/format_reward_long_step": 0.25390625, "rewards/stepwise_brier_reward_long_step": 0.18282026052474976, "step": 7 }, { "calib/answer_extract_rate": 0.0078125, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0078125, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.19140625, "completions/max_length": 1020.0, "completions/max_terminated_length": 1020.0, "completions/mean_length": 334.12109375, "completions/mean_terminated_length": 413.2125549316406, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.5285189151763916, "learning_rate": 1.2698412698412698e-07, "loss": -0.0977, "num_tokens": 4034728.0, "reward": 0.4276258945465088, "reward_std": 0.5124700665473938, "rewards/accuracy_reward_long_step": 0.1953125, "rewards/final_brier_reward_long_step": 0.13241875171661377, "rewards/format_reward_long_step": 0.2890625, "rewards/stepwise_brier_reward_long_step": 0.2187097817659378, "step": 8 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.14453125, "completions/max_length": 976.0, "completions/max_terminated_length": 976.0, "completions/mean_length": 361.22265625, "completions/mean_terminated_length": 422.2511291503906, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0144, "grad_norm": 2.121281862258911, "learning_rate": 1.4285714285714285e-07, "loss": -0.1102, "num_tokens": 4560393.0, "reward": 0.2677909731864929, "reward_std": 0.41135305166244507, "rewards/accuracy_reward_long_step": 0.07421875, "rewards/final_brier_reward_long_step": 0.11064782738685608, "rewards/format_reward_long_step": 0.25, "rewards/stepwise_brier_reward_long_step": 0.163641095161438, "step": 9 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.2, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.17578125, "completions/max_length": 1022.0, "completions/max_terminated_length": 1022.0, "completions/mean_length": 344.53515625, "completions/mean_terminated_length": 418.01422119140625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.2448827624320984, "learning_rate": 1.5873015873015872e-07, "loss": -0.1562, "num_tokens": 5068466.0, "reward": 0.3565204441547394, "reward_std": 0.4487614333629608, "rewards/accuracy_reward_long_step": 0.15234375, "rewards/final_brier_reward_long_step": 0.11602266132831573, "rewards/format_reward_long_step": 0.26171875, "rewards/stepwise_brier_reward_long_step": 0.17724668979644775, "step": 10 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 1023.0, "completions/max_terminated_length": 1023.0, "completions/mean_length": 349.59765625, "completions/mean_terminated_length": 403.1396484375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0176, "grad_norm": 0.8030067086219788, "learning_rate": 1.7460317460317458e-07, "loss": -0.0554, "num_tokens": 5589579.0, "reward": 0.2793608009815216, "reward_std": 0.404774010181427, "rewards/accuracy_reward_long_step": 0.10546875, "rewards/final_brier_reward_long_step": 0.08069999516010284, "rewards/format_reward_long_step": 0.234375, "rewards/stepwise_brier_reward_long_step": 0.1461181938648224, "step": 11 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.9, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.18359375, "completions/max_length": 1011.0, "completions/max_terminated_length": 1011.0, "completions/mean_length": 322.5234375, "completions/mean_terminated_length": 395.0526123046875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0192, "grad_norm": 0.28563177585601807, "learning_rate": 1.9047619047619045e-07, "loss": -0.119, "num_tokens": 6094689.0, "reward": 0.35700535774230957, "reward_std": 0.45222049951553345, "rewards/accuracy_reward_long_step": 0.19140625, "rewards/final_brier_reward_long_step": 0.09916991740465164, "rewards/format_reward_long_step": 0.19921875, "rewards/stepwise_brier_reward_long_step": 0.16478905081748962, "step": 12 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.16015625, "completions/max_length": 980.0, "completions/max_terminated_length": 980.0, "completions/mean_length": 322.3203125, "completions/mean_terminated_length": 383.7860412597656, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0208, "grad_norm": 0.1121608093380928, "learning_rate": 2.0634920634920632e-07, "loss": -0.0936, "num_tokens": 6610131.0, "reward": 0.410109281539917, "reward_std": 0.4506571292877197, "rewards/accuracy_reward_long_step": 0.1796875, "rewards/final_brier_reward_long_step": 0.12741835415363312, "rewards/format_reward_long_step": 0.296875, "rewards/stepwise_brier_reward_long_step": 0.2005188763141632, "step": 13 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.171875, "completions/max_length": 1013.0, "completions/max_terminated_length": 1013.0, "completions/mean_length": 317.0234375, "completions/mean_terminated_length": 382.8207702636719, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0224, "grad_norm": 0.11094717681407928, "learning_rate": 2.222222222222222e-07, "loss": -0.1125, "num_tokens": 7122753.0, "reward": 0.45112496614456177, "reward_std": 0.50334632396698, "rewards/accuracy_reward_long_step": 0.21484375, "rewards/final_brier_reward_long_step": 0.14254721999168396, "rewards/format_reward_long_step": 0.296875, "rewards/stepwise_brier_reward_long_step": 0.20882770419120789, "step": 14 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1796875, "completions/max_length": 966.0, "completions/max_terminated_length": 966.0, "completions/mean_length": 311.2265625, "completions/mean_terminated_length": 379.4000244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.024, "grad_norm": 0.13925662636756897, "learning_rate": 2.3809523809523806e-07, "loss": -0.1164, "num_tokens": 7630235.0, "reward": 0.31538814306259155, "reward_std": 0.4458809494972229, "rewards/accuracy_reward_long_step": 0.13671875, "rewards/final_brier_reward_long_step": 0.10075005888938904, "rewards/format_reward_long_step": 0.21875, "rewards/stepwise_brier_reward_long_step": 0.1764274686574936, "step": 15 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 1.0, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1484375, "completions/max_length": 1018.0, "completions/max_terminated_length": 1018.0, "completions/mean_length": 326.4453125, "completions/mean_terminated_length": 383.3486022949219, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0256, "grad_norm": 0.7503873109817505, "learning_rate": 2.5396825396825396e-07, "loss": -0.126, "num_tokens": 8156781.0, "reward": 0.4038216471672058, "reward_std": 0.5121759176254272, "rewards/accuracy_reward_long_step": 0.1875, "rewards/final_brier_reward_long_step": 0.13070036470890045, "rewards/format_reward_long_step": 0.26953125, "rewards/stepwise_brier_reward_long_step": 0.19552379846572876, "step": 16 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 1018.0, "completions/max_terminated_length": 1018.0, "completions/mean_length": 338.77734375, "completions/mean_terminated_length": 390.66217041015625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0272, "grad_norm": 0.06501276046037674, "learning_rate": 2.698412698412698e-07, "loss": -0.1059, "num_tokens": 8640828.0, "reward": 0.3153911828994751, "reward_std": 0.4668186902999878, "rewards/accuracy_reward_long_step": 0.12890625, "rewards/final_brier_reward_long_step": 0.10800625383853912, "rewards/format_reward_long_step": 0.234375, "rewards/stepwise_brier_reward_long_step": 0.16918347775936127, "step": 17 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.17578125, "completions/max_length": 979.0, "completions/max_terminated_length": 979.0, "completions/mean_length": 322.75390625, "completions/mean_terminated_length": 391.58770751953125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0288, "grad_norm": 0.1249445378780365, "learning_rate": 2.857142857142857e-07, "loss": -0.1224, "num_tokens": 9147797.0, "reward": 0.3471192717552185, "reward_std": 0.4712105989456177, "rewards/accuracy_reward_long_step": 0.14453125, "rewards/final_brier_reward_long_step": 0.11726874858140945, "rewards/format_reward_long_step": 0.25390625, "rewards/stepwise_brier_reward_long_step": 0.18527084589004517, "step": 18 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 0.95, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.13671875, "completions/max_length": 1004.0, "completions/max_terminated_length": 1004.0, "completions/mean_length": 343.99609375, "completions/mean_terminated_length": 398.4751281738281, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0304, "grad_norm": 0.07186749577522278, "learning_rate": 3.0158730158730156e-07, "loss": -0.0533, "num_tokens": 9663764.0, "reward": 0.379126638174057, "reward_std": 0.49106040596961975, "rewards/accuracy_reward_long_step": 0.1796875, "rewards/final_brier_reward_long_step": 0.11015625298023224, "rewards/format_reward_long_step": 0.25, "rewards/stepwise_brier_reward_long_step": 0.18760032951831818, "step": 19 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 980.0, "completions/max_terminated_length": 980.0, "completions/mean_length": 344.03125, "completions/mean_terminated_length": 400.3272705078125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.032, "grad_norm": 0.1281915009021759, "learning_rate": 3.1746031746031743e-07, "loss": -0.0944, "num_tokens": 10179540.0, "reward": 0.3329862952232361, "reward_std": 0.4402102828025818, "rewards/accuracy_reward_long_step": 0.1328125, "rewards/final_brier_reward_long_step": 0.11334909498691559, "rewards/format_reward_long_step": 0.2578125, "rewards/stepwise_brier_reward_long_step": 0.17172113060951233, "step": 20 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.17578125, "completions/max_length": 927.0, "completions/max_terminated_length": 927.0, "completions/mean_length": 325.28515625, "completions/mean_terminated_length": 394.6587829589844, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0336, "grad_norm": 0.15362648665905, "learning_rate": 3.333333333333333e-07, "loss": -0.0859, "num_tokens": 10689989.0, "reward": 0.35474836826324463, "reward_std": 0.48964905738830566, "rewards/accuracy_reward_long_step": 0.15625, "rewards/final_brier_reward_long_step": 0.11406318843364716, "rewards/format_reward_long_step": 0.25, "rewards/stepwise_brier_reward_long_step": 0.17993025481700897, "step": 21 }, { "calib/answer_extract_rate": 0.01171875, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 1.0, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.01171875, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1953125, "completions/max_length": 997.0, "completions/max_terminated_length": 997.0, "completions/mean_length": 311.5625, "completions/mean_terminated_length": 387.1844787597656, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0352, "grad_norm": 0.21162962913513184, "learning_rate": 3.4920634920634917e-07, "loss": -0.1431, "num_tokens": 11193597.0, "reward": 0.32876265048980713, "reward_std": 0.4388379454612732, "rewards/accuracy_reward_long_step": 0.1171875, "rewards/final_brier_reward_long_step": 0.10059726983308792, "rewards/format_reward_long_step": 0.26953125, "rewards/stepwise_brier_reward_long_step": 0.20664086937904358, "step": 22 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12890625, "completions/max_length": 1010.0, "completions/max_terminated_length": 1010.0, "completions/mean_length": 347.2265625, "completions/mean_terminated_length": 398.6098937988281, "completions/min_length": 0.0, "completions/min_terminated_length": 26.0, "epoch": 0.0368, "grad_norm": 0.08162181079387665, "learning_rate": 3.6507936507936504e-07, "loss": -0.0443, "num_tokens": 11712407.0, "reward": 0.3704327940940857, "reward_std": 0.4654346704483032, "rewards/accuracy_reward_long_step": 0.171875, "rewards/final_brier_reward_long_step": 0.0984906256198883, "rewards/format_reward_long_step": 0.26171875, "rewards/stepwise_brier_reward_long_step": 0.17230293154716492, "step": 23 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1024.0, "completions/max_terminated_length": 1024.0, "completions/mean_length": 358.25, "completions/mean_terminated_length": 409.4285888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0384, "grad_norm": 0.22265669703483582, "learning_rate": 3.809523809523809e-07, "loss": -0.0882, "num_tokens": 12222919.0, "reward": 0.39699164032936096, "reward_std": 0.49701642990112305, "rewards/accuracy_reward_long_step": 0.17578125, "rewards/final_brier_reward_long_step": 0.11312989890575409, "rewards/format_reward_long_step": 0.28125, "rewards/stepwise_brier_reward_long_step": 0.20921160280704498, "step": 24 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.8, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 1015.0, "completions/max_terminated_length": 1015.0, "completions/mean_length": 361.26171875, "completions/mean_terminated_length": 398.63360595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.04, "grad_norm": 0.07138670980930328, "learning_rate": 3.968253968253968e-07, "loss": -0.0724, "num_tokens": 12748058.0, "reward": 0.39820796251296997, "reward_std": 0.4787534177303314, "rewards/accuracy_reward_long_step": 0.14453125, "rewards/final_brier_reward_long_step": 0.1328800767660141, "rewards/format_reward_long_step": 0.32421875, "rewards/stepwise_brier_reward_long_step": 0.23338933289051056, "step": 25 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.17578125, "completions/max_length": 1008.0, "completions/max_terminated_length": 1008.0, "completions/mean_length": 326.24609375, "completions/mean_terminated_length": 395.82464599609375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0416, "grad_norm": 0.06991428881883621, "learning_rate": 4.1269841269841265e-07, "loss": -0.1661, "num_tokens": 13269377.0, "reward": 0.3767819404602051, "reward_std": 0.46116840839385986, "rewards/accuracy_reward_long_step": 0.15625, "rewards/final_brier_reward_long_step": 0.11419257521629333, "rewards/format_reward_long_step": 0.2890625, "rewards/stepwise_brier_reward_long_step": 0.1898101270198822, "step": 26 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.14453125, "completions/max_length": 989.0, "completions/max_terminated_length": 989.0, "completions/mean_length": 352.62109375, "completions/mean_terminated_length": 412.1963195800781, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0432, "grad_norm": 0.18298697471618652, "learning_rate": 4.285714285714285e-07, "loss": -0.0767, "num_tokens": 13776568.0, "reward": 0.3685033917427063, "reward_std": 0.4383317232131958, "rewards/accuracy_reward_long_step": 0.13671875, "rewards/final_brier_reward_long_step": 0.11230936646461487, "rewards/format_reward_long_step": 0.3046875, "rewards/stepwise_brier_reward_long_step": 0.20545418560504913, "step": 27 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15625, "completions/max_length": 1013.0, "completions/max_terminated_length": 1013.0, "completions/mean_length": 360.98828125, "completions/mean_terminated_length": 427.83795166015625, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.0448, "grad_norm": 0.07104546576738358, "learning_rate": 4.444444444444444e-07, "loss": -0.134, "num_tokens": 14287893.0, "reward": 0.41739368438720703, "reward_std": 0.5269143581390381, "rewards/accuracy_reward_long_step": 0.16796875, "rewards/final_brier_reward_long_step": 0.14333046972751617, "rewards/format_reward_long_step": 0.31640625, "rewards/stepwise_brier_reward_long_step": 0.22155673801898956, "step": 28 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.75, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 966.0, "completions/max_terminated_length": 966.0, "completions/mean_length": 338.6015625, "completions/mean_terminated_length": 394.0090637207031, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0464, "grad_norm": 0.1085757464170456, "learning_rate": 4.6031746031746025e-07, "loss": -0.0861, "num_tokens": 14807951.0, "reward": 0.44378989934921265, "reward_std": 0.4950755834579468, "rewards/accuracy_reward_long_step": 0.16796875, "rewards/final_brier_reward_long_step": 0.13291756808757782, "rewards/format_reward_long_step": 0.359375, "rewards/stepwise_brier_reward_long_step": 0.2516169548034668, "step": 29 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 0.95, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12109375, "completions/max_length": 980.0, "completions/max_terminated_length": 980.0, "completions/mean_length": 353.80078125, "completions/mean_terminated_length": 402.5466613769531, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.048, "grad_norm": 0.08588235080242157, "learning_rate": 4.761904761904761e-07, "loss": -0.0824, "num_tokens": 15312820.0, "reward": 0.5131794214248657, "reward_std": 0.5309146046638489, "rewards/accuracy_reward_long_step": 0.20703125, "rewards/final_brier_reward_long_step": 0.17949271202087402, "rewards/format_reward_long_step": 0.390625, "rewards/stepwise_brier_reward_long_step": 0.2638500928878784, "step": 30 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 0.98, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 992.0, "completions/max_terminated_length": 992.0, "completions/mean_length": 355.95703125, "completions/mean_terminated_length": 392.7801818847656, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0496, "grad_norm": 0.0898517370223999, "learning_rate": 4.92063492063492e-07, "loss": -0.0724, "num_tokens": 15823481.0, "reward": 0.5299139618873596, "reward_std": 0.57805997133255, "rewards/accuracy_reward_long_step": 0.2421875, "rewards/final_brier_reward_long_step": 0.17374873161315918, "rewards/format_reward_long_step": 0.35546875, "rewards/stepwise_brier_reward_long_step": 0.26621949672698975, "step": 31 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.9, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1013.0, "completions/max_terminated_length": 1013.0, "completions/mean_length": 358.1640625, "completions/mean_terminated_length": 409.33038330078125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0512, "grad_norm": 1.044785737991333, "learning_rate": 5.079365079365079e-07, "loss": -0.1237, "num_tokens": 16325931.0, "reward": 0.44143152236938477, "reward_std": 0.4683123230934143, "rewards/accuracy_reward_long_step": 0.16796875, "rewards/final_brier_reward_long_step": 0.14440733194351196, "rewards/format_reward_long_step": 0.35546875, "rewards/stepwise_brier_reward_long_step": 0.2385062575340271, "step": 32 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1016.0, "completions/max_terminated_length": 1016.0, "completions/mean_length": 359.73046875, "completions/mean_terminated_length": 411.12054443359375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0528, "grad_norm": 0.04461158439517021, "learning_rate": 5.238095238095238e-07, "loss": -0.0966, "num_tokens": 16843398.0, "reward": 0.44179078936576843, "reward_std": 0.507757306098938, "rewards/accuracy_reward_long_step": 0.16796875, "rewards/final_brier_reward_long_step": 0.15976552665233612, "rewards/format_reward_long_step": 0.36328125, "rewards/stepwise_brier_reward_long_step": 0.20896016061306, "step": 33 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0078125, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 0.96, "calib/nonempty_final_conf_rate": 0.0078125, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.010000000000000009, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 995.0, "completions/max_terminated_length": 995.0, "completions/mean_length": 345.3828125, "completions/mean_terminated_length": 382.76190185546875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0544, "grad_norm": 0.14507651329040527, "learning_rate": 5.396825396825396e-07, "loss": -0.1239, "num_tokens": 17357632.0, "reward": 0.4228193163871765, "reward_std": 0.46190011501312256, "rewards/accuracy_reward_long_step": 0.125, "rewards/final_brier_reward_long_step": 0.15529990196228027, "rewards/format_reward_long_step": 0.3828125, "rewards/stepwise_brier_reward_long_step": 0.2703523635864258, "step": 34 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 980.0, "completions/max_terminated_length": 980.0, "completions/mean_length": 342.94140625, "completions/mean_terminated_length": 385.0570068359375, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.056, "grad_norm": 0.13200579583644867, "learning_rate": 5.555555555555555e-07, "loss": -0.0371, "num_tokens": 17874505.0, "reward": 0.6404584646224976, "reward_std": 0.5706257820129395, "rewards/accuracy_reward_long_step": 0.29296875, "rewards/final_brier_reward_long_step": 0.22589921951293945, "rewards/format_reward_long_step": 0.42578125, "rewards/stepwise_brier_reward_long_step": 0.31249701976776123, "step": 35 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 0.97, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 955.0, "completions/max_terminated_length": 955.0, "completions/mean_length": 314.12109375, "completions/mean_terminated_length": 352.6973571777344, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0576, "grad_norm": 0.15383663773536682, "learning_rate": 5.714285714285714e-07, "loss": -0.1074, "num_tokens": 18373048.0, "reward": 0.6416888236999512, "reward_std": 0.582075834274292, "rewards/accuracy_reward_long_step": 0.234375, "rewards/final_brier_reward_long_step": 0.23546718060970306, "rewards/format_reward_long_step": 0.5078125, "rewards/stepwise_brier_reward_long_step": 0.37816306948661804, "step": 36 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 0.94, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 987.0, "completions/max_terminated_length": 987.0, "completions/mean_length": 334.453125, "completions/mean_terminated_length": 377.18060302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.0592, "grad_norm": 0.16420908272266388, "learning_rate": 5.873015873015873e-07, "loss": -0.0809, "num_tokens": 18885148.0, "reward": 0.6179122924804688, "reward_std": 0.5788693428039551, "rewards/accuracy_reward_long_step": 0.22265625, "rewards/final_brier_reward_long_step": 0.20363515615463257, "rewards/format_reward_long_step": 0.49609375, "rewards/stepwise_brier_reward_long_step": 0.385201632976532, "step": 37 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 996.0, "completions/max_terminated_length": 996.0, "completions/mean_length": 345.3828125, "completions/mean_terminated_length": 374.6525573730469, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0608, "grad_norm": 0.20274563133716583, "learning_rate": 6.031746031746031e-07, "loss": -0.0143, "num_tokens": 19403174.0, "reward": 0.5915793180465698, "reward_std": 0.5329806804656982, "rewards/accuracy_reward_long_step": 0.16015625, "rewards/final_brier_reward_long_step": 0.2439812570810318, "rewards/format_reward_long_step": 0.5390625, "rewards/stepwise_brier_reward_long_step": 0.40358591079711914, "step": 38 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 918.0, "completions/max_terminated_length": 918.0, "completions/mean_length": 326.77734375, "completions/mean_terminated_length": 357.5000305175781, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.0624, "grad_norm": 0.3366471827030182, "learning_rate": 6.19047619047619e-07, "loss": -0.0919, "num_tokens": 19907301.0, "reward": 0.5409894585609436, "reward_std": 0.4594622850418091, "rewards/accuracy_reward_long_step": 0.13671875, "rewards/final_brier_reward_long_step": 0.20591670274734497, "rewards/format_reward_long_step": 0.546875, "rewards/stepwise_brier_reward_long_step": 0.31741613149642944, "step": 39 }, { "calib/answer_extract_rate": 0.0078125, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 0.95, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0078125, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 1015.0, "completions/max_terminated_length": 1015.0, "completions/mean_length": 326.78515625, "completions/mean_terminated_length": 362.1515197753906, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.064, "grad_norm": 0.1065516546368599, "learning_rate": 6.349206349206349e-07, "loss": -0.0945, "num_tokens": 20406822.0, "reward": 0.6141079664230347, "reward_std": 0.5281961560249329, "rewards/accuracy_reward_long_step": 0.1875, "rewards/final_brier_reward_long_step": 0.23533864319324493, "rewards/format_reward_long_step": 0.53125, "rewards/stepwise_brier_reward_long_step": 0.4085933566093445, "step": 40 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 0.95, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1020.0, "completions/max_terminated_length": 1020.0, "completions/mean_length": 306.8515625, "completions/mean_terminated_length": 350.6875305175781, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.0656, "grad_norm": 0.21869583427906036, "learning_rate": 6.507936507936507e-07, "loss": -0.1106, "num_tokens": 20919024.0, "reward": 0.5630888342857361, "reward_std": 0.48765885829925537, "rewards/accuracy_reward_long_step": 0.15234375, "rewards/final_brier_reward_long_step": 0.21809795498847961, "rewards/format_reward_long_step": 0.5234375, "rewards/stepwise_brier_reward_long_step": 0.37800735235214233, "step": 41 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 956.0, "completions/max_terminated_length": 956.0, "completions/mean_length": 311.23828125, "completions/mean_terminated_length": 344.9220886230469, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0672, "grad_norm": 0.12498702108860016, "learning_rate": 6.666666666666666e-07, "loss": -0.0945, "num_tokens": 21415717.0, "reward": 0.7428398132324219, "reward_std": 0.5535950660705566, "rewards/accuracy_reward_long_step": 0.30078125, "rewards/final_brier_reward_long_step": 0.27300766110420227, "rewards/format_reward_long_step": 0.5625, "rewards/stepwise_brier_reward_long_step": 0.37022653222084045, "step": 42 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.8, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 978.0, "completions/max_terminated_length": 978.0, "completions/mean_length": 316.40234375, "completions/mean_terminated_length": 336.095458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 25.0, "epoch": 0.0688, "grad_norm": 0.10794218629598618, "learning_rate": 6.825396825396826e-07, "loss": -0.0312, "num_tokens": 21930028.0, "reward": 0.6837334632873535, "reward_std": 0.5118536949157715, "rewards/accuracy_reward_long_step": 0.2265625, "rewards/final_brier_reward_long_step": 0.2446330040693283, "rewards/format_reward_long_step": 0.57421875, "rewards/stepwise_brier_reward_long_step": 0.43561333417892456, "step": 43 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 999.0, "completions/max_terminated_length": 999.0, "completions/mean_length": 312.5703125, "completions/mean_terminated_length": 339.059326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.0704, "grad_norm": 0.08316317200660706, "learning_rate": 6.984126984126983e-07, "loss": -0.0437, "num_tokens": 22445902.0, "reward": 0.758315920829773, "reward_std": 0.5356731414794922, "rewards/accuracy_reward_long_step": 0.28515625, "rewards/final_brier_reward_long_step": 0.2901049256324768, "rewards/format_reward_long_step": 0.60546875, "rewards/stepwise_brier_reward_long_step": 0.39159637689590454, "step": 44 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.85, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 1011.0, "completions/max_terminated_length": 1011.0, "completions/mean_length": 316.65625, "completions/mean_terminated_length": 336.36517333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.072, "grad_norm": 0.14549246430397034, "learning_rate": 7.142857142857143e-07, "loss": -0.0613, "num_tokens": 22949038.0, "reward": 0.7944426536560059, "reward_std": 0.5610437393188477, "rewards/accuracy_reward_long_step": 0.26953125, "rewards/final_brier_reward_long_step": 0.320908784866333, "rewards/format_reward_long_step": 0.6484375, "rewards/stepwise_brier_reward_long_step": 0.48186174035072327, "step": 45 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 960.0, "completions/max_terminated_length": 960.0, "completions/mean_length": 295.67578125, "completions/mean_terminated_length": 314.078857421875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0736, "grad_norm": 0.07323121279478073, "learning_rate": 7.301587301587301e-07, "loss": -0.0735, "num_tokens": 23427059.0, "reward": 0.6723222732543945, "reward_std": 0.4763370752334595, "rewards/accuracy_reward_long_step": 0.15234375, "rewards/final_brier_reward_long_step": 0.26658162474632263, "rewards/format_reward_long_step": 0.6875, "rewards/stepwise_brier_reward_long_step": 0.43833261728286743, "step": 46 }, { "calib/answer_extract_rate": 0.0078125, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0078125, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 841.0, "completions/max_terminated_length": 841.0, "completions/mean_length": 306.3046875, "completions/mean_terminated_length": 321.36883544921875, "completions/min_length": 0.0, "completions/min_terminated_length": 14.0, "epoch": 0.0752, "grad_norm": 0.2430606335401535, "learning_rate": 7.46031746031746e-07, "loss": -0.1118, "num_tokens": 23915857.0, "reward": 0.9021989703178406, "reward_std": 0.5503741502761841, "rewards/accuracy_reward_long_step": 0.31640625, "rewards/final_brier_reward_long_step": 0.36550503969192505, "rewards/format_reward_long_step": 0.7265625, "rewards/stepwise_brier_reward_long_step": 0.5245407223701477, "step": 47 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 898.0, "completions/max_terminated_length": 898.0, "completions/mean_length": 315.328125, "completions/mean_terminated_length": 328.1463317871094, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.0768, "grad_norm": 0.23907212913036346, "learning_rate": 7.619047619047618e-07, "loss": -0.0704, "num_tokens": 24407109.0, "reward": 0.8392512798309326, "reward_std": 0.5024853944778442, "rewards/accuracy_reward_long_step": 0.25390625, "rewards/final_brier_reward_long_step": 0.34439170360565186, "rewards/format_reward_long_step": 0.71875, "rewards/stepwise_brier_reward_long_step": 0.5594882965087891, "step": 48 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 821.0, "completions/max_terminated_length": 821.0, "completions/mean_length": 306.22265625, "completions/mean_terminated_length": 322.60491943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.0784, "grad_norm": 0.13986116647720337, "learning_rate": 7.777777777777778e-07, "loss": -0.0654, "num_tokens": 24920814.0, "reward": 0.9134366512298584, "reward_std": 0.5091613531112671, "rewards/accuracy_reward_long_step": 0.2734375, "rewards/final_brier_reward_long_step": 0.3957996368408203, "rewards/format_reward_long_step": 0.79296875, "rewards/stepwise_brier_reward_long_step": 0.5782594680786133, "step": 49 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 819.0, "completions/max_terminated_length": 819.0, "completions/mean_length": 281.90625, "completions/mean_terminated_length": 295.7704772949219, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.08, "grad_norm": 0.22414922714233398, "learning_rate": 7.936507936507936e-07, "loss": -0.0835, "num_tokens": 25417334.0, "reward": 0.87409508228302, "reward_std": 0.49227654933929443, "rewards/accuracy_reward_long_step": 0.23046875, "rewards/final_brier_reward_long_step": 0.35886436700820923, "rewards/format_reward_long_step": 0.80859375, "rewards/stepwise_brier_reward_long_step": 0.59845370054245, "step": 50 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.88, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 998.0, "completions/max_terminated_length": 998.0, "completions/mean_length": 297.73046875, "completions/mean_terminated_length": 304.8760070800781, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.0816, "grad_norm": 0.15504558384418488, "learning_rate": 8.095238095238095e-07, "loss": -0.0377, "num_tokens": 25917625.0, "reward": 0.8817519545555115, "reward_std": 0.5145280361175537, "rewards/accuracy_reward_long_step": 0.2265625, "rewards/final_brier_reward_long_step": 0.36166319251060486, "rewards/format_reward_long_step": 0.828125, "rewards/stepwise_brier_reward_long_step": 0.6028447151184082, "step": 51 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 777.0, "completions/max_terminated_length": 777.0, "completions/mean_length": 279.953125, "completions/mean_terminated_length": 285.5298767089844, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.0832, "grad_norm": 0.06322144716978073, "learning_rate": 8.253968253968253e-07, "loss": -0.0324, "num_tokens": 26420901.0, "reward": 0.9491708278656006, "reward_std": 0.4398835599422455, "rewards/accuracy_reward_long_step": 0.25390625, "rewards/final_brier_reward_long_step": 0.42088940739631653, "rewards/format_reward_long_step": 0.8828125, "rewards/stepwise_brier_reward_long_step": 0.594543993473053, "step": 52 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 888.0, "completions/max_terminated_length": 888.0, "completions/mean_length": 299.453125, "completions/mean_terminated_length": 307.8714599609375, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.0848, "grad_norm": 0.045589037239551544, "learning_rate": 8.412698412698413e-07, "loss": -0.0128, "num_tokens": 26920409.0, "reward": 0.8190128803253174, "reward_std": 0.3742133677005768, "rewards/accuracy_reward_long_step": 0.16015625, "rewards/final_brier_reward_long_step": 0.3180277347564697, "rewards/format_reward_long_step": 0.84375, "rewards/stepwise_brier_reward_long_step": 0.6298986673355103, "step": 53 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 840.0, "completions/max_terminated_length": 840.0, "completions/mean_length": 290.125, "completions/mean_terminated_length": 293.5652160644531, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.0864, "grad_norm": 0.2063865065574646, "learning_rate": 8.57142857142857e-07, "loss": -0.0173, "num_tokens": 27414017.0, "reward": 0.9507964253425598, "reward_std": 0.426396906375885, "rewards/accuracy_reward_long_step": 0.25, "rewards/final_brier_reward_long_step": 0.38583073019981384, "rewards/format_reward_long_step": 0.890625, "rewards/stepwise_brier_reward_long_step": 0.636104941368103, "step": 54 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 757.0, "completions/max_terminated_length": 757.0, "completions/mean_length": 291.60546875, "completions/mean_terminated_length": 299.8031921386719, "completions/min_length": 0.0, "completions/min_terminated_length": 28.0, "epoch": 0.088, "grad_norm": 0.12576599419116974, "learning_rate": 8.73015873015873e-07, "loss": -0.0195, "num_tokens": 27925500.0, "reward": 1.0254812240600586, "reward_std": 0.5260324478149414, "rewards/accuracy_reward_long_step": 0.31640625, "rewards/final_brier_reward_long_step": 0.45056432485580444, "rewards/format_reward_long_step": 0.85546875, "rewards/stepwise_brier_reward_long_step": 0.674798309803009, "step": 55 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 969.0, "completions/max_terminated_length": 969.0, "completions/mean_length": 289.62890625, "completions/mean_terminated_length": 293.0632629394531, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.0896, "grad_norm": 0.13417616486549377, "learning_rate": 8.888888888888888e-07, "loss": -0.0375, "num_tokens": 28431053.0, "reward": 1.1285545825958252, "reward_std": 0.4153571128845215, "rewards/accuracy_reward_long_step": 0.38671875, "rewards/final_brier_reward_long_step": 0.4956166744232178, "rewards/format_reward_long_step": 0.92578125, "rewards/stepwise_brier_reward_long_step": 0.6201643943786621, "step": 56 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 763.0, "completions/max_terminated_length": 763.0, "completions/mean_length": 269.65625, "completions/mean_terminated_length": 276.1280212402344, "completions/min_length": 0.0, "completions/min_terminated_length": 15.0, "epoch": 0.0912, "grad_norm": 0.06763328611850739, "learning_rate": 9.047619047619047e-07, "loss": -0.0317, "num_tokens": 28913637.0, "reward": 0.9394167065620422, "reward_std": 0.3821975290775299, "rewards/accuracy_reward_long_step": 0.2421875, "rewards/final_brier_reward_long_step": 0.423524409532547, "rewards/format_reward_long_step": 0.91796875, "rewards/stepwise_brier_reward_long_step": 0.5294549465179443, "step": 57 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 827.0, "completions/max_terminated_length": 827.0, "completions/mean_length": 271.98828125, "completions/mean_terminated_length": 277.4063720703125, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.0928, "grad_norm": 0.25471770763397217, "learning_rate": 9.206349206349205e-07, "loss": -0.0193, "num_tokens": 29408858.0, "reward": 1.0430493354797363, "reward_std": 0.45021143555641174, "rewards/accuracy_reward_long_step": 0.30078125, "rewards/final_brier_reward_long_step": 0.45864561200141907, "rewards/format_reward_long_step": 0.90625, "rewards/stepwise_brier_reward_long_step": 0.6979269981384277, "step": 58 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 999.0, "completions/max_terminated_length": 999.0, "completions/mean_length": 268.2109375, "completions/mean_terminated_length": 272.46826171875, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.0944, "grad_norm": 0.08365736901760101, "learning_rate": 9.365079365079365e-07, "loss": -0.0398, "num_tokens": 29906672.0, "reward": 0.9959282875061035, "reward_std": 0.3980240225791931, "rewards/accuracy_reward_long_step": 0.28515625, "rewards/final_brier_reward_long_step": 0.4108448326587677, "rewards/format_reward_long_step": 0.8984375, "rewards/stepwise_brier_reward_long_step": 0.6353680491447449, "step": 59 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 870.0, "completions/max_terminated_length": 870.0, "completions/mean_length": 270.6953125, "completions/mean_terminated_length": 274.9920654296875, "completions/min_length": 0.0, "completions/min_terminated_length": 29.0, "epoch": 0.096, "grad_norm": 0.037650614976882935, "learning_rate": 9.523809523809522e-07, "loss": -0.0363, "num_tokens": 30394242.0, "reward": 0.9999738931655884, "reward_std": 0.4165264964103699, "rewards/accuracy_reward_long_step": 0.2734375, "rewards/final_brier_reward_long_step": 0.44108301401138306, "rewards/format_reward_long_step": 0.90234375, "rewards/stepwise_brier_reward_long_step": 0.66037517786026, "step": 60 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 865.0, "completions/max_terminated_length": 865.0, "completions/mean_length": 265.21875, "completions/mean_terminated_length": 267.3070983886719, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.0976, "grad_norm": 0.106496162712574, "learning_rate": 9.682539682539682e-07, "loss": -0.0001, "num_tokens": 30893266.0, "reward": 1.1621458530426025, "reward_std": 0.4202546775341034, "rewards/accuracy_reward_long_step": 0.390625, "rewards/final_brier_reward_long_step": 0.533796489238739, "rewards/format_reward_long_step": 0.94140625, "rewards/stepwise_brier_reward_long_step": 0.6694742441177368, "step": 61 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 643.0, "completions/max_terminated_length": 643.0, "completions/mean_length": 263.109375, "completions/mean_terminated_length": 266.229248046875, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.0992, "grad_norm": 0.10262063890695572, "learning_rate": 9.84126984126984e-07, "loss": 0.0064, "num_tokens": 31379758.0, "reward": 1.0871508121490479, "reward_std": 0.3243914842605591, "rewards/accuracy_reward_long_step": 0.3203125, "rewards/final_brier_reward_long_step": 0.49898362159729004, "rewards/format_reward_long_step": 0.94140625, "rewards/stepwise_brier_reward_long_step": 0.6855573058128357, "step": 62 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 797.0, "completions/max_terminated_length": 797.0, "completions/mean_length": 269.95703125, "completions/mean_terminated_length": 271.0157165527344, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.1008, "grad_norm": 0.045267656445503235, "learning_rate": 1e-06, "loss": 0.0051, "num_tokens": 31875611.0, "reward": 1.0248790979385376, "reward_std": 0.3271501064300537, "rewards/accuracy_reward_long_step": 0.26171875, "rewards/final_brier_reward_long_step": 0.5174949169158936, "rewards/format_reward_long_step": 0.9609375, "rewards/stepwise_brier_reward_long_step": 0.6132714152336121, "step": 63 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 709.0, "completions/max_terminated_length": 709.0, "completions/mean_length": 243.5, "completions/mean_terminated_length": 245.41732788085938, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.1024, "grad_norm": 0.09396693110466003, "learning_rate": 9.98220640569395e-07, "loss": -0.0249, "num_tokens": 32375531.0, "reward": 1.0874074697494507, "reward_std": 0.405730664730072, "rewards/accuracy_reward_long_step": 0.3046875, "rewards/final_brier_reward_long_step": 0.4932839870452881, "rewards/format_reward_long_step": 0.94140625, "rewards/stepwise_brier_reward_long_step": 0.7547836899757385, "step": 64 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 603.0, "completions/max_terminated_length": 603.0, "completions/mean_length": 238.4765625, "completions/mean_terminated_length": 242.2619171142578, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.104, "grad_norm": 0.10586308687925339, "learning_rate": 9.9644128113879e-07, "loss": -0.0241, "num_tokens": 32865229.0, "reward": 1.0684640407562256, "reward_std": 0.37198448181152344, "rewards/accuracy_reward_long_step": 0.30078125, "rewards/final_brier_reward_long_step": 0.5142987966537476, "rewards/format_reward_long_step": 0.9375, "rewards/stepwise_brier_reward_long_step": 0.68143230676651, "step": 65 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 795.0, "completions/max_terminated_length": 795.0, "completions/mean_length": 235.64453125, "completions/mean_terminated_length": 238.43875122070312, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.1056, "grad_norm": 0.06174841523170471, "learning_rate": 9.94661921708185e-07, "loss": -0.0329, "num_tokens": 33359506.0, "reward": 1.0320240259170532, "reward_std": 0.3543888330459595, "rewards/accuracy_reward_long_step": 0.28125, "rewards/final_brier_reward_long_step": 0.523512601852417, "rewards/format_reward_long_step": 0.94140625, "rewards/stepwise_brier_reward_long_step": 0.5967710614204407, "step": 66 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 627.0, "completions/max_terminated_length": 627.0, "completions/mean_length": 236.34375, "completions/mean_terminated_length": 237.27059936523438, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.1072, "grad_norm": 0.07356097549200058, "learning_rate": 9.9288256227758e-07, "loss": -0.0126, "num_tokens": 33855346.0, "reward": 1.2128088474273682, "reward_std": 0.32791173458099365, "rewards/accuracy_reward_long_step": 0.3828125, "rewards/final_brier_reward_long_step": 0.6027936935424805, "rewards/format_reward_long_step": 0.96484375, "rewards/stepwise_brier_reward_long_step": 0.7875038385391235, "step": 67 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1007.0, "completions/max_terminated_length": 1007.0, "completions/mean_length": 241.8046875, "completions/mean_terminated_length": 242.75296020507812, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.1088, "grad_norm": 0.09495385736227036, "learning_rate": 9.91103202846975e-07, "loss": 0.0023, "num_tokens": 34342288.0, "reward": 1.1574406623840332, "reward_std": 0.36004209518432617, "rewards/accuracy_reward_long_step": 0.328125, "rewards/final_brier_reward_long_step": 0.6040390729904175, "rewards/format_reward_long_step": 0.96875, "rewards/stepwise_brier_reward_long_step": 0.7757238149642944, "step": 68 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 808.0, "completions/max_terminated_length": 808.0, "completions/mean_length": 220.72265625, "completions/mean_terminated_length": 222.46063232421875, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.1104, "grad_norm": 0.4110874533653259, "learning_rate": 9.8932384341637e-07, "loss": 0.009, "num_tokens": 34802673.0, "reward": 1.3306258916854858, "reward_std": 0.33039307594299316, "rewards/accuracy_reward_long_step": 0.484375, "rewards/final_brier_reward_long_step": 0.6564062833786011, "rewards/format_reward_long_step": 0.98046875, "rewards/stepwise_brier_reward_long_step": 0.7676596641540527, "step": 69 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 479.0, "completions/max_terminated_length": 479.0, "completions/mean_length": 224.27734375, "completions/mean_terminated_length": 224.27734375, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.112, "grad_norm": 0.05017966777086258, "learning_rate": 9.87544483985765e-07, "loss": -0.0149, "num_tokens": 35287528.0, "reward": 1.2564759254455566, "reward_std": 0.27206528186798096, "rewards/accuracy_reward_long_step": 0.421875, "rewards/final_brier_reward_long_step": 0.6693449020385742, "rewards/format_reward_long_step": 0.97265625, "rewards/stepwise_brier_reward_long_step": 0.7237462401390076, "step": 70 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 550.0, "completions/max_terminated_length": 550.0, "completions/mean_length": 218.94140625, "completions/mean_terminated_length": 218.94140625, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 0.1136, "grad_norm": 0.060872942209243774, "learning_rate": 9.8576512455516e-07, "loss": -0.0093, "num_tokens": 35762137.0, "reward": 1.3191676139831543, "reward_std": 0.35025539994239807, "rewards/accuracy_reward_long_step": 0.4609375, "rewards/final_brier_reward_long_step": 0.6817148327827454, "rewards/format_reward_long_step": 0.984375, "rewards/stepwise_brier_reward_long_step": 0.7824558615684509, "step": 71 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 705.0, "completions/max_terminated_length": 705.0, "completions/mean_length": 223.96875, "completions/mean_terminated_length": 223.96875, "completions/min_length": 55.0, "completions/min_terminated_length": 55.0, "epoch": 0.1152, "grad_norm": 0.11113214492797852, "learning_rate": 9.83985765124555e-07, "loss": 0.0053, "num_tokens": 36233089.0, "reward": 1.1259610652923584, "reward_std": 0.26902925968170166, "rewards/accuracy_reward_long_step": 0.29296875, "rewards/final_brier_reward_long_step": 0.6245523691177368, "rewards/format_reward_long_step": 0.97265625, "rewards/stepwise_brier_reward_long_step": 0.7621045112609863, "step": 72 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 589.0, "completions/max_terminated_length": 589.0, "completions/mean_length": 214.41015625, "completions/mean_terminated_length": 216.09841918945312, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.1168, "grad_norm": 0.15858663618564606, "learning_rate": 9.8220640569395e-07, "loss": -0.0158, "num_tokens": 36715946.0, "reward": 1.0615253448486328, "reward_std": 0.2766742706298828, "rewards/accuracy_reward_long_step": 0.234375, "rewards/final_brier_reward_long_step": 0.6290937662124634, "rewards/format_reward_long_step": 0.96484375, "rewards/stepwise_brier_reward_long_step": 0.7498202919960022, "step": 73 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 600.0, "completions/max_terminated_length": 600.0, "completions/mean_length": 214.25390625, "completions/mean_terminated_length": 214.25390625, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.1184, "grad_norm": 0.5460712909698486, "learning_rate": 9.804270462633451e-07, "loss": -0.011, "num_tokens": 37180563.0, "reward": 1.268796443939209, "reward_std": 0.3486781716346741, "rewards/accuracy_reward_long_step": 0.40625, "rewards/final_brier_reward_long_step": 0.6875852346420288, "rewards/format_reward_long_step": 0.98046875, "rewards/stepwise_brier_reward_long_step": 0.8016629219055176, "step": 74 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 712.0, "completions/max_terminated_length": 712.0, "completions/mean_length": 208.28515625, "completions/mean_terminated_length": 209.92520141601562, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.12, "grad_norm": 0.06162435933947563, "learning_rate": 9.786476868327401e-07, "loss": -0.0327, "num_tokens": 37667916.0, "reward": 1.14532470703125, "reward_std": 0.30212295055389404, "rewards/accuracy_reward_long_step": 0.30859375, "rewards/final_brier_reward_long_step": 0.6789199113845825, "rewards/format_reward_long_step": 0.96484375, "rewards/stepwise_brier_reward_long_step": 0.7383161187171936, "step": 75 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 585.0, "completions/max_terminated_length": 585.0, "completions/mean_length": 216.5234375, "completions/mean_terminated_length": 216.5234375, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.1216, "grad_norm": 0.07474726438522339, "learning_rate": 9.768683274021351e-07, "loss": 0.0095, "num_tokens": 38143594.0, "reward": 1.2285196781158447, "reward_std": 0.2515240013599396, "rewards/accuracy_reward_long_step": 0.3671875, "rewards/final_brier_reward_long_step": 0.715578556060791, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7375626564025879, "step": 76 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 573.0, "completions/max_terminated_length": 573.0, "completions/mean_length": 209.671875, "completions/mean_terminated_length": 209.671875, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.1232, "grad_norm": 0.04926946386694908, "learning_rate": 9.750889679715302e-07, "loss": 0.0161, "num_tokens": 38622302.0, "reward": 1.2208093404769897, "reward_std": 0.23148852586746216, "rewards/accuracy_reward_long_step": 0.34375, "rewards/final_brier_reward_long_step": 0.7191964387893677, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.8046656250953674, "step": 77 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 500.0, "completions/max_terminated_length": 500.0, "completions/mean_length": 205.0, "completions/mean_terminated_length": 205.0, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.1248, "grad_norm": 0.05082042142748833, "learning_rate": 9.733096085409252e-07, "loss": -0.0045, "num_tokens": 39095686.0, "reward": 1.1963913440704346, "reward_std": 0.2708578109741211, "rewards/accuracy_reward_long_step": 0.33203125, "rewards/final_brier_reward_long_step": 0.7243698239326477, "rewards/format_reward_long_step": 0.98828125, "rewards/stepwise_brier_reward_long_step": 0.7565078735351562, "step": 78 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 692.0, "completions/max_terminated_length": 692.0, "completions/mean_length": 209.58203125, "completions/mean_terminated_length": 209.58203125, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.1264, "grad_norm": 0.05406387895345688, "learning_rate": 9.715302491103202e-07, "loss": -0.0165, "num_tokens": 39573555.0, "reward": 1.2039846181869507, "reward_std": 0.21231237053871155, "rewards/accuracy_reward_long_step": 0.328125, "rewards/final_brier_reward_long_step": 0.7417787313461304, "rewards/format_reward_long_step": 0.98828125, "rewards/stepwise_brier_reward_long_step": 0.7850972414016724, "step": 79 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 389.0, "completions/max_terminated_length": 389.0, "completions/mean_length": 200.73046875, "completions/mean_terminated_length": 200.73046875, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.128, "grad_norm": 0.03713912516832352, "learning_rate": 9.697508896797152e-07, "loss": -0.0034, "num_tokens": 40051286.0, "reward": 1.1338202953338623, "reward_std": 0.22740787267684937, "rewards/accuracy_reward_long_step": 0.25390625, "rewards/final_brier_reward_long_step": 0.7587928771972656, "rewards/format_reward_long_step": 0.98828125, "rewards/stepwise_brier_reward_long_step": 0.7843012809753418, "step": 80 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 581.0, "completions/max_terminated_length": 581.0, "completions/mean_length": 197.05859375, "completions/mean_terminated_length": 197.05859375, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 0.1296, "grad_norm": 0.13255728781223297, "learning_rate": 9.679715302491102e-07, "loss": -0.0323, "num_tokens": 40525805.0, "reward": 1.2546930313110352, "reward_std": 0.29959502816200256, "rewards/accuracy_reward_long_step": 0.37890625, "rewards/final_brier_reward_long_step": 0.7437311410903931, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7750412225723267, "step": 81 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 397.0, "completions/max_terminated_length": 397.0, "completions/mean_length": 185.1015625, "completions/mean_terminated_length": 185.1015625, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.1312, "grad_norm": 0.03457874432206154, "learning_rate": 9.661921708185054e-07, "loss": 0.0033, "num_tokens": 41000735.0, "reward": 1.2457207441329956, "reward_std": 0.1934887319803238, "rewards/accuracy_reward_long_step": 0.359375, "rewards/final_brier_reward_long_step": 0.7806586027145386, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7647244334220886, "step": 82 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 452.0, "completions/max_terminated_length": 452.0, "completions/mean_length": 193.9140625, "completions/mean_terminated_length": 193.9140625, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.1328, "grad_norm": 0.04606304317712784, "learning_rate": 9.644128113879002e-07, "loss": -0.0082, "num_tokens": 41472457.0, "reward": 1.2768468856811523, "reward_std": 0.25950515270233154, "rewards/accuracy_reward_long_step": 0.40625, "rewards/final_brier_reward_long_step": 0.7273233532905579, "rewards/format_reward_long_step": 0.98828125, "rewards/stepwise_brier_reward_long_step": 0.7785016894340515, "step": 83 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 564.0, "completions/max_terminated_length": 564.0, "completions/mean_length": 187.13671875, "completions/mean_terminated_length": 187.13671875, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.1344, "grad_norm": 0.04770605266094208, "learning_rate": 9.626334519572953e-07, "loss": 0.003, "num_tokens": 41954644.0, "reward": 1.3016421794891357, "reward_std": 0.2636979818344116, "rewards/accuracy_reward_long_step": 0.43359375, "rewards/final_brier_reward_long_step": 0.7189725637435913, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7610331773757935, "step": 84 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 380.0, "completions/max_terminated_length": 380.0, "completions/mean_length": 185.37109375, "completions/mean_terminated_length": 186.09805297851562, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.136, "grad_norm": 0.038233935832977295, "learning_rate": 9.608540925266903e-07, "loss": -0.0246, "num_tokens": 42421019.0, "reward": 1.3664637804031372, "reward_std": 0.2260427474975586, "rewards/accuracy_reward_long_step": 0.5078125, "rewards/final_brier_reward_long_step": 0.6927652359008789, "rewards/format_reward_long_step": 0.98828125, "rewards/stepwise_brier_reward_long_step": 0.765277624130249, "step": 85 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 552.0, "completions/max_terminated_length": 552.0, "completions/mean_length": 192.4375, "completions/mean_terminated_length": 192.4375, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.1376, "grad_norm": 0.031944431364536285, "learning_rate": 9.590747330960853e-07, "loss": -0.0075, "num_tokens": 42885091.0, "reward": 1.2885265350341797, "reward_std": 0.21891814470291138, "rewards/accuracy_reward_long_step": 0.41796875, "rewards/final_brier_reward_long_step": 0.7211390733718872, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7689046859741211, "step": 86 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 505.0, "completions/max_terminated_length": 505.0, "completions/mean_length": 192.12890625, "completions/mean_terminated_length": 192.12890625, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.1392, "grad_norm": 0.053836286067962646, "learning_rate": 9.572953736654805e-07, "loss": 0.0032, "num_tokens": 43357156.0, "reward": 1.3813579082489014, "reward_std": 0.25391727685928345, "rewards/accuracy_reward_long_step": 0.546875, "rewards/final_brier_reward_long_step": 0.6488757729530334, "rewards/format_reward_long_step": 0.98828125, "rewards/stepwise_brier_reward_long_step": 0.7124930620193481, "step": 87 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 579.0, "completions/max_terminated_length": 579.0, "completions/mean_length": 188.6640625, "completions/mean_terminated_length": 188.6640625, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.1408, "grad_norm": 0.04048394784331322, "learning_rate": 9.555160142348753e-07, "loss": -0.0004, "num_tokens": 43821726.0, "reward": 1.2759735584259033, "reward_std": 0.23172587156295776, "rewards/accuracy_reward_long_step": 0.40234375, "rewards/final_brier_reward_long_step": 0.7274124622344971, "rewards/format_reward_long_step": 0.98828125, "rewards/stepwise_brier_reward_long_step": 0.7905445694923401, "step": 88 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 534.0, "completions/max_terminated_length": 534.0, "completions/mean_length": 190.31640625, "completions/mean_terminated_length": 190.31640625, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.1424, "grad_norm": 0.035542842000722885, "learning_rate": 9.537366548042705e-07, "loss": 0.0089, "num_tokens": 44299575.0, "reward": 1.2254152297973633, "reward_std": 0.2625795304775238, "rewards/accuracy_reward_long_step": 0.359375, "rewards/final_brier_reward_long_step": 0.7171218395233154, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7626639604568481, "step": 89 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/max_terminated_length": 439.0, "completions/mean_length": 186.6171875, "completions/mean_terminated_length": 186.6171875, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.144, "grad_norm": 0.04348074272274971, "learning_rate": 9.519572953736655e-07, "loss": -0.0097, "num_tokens": 44766493.0, "reward": 1.2421379089355469, "reward_std": 0.2381449192762375, "rewards/accuracy_reward_long_step": 0.37109375, "rewards/final_brier_reward_long_step": 0.7263898849487305, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7577868700027466, "step": 90 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 527.0, "completions/max_terminated_length": 527.0, "completions/mean_length": 193.671875, "completions/mean_terminated_length": 194.43138122558594, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.1456, "grad_norm": 0.10439097136259079, "learning_rate": 9.501779359430605e-07, "loss": 0.014, "num_tokens": 45226529.0, "reward": 1.2801823616027832, "reward_std": 0.22653043270111084, "rewards/accuracy_reward_long_step": 0.42578125, "rewards/final_brier_reward_long_step": 0.6875629425048828, "rewards/format_reward_long_step": 0.98828125, "rewards/stepwise_brier_reward_long_step": 0.7534794807434082, "step": 91 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 472.0, "completions/max_terminated_length": 472.0, "completions/mean_length": 192.19921875, "completions/mean_terminated_length": 192.19921875, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 0.1472, "grad_norm": 0.038028016686439514, "learning_rate": 9.483985765124555e-07, "loss": 0.0038, "num_tokens": 45698404.0, "reward": 1.2324891090393066, "reward_std": 0.2521362900733948, "rewards/accuracy_reward_long_step": 0.37890625, "rewards/final_brier_reward_long_step": 0.7127734422683716, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7171825766563416, "step": 92 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 394.0, "completions/max_terminated_length": 394.0, "completions/mean_length": 186.05859375, "completions/mean_terminated_length": 186.05859375, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.1488, "grad_norm": 0.0344776026904583, "learning_rate": 9.466192170818504e-07, "loss": 0.0051, "num_tokens": 46169131.0, "reward": 1.2282606363296509, "reward_std": 0.16655448079109192, "rewards/accuracy_reward_long_step": 0.359375, "rewards/final_brier_reward_long_step": 0.7309889793395996, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7523662447929382, "step": 93 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 434.0, "completions/max_terminated_length": 434.0, "completions/mean_length": 190.9765625, "completions/mean_terminated_length": 190.9765625, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.1504, "grad_norm": 0.03720776364207268, "learning_rate": 9.448398576512455e-07, "loss": 0.0102, "num_tokens": 46636349.0, "reward": 1.2539737224578857, "reward_std": 0.2229781448841095, "rewards/accuracy_reward_long_step": 0.37890625, "rewards/final_brier_reward_long_step": 0.7260522246360779, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7742174863815308, "step": 94 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 563.0, "completions/max_terminated_length": 563.0, "completions/mean_length": 191.19921875, "completions/mean_terminated_length": 191.94903564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.152, "grad_norm": 0.03717532381415367, "learning_rate": 9.430604982206405e-07, "loss": -0.0118, "num_tokens": 47117128.0, "reward": 1.2085305452346802, "reward_std": 0.22574907541275024, "rewards/accuracy_reward_long_step": 0.3359375, "rewards/final_brier_reward_long_step": 0.7332504391670227, "rewards/format_reward_long_step": 0.98828125, "rewards/stepwise_brier_reward_long_step": 0.7805593013763428, "step": 95 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 707.0, "completions/max_terminated_length": 707.0, "completions/mean_length": 193.6953125, "completions/mean_terminated_length": 193.6953125, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 0.1536, "grad_norm": 0.03478072211146355, "learning_rate": 9.412811387900355e-07, "loss": -0.0084, "num_tokens": 47604330.0, "reward": 1.3053498268127441, "reward_std": 0.20083755254745483, "rewards/accuracy_reward_long_step": 0.453125, "rewards/final_brier_reward_long_step": 0.6733219027519226, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7355772256851196, "step": 96 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 397.0, "completions/max_terminated_length": 397.0, "completions/mean_length": 196.6015625, "completions/mean_terminated_length": 196.6015625, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.1552, "grad_norm": 0.040781840682029724, "learning_rate": 9.395017793594306e-07, "loss": -0.0066, "num_tokens": 48071700.0, "reward": 1.2769510746002197, "reward_std": 0.18185681104660034, "rewards/accuracy_reward_long_step": 0.4140625, "rewards/final_brier_reward_long_step": 0.7033559679985046, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7560105919837952, "step": 97 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 510.0, "completions/max_terminated_length": 510.0, "completions/mean_length": 184.20703125, "completions/mean_terminated_length": 184.20703125, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.1568, "grad_norm": 0.04920961335301399, "learning_rate": 9.377224199288256e-07, "loss": -0.0063, "num_tokens": 48530185.0, "reward": 1.2058653831481934, "reward_std": 0.1759049892425537, "rewards/accuracy_reward_long_step": 0.32421875, "rewards/final_brier_reward_long_step": 0.7487024068832397, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7856966853141785, "step": 98 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 381.0, "completions/max_terminated_length": 381.0, "completions/mean_length": 186.4609375, "completions/mean_terminated_length": 186.4609375, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 0.1584, "grad_norm": 0.03607820346951485, "learning_rate": 9.359430604982206e-07, "loss": 0.002, "num_tokens": 49009167.0, "reward": 1.2021329402923584, "reward_std": 0.17198419570922852, "rewards/accuracy_reward_long_step": 0.30859375, "rewards/final_brier_reward_long_step": 0.7756035327911377, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7985529899597168, "step": 99 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 404.0, "completions/max_terminated_length": 404.0, "completions/mean_length": 193.265625, "completions/mean_terminated_length": 193.265625, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.16, "grad_norm": 0.03377021104097366, "learning_rate": 9.341637010676157e-07, "loss": 0.0073, "num_tokens": 49501779.0, "reward": 1.2661197185516357, "reward_std": 0.1904180347919464, "rewards/accuracy_reward_long_step": 0.40625, "rewards/final_brier_reward_long_step": 0.6986390352249146, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7408397197723389, "step": 100 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 703.0, "completions/max_terminated_length": 703.0, "completions/mean_length": 191.34765625, "completions/mean_terminated_length": 192.09805297851562, "completions/min_length": 0.0, "completions/min_terminated_length": 92.0, "epoch": 0.1616, "grad_norm": 0.04310128837823868, "learning_rate": 9.323843416370106e-07, "loss": -0.0187, "num_tokens": 49985236.0, "reward": 1.2589505910873413, "reward_std": 0.15486913919448853, "rewards/accuracy_reward_long_step": 0.38671875, "rewards/final_brier_reward_long_step": 0.7101609110832214, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7943914532661438, "step": 101 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 716.0, "completions/max_terminated_length": 716.0, "completions/mean_length": 197.26953125, "completions/mean_terminated_length": 197.26953125, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.1632, "grad_norm": 0.06449352204799652, "learning_rate": 9.306049822064056e-07, "loss": -0.0023, "num_tokens": 50458833.0, "reward": 1.2103374004364014, "reward_std": 0.17173272371292114, "rewards/accuracy_reward_long_step": 0.33203125, "rewards/final_brier_reward_long_step": 0.7552437782287598, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7736056447029114, "step": 102 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 449.0, "completions/max_terminated_length": 449.0, "completions/mean_length": 196.171875, "completions/mean_terminated_length": 196.171875, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.1648, "grad_norm": 0.03428987041115761, "learning_rate": 9.288256227758006e-07, "loss": -0.0075, "num_tokens": 50948197.0, "reward": 1.2849457263946533, "reward_std": 0.22478044033050537, "rewards/accuracy_reward_long_step": 0.43359375, "rewards/final_brier_reward_long_step": 0.6716355085372925, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7415850758552551, "step": 103 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 388.0, "completions/max_terminated_length": 388.0, "completions/mean_length": 189.1171875, "completions/mean_terminated_length": 189.1171875, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.1664, "grad_norm": 0.03708449751138687, "learning_rate": 9.270462633451957e-07, "loss": 0.0166, "num_tokens": 51417579.0, "reward": 1.3035290241241455, "reward_std": 0.2233991026878357, "rewards/accuracy_reward_long_step": 0.4453125, "rewards/final_brier_reward_long_step": 0.6784660220146179, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7544001936912537, "step": 104 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 467.0, "completions/max_terminated_length": 467.0, "completions/mean_length": 199.61328125, "completions/mean_terminated_length": 199.61328125, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.168, "grad_norm": 0.04729332774877548, "learning_rate": 9.252669039145908e-07, "loss": -0.0052, "num_tokens": 51903344.0, "reward": 1.32490873336792, "reward_std": 0.23365336656570435, "rewards/accuracy_reward_long_step": 0.47265625, "rewards/final_brier_reward_long_step": 0.6609004139900208, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7559216618537903, "step": 105 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 434.0, "completions/max_terminated_length": 434.0, "completions/mean_length": 199.90625, "completions/mean_terminated_length": 199.90625, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.1696, "grad_norm": 0.03533056378364563, "learning_rate": 9.234875444839857e-07, "loss": 0.0067, "num_tokens": 52395024.0, "reward": 1.1930384635925293, "reward_std": 0.1815432459115982, "rewards/accuracy_reward_long_step": 0.3203125, "rewards/final_brier_reward_long_step": 0.7636566162109375, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7272477149963379, "step": 106 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 354.0, "completions/max_terminated_length": 354.0, "completions/mean_length": 190.7265625, "completions/mean_terminated_length": 190.7265625, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.1712, "grad_norm": 0.03335587680339813, "learning_rate": 9.217081850533808e-07, "loss": 0.0079, "num_tokens": 52879418.0, "reward": 1.2246638536453247, "reward_std": 0.22744205594062805, "rewards/accuracy_reward_long_step": 0.359375, "rewards/final_brier_reward_long_step": 0.7271843552589417, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7495959997177124, "step": 107 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 590.0, "completions/max_terminated_length": 590.0, "completions/mean_length": 195.2109375, "completions/mean_terminated_length": 195.2109375, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.1728, "grad_norm": 0.03303585201501846, "learning_rate": 9.199288256227757e-07, "loss": 0.0029, "num_tokens": 53356184.0, "reward": 1.2511444091796875, "reward_std": 0.16916480660438538, "rewards/accuracy_reward_long_step": 0.37890625, "rewards/final_brier_reward_long_step": 0.7238613367080688, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7650911808013916, "step": 108 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 534.0, "completions/max_terminated_length": 534.0, "completions/mean_length": 194.62890625, "completions/mean_terminated_length": 194.62890625, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.1744, "grad_norm": 0.03541121259331703, "learning_rate": 9.181494661921708e-07, "loss": -0.0096, "num_tokens": 53823321.0, "reward": 1.2710667848587036, "reward_std": 0.17301318049430847, "rewards/accuracy_reward_long_step": 0.41015625, "rewards/final_brier_reward_long_step": 0.6999242305755615, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7437179684638977, "step": 109 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 431.0, "completions/max_terminated_length": 431.0, "completions/mean_length": 197.36328125, "completions/mean_terminated_length": 197.36328125, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.176, "grad_norm": 0.032131701707839966, "learning_rate": 9.163701067615657e-07, "loss": 0.0068, "num_tokens": 54302126.0, "reward": 1.3117148876190186, "reward_std": 0.232276052236557, "rewards/accuracy_reward_long_step": 0.45703125, "rewards/final_brier_reward_long_step": 0.6579011678695679, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7686457633972168, "step": 110 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 508.0, "completions/max_terminated_length": 508.0, "completions/mean_length": 199.5078125, "completions/mean_terminated_length": 199.5078125, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.1776, "grad_norm": 0.03558618575334549, "learning_rate": 9.145907473309609e-07, "loss": -0.0095, "num_tokens": 54778832.0, "reward": 1.2715282440185547, "reward_std": 0.17849522829055786, "rewards/accuracy_reward_long_step": 0.40234375, "rewards/final_brier_reward_long_step": 0.7140308618545532, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7627072930335999, "step": 111 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 399.0, "completions/max_terminated_length": 399.0, "completions/mean_length": 193.99609375, "completions/mean_terminated_length": 193.99609375, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.1792, "grad_norm": 0.048463039100170135, "learning_rate": 9.128113879003559e-07, "loss": -0.0061, "num_tokens": 55260847.0, "reward": 1.2326127290725708, "reward_std": 0.10357346385717392, "rewards/accuracy_reward_long_step": 0.35546875, "rewards/final_brier_reward_long_step": 0.7353038787841797, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.773271918296814, "step": 112 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 521.0, "completions/max_terminated_length": 521.0, "completions/mean_length": 190.48828125, "completions/mean_terminated_length": 190.48828125, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.1808, "grad_norm": 0.036182910203933716, "learning_rate": 9.110320284697508e-07, "loss": -0.0012, "num_tokens": 55716884.0, "reward": 1.3314650058746338, "reward_std": 0.21325276792049408, "rewards/accuracy_reward_long_step": 0.48046875, "rewards/final_brier_reward_long_step": 0.653796911239624, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.750187873840332, "step": 113 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 422.0, "completions/max_terminated_length": 422.0, "completions/mean_length": 197.38671875, "completions/mean_terminated_length": 197.38671875, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.1824, "grad_norm": 0.0341506227850914, "learning_rate": 9.092526690391459e-07, "loss": -0.0149, "num_tokens": 56184679.0, "reward": 1.3492083549499512, "reward_std": 0.1791898012161255, "rewards/accuracy_reward_long_step": 0.4921875, "rewards/final_brier_reward_long_step": 0.6604753732681274, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7676081657409668, "step": 114 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 349.0, "completions/max_terminated_length": 349.0, "completions/mean_length": 203.13671875, "completions/mean_terminated_length": 203.13671875, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.184, "grad_norm": 0.03318583592772484, "learning_rate": 9.074733096085408e-07, "loss": 0.012, "num_tokens": 56663218.0, "reward": 1.3214986324310303, "reward_std": 0.15533313155174255, "rewards/accuracy_reward_long_step": 0.4765625, "rewards/final_brier_reward_long_step": 0.6525156497955322, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7350417375564575, "step": 115 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 450.0, "completions/max_terminated_length": 450.0, "completions/mean_length": 196.93359375, "completions/mean_terminated_length": 196.93359375, "completions/min_length": 9.0, "completions/min_terminated_length": 9.0, "epoch": 0.1856, "grad_norm": 0.04418765380978584, "learning_rate": 9.056939501779359e-07, "loss": -0.0076, "num_tokens": 57139865.0, "reward": 1.3460896015167236, "reward_std": 0.1907288283109665, "rewards/accuracy_reward_long_step": 0.48828125, "rewards/final_brier_reward_long_step": 0.668144941329956, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7709013223648071, "step": 116 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 400.0, "completions/max_terminated_length": 400.0, "completions/mean_length": 199.77734375, "completions/mean_terminated_length": 199.77734375, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.1872, "grad_norm": 0.031136956065893173, "learning_rate": 9.03914590747331e-07, "loss": -0.0184, "num_tokens": 57605152.0, "reward": 1.36716890335083, "reward_std": 0.19431088864803314, "rewards/accuracy_reward_long_step": 0.5078125, "rewards/final_brier_reward_long_step": 0.6628949642181396, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7745306491851807, "step": 117 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 614.0, "completions/max_terminated_length": 614.0, "completions/mean_length": 202.734375, "completions/mean_terminated_length": 202.734375, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.1888, "grad_norm": 0.03184520825743675, "learning_rate": 9.02135231316726e-07, "loss": -0.0108, "num_tokens": 58073260.0, "reward": 1.2726809978485107, "reward_std": 0.20508110523223877, "rewards/accuracy_reward_long_step": 0.3984375, "rewards/final_brier_reward_long_step": 0.7245535254478455, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7802332639694214, "step": 118 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 504.0, "completions/max_terminated_length": 504.0, "completions/mean_length": 205.05859375, "completions/mean_terminated_length": 205.05859375, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.1904, "grad_norm": 0.033281709998846054, "learning_rate": 9.00355871886121e-07, "loss": 0.0056, "num_tokens": 58547163.0, "reward": 1.2008931636810303, "reward_std": 0.12888304889202118, "rewards/accuracy_reward_long_step": 0.30078125, "rewards/final_brier_reward_long_step": 0.7928597927093506, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8075879216194153, "step": 119 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.0, "completions/max_terminated_length": 471.0, "completions/mean_length": 207.6875, "completions/mean_terminated_length": 207.6875, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.192, "grad_norm": 0.03089357167482376, "learning_rate": 8.98576512455516e-07, "loss": -0.0066, "num_tokens": 59008507.0, "reward": 1.337794303894043, "reward_std": 0.14488165080547333, "rewards/accuracy_reward_long_step": 0.46875, "rewards/final_brier_reward_long_step": 0.6958640813827515, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.78031325340271, "step": 120 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 402.0, "completions/max_terminated_length": 402.0, "completions/mean_length": 210.828125, "completions/mean_terminated_length": 210.828125, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.1936, "grad_norm": 0.03139025717973709, "learning_rate": 8.96797153024911e-07, "loss": 0.0168, "num_tokens": 59465479.0, "reward": 1.46078622341156, "reward_std": 0.2090751975774765, "rewards/accuracy_reward_long_step": 0.6171875, "rewards/final_brier_reward_long_step": 0.6339257955551147, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7404694557189941, "step": 121 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 448.0, "completions/max_terminated_length": 448.0, "completions/mean_length": 225.23828125, "completions/mean_terminated_length": 225.23828125, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.1952, "grad_norm": 0.03497195616364479, "learning_rate": 8.950177935943059e-07, "loss": 0.0103, "num_tokens": 59950300.0, "reward": 1.329277515411377, "reward_std": 0.17536477744579315, "rewards/accuracy_reward_long_step": 0.453125, "rewards/final_brier_reward_long_step": 0.7139711380004883, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7906389832496643, "step": 122 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 491.0, "completions/max_terminated_length": 491.0, "completions/mean_length": 213.8671875, "completions/mean_terminated_length": 213.8671875, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.1968, "grad_norm": 0.03257234767079353, "learning_rate": 8.93238434163701e-07, "loss": -0.0167, "num_tokens": 60433226.0, "reward": 1.4588714838027954, "reward_std": 0.15642720460891724, "rewards/accuracy_reward_long_step": 0.59375, "rewards/final_brier_reward_long_step": 0.6580198407173157, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8024659752845764, "step": 123 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 456.0, "completions/max_terminated_length": 456.0, "completions/mean_length": 217.0, "completions/mean_terminated_length": 217.0, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.1984, "grad_norm": 0.03485409542918205, "learning_rate": 8.91459074733096e-07, "loss": 0.0054, "num_tokens": 60912170.0, "reward": 1.2661014795303345, "reward_std": 0.16737821698188782, "rewards/accuracy_reward_long_step": 0.3671875, "rewards/final_brier_reward_long_step": 0.7646335959434509, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8310222625732422, "step": 124 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 456.0, "completions/max_terminated_length": 456.0, "completions/mean_length": 222.78515625, "completions/mean_terminated_length": 222.78515625, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.2, "grad_norm": 0.030056415125727654, "learning_rate": 8.896797153024911e-07, "loss": 0.0011, "num_tokens": 61399619.0, "reward": 1.3533146381378174, "reward_std": 0.18137666583061218, "rewards/accuracy_reward_long_step": 0.47265625, "rewards/final_brier_reward_long_step": 0.7190214991569519, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8036117553710938, "step": 125 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 397.0, "completions/max_terminated_length": 397.0, "completions/mean_length": 212.23046875, "completions/mean_terminated_length": 212.23046875, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.2016, "grad_norm": 0.04059338942170143, "learning_rate": 8.879003558718861e-07, "loss": 0.0105, "num_tokens": 61886622.0, "reward": 1.386685848236084, "reward_std": 0.2339630126953125, "rewards/accuracy_reward_long_step": 0.50390625, "rewards/final_brier_reward_long_step": 0.7192109823226929, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8119070529937744, "step": 126 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 399.0, "completions/max_terminated_length": 399.0, "completions/mean_length": 212.375, "completions/mean_terminated_length": 212.375, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.2032, "grad_norm": 0.16263210773468018, "learning_rate": 8.861209964412811e-07, "loss": 0.0043, "num_tokens": 62374030.0, "reward": 1.2957684993743896, "reward_std": 0.15368527173995972, "rewards/accuracy_reward_long_step": 0.40625, "rewards/final_brier_reward_long_step": 0.7560636401176453, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8098228573799133, "step": 127 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 431.0, "completions/max_terminated_length": 431.0, "completions/mean_length": 215.7109375, "completions/mean_terminated_length": 215.7109375, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.2048, "grad_norm": 0.039368387311697006, "learning_rate": 8.843416370106761e-07, "loss": 0.0028, "num_tokens": 62861236.0, "reward": 1.355287790298462, "reward_std": 0.21285982429981232, "rewards/accuracy_reward_long_step": 0.4765625, "rewards/final_brier_reward_long_step": 0.7233257293701172, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7915753126144409, "step": 128 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 380.0, "completions/max_terminated_length": 380.0, "completions/mean_length": 208.1328125, "completions/mean_terminated_length": 208.1328125, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.2064, "grad_norm": 0.03151082620024681, "learning_rate": 8.825622775800712e-07, "loss": -0.004, "num_tokens": 63345814.0, "reward": 1.3812355995178223, "reward_std": 0.2573654055595398, "rewards/accuracy_reward_long_step": 0.5, "rewards/final_brier_reward_long_step": 0.7093117237091064, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8234432339668274, "step": 129 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 514.0, "completions/max_terminated_length": 514.0, "completions/mean_length": 216.51171875, "completions/mean_terminated_length": 216.51171875, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.208, "grad_norm": 0.03233165293931961, "learning_rate": 8.807829181494661e-07, "loss": 0.0018, "num_tokens": 63809681.0, "reward": 1.3362829685211182, "reward_std": 0.12012840807437897, "rewards/accuracy_reward_long_step": 0.4453125, "rewards/final_brier_reward_long_step": 0.7555733919143677, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8083083033561707, "step": 130 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 494.0, "completions/max_terminated_length": 494.0, "completions/mean_length": 213.39453125, "completions/mean_terminated_length": 213.39453125, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.2096, "grad_norm": 0.028420858085155487, "learning_rate": 8.790035587188612e-07, "loss": 0.0054, "num_tokens": 64295150.0, "reward": 1.3931207656860352, "reward_std": 0.1800289750099182, "rewards/accuracy_reward_long_step": 0.51953125, "rewards/final_brier_reward_long_step": 0.7075101733207703, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7868478298187256, "step": 131 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.0, "completions/max_terminated_length": 471.0, "completions/mean_length": 220.79296875, "completions/mean_terminated_length": 220.79296875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.2112, "grad_norm": 0.030808866024017334, "learning_rate": 8.772241992882562e-07, "loss": -0.0, "num_tokens": 64780465.0, "reward": 1.2357356548309326, "reward_std": 0.1879829466342926, "rewards/accuracy_reward_long_step": 0.33984375, "rewards/final_brier_reward_long_step": 0.7859160304069519, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7976517081260681, "step": 132 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 527.0, "completions/max_terminated_length": 527.0, "completions/mean_length": 210.00390625, "completions/mean_terminated_length": 210.00390625, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.2128, "grad_norm": 0.030662264674901962, "learning_rate": 8.754448398576512e-07, "loss": -0.0054, "num_tokens": 65255178.0, "reward": 1.261284351348877, "reward_std": 0.19841524958610535, "rewards/accuracy_reward_long_step": 0.37890625, "rewards/final_brier_reward_long_step": 0.7597503662109375, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7775742411613464, "step": 133 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 371.0, "completions/max_terminated_length": 371.0, "completions/mean_length": 202.95703125, "completions/mean_terminated_length": 202.95703125, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.2144, "grad_norm": 0.03131137415766716, "learning_rate": 8.736654804270462e-07, "loss": 0.0037, "num_tokens": 65725903.0, "reward": 1.3632652759552002, "reward_std": 0.15484619140625, "rewards/accuracy_reward_long_step": 0.4765625, "rewards/final_brier_reward_long_step": 0.7492175698280334, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7975935935974121, "step": 134 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 501.0, "completions/max_terminated_length": 501.0, "completions/mean_length": 216.09375, "completions/mean_terminated_length": 216.09375, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.216, "grad_norm": 0.03075851872563362, "learning_rate": 8.718861209964412e-07, "loss": 0.0039, "num_tokens": 66198535.0, "reward": 1.4220490455627441, "reward_std": 0.13649022579193115, "rewards/accuracy_reward_long_step": 0.546875, "rewards/final_brier_reward_long_step": 0.7138031125068665, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7868932485580444, "step": 135 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 438.0, "completions/max_terminated_length": 438.0, "completions/mean_length": 216.6953125, "completions/mean_terminated_length": 216.6953125, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.2176, "grad_norm": 0.03281315043568611, "learning_rate": 8.701067615658363e-07, "loss": 0.0138, "num_tokens": 66696009.0, "reward": 1.294492483139038, "reward_std": 0.23467326164245605, "rewards/accuracy_reward_long_step": 0.40234375, "rewards/final_brier_reward_long_step": 0.7704480886459351, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.798146665096283, "step": 136 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 420.0, "completions/max_terminated_length": 420.0, "completions/mean_length": 204.03515625, "completions/mean_terminated_length": 204.03515625, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.2192, "grad_norm": 0.03420788049697876, "learning_rate": 8.683274021352312e-07, "loss": -0.0052, "num_tokens": 67178186.0, "reward": 1.439988613128662, "reward_std": 0.19740483164787292, "rewards/accuracy_reward_long_step": 0.55859375, "rewards/final_brier_reward_long_step": 0.7100058794021606, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8155736923217773, "step": 137 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 572.0, "completions/max_terminated_length": 572.0, "completions/mean_length": 208.546875, "completions/mean_terminated_length": 208.546875, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.2208, "grad_norm": 0.030317138880491257, "learning_rate": 8.665480427046264e-07, "loss": -0.0, "num_tokens": 67664638.0, "reward": 1.3270107507705688, "reward_std": 0.156023770570755, "rewards/accuracy_reward_long_step": 0.4296875, "rewards/final_brier_reward_long_step": 0.7715405821800232, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8255646228790283, "step": 138 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 502.0, "completions/max_terminated_length": 502.0, "completions/mean_length": 216.421875, "completions/mean_terminated_length": 216.421875, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.2224, "grad_norm": 0.0344645120203495, "learning_rate": 8.647686832740213e-07, "loss": -0.0067, "num_tokens": 68146034.0, "reward": 1.2016233205795288, "reward_std": 0.17633959650993347, "rewards/accuracy_reward_long_step": 0.30078125, "rewards/final_brier_reward_long_step": 0.7998050451278687, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.803563117980957, "step": 139 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 393.0, "completions/max_terminated_length": 393.0, "completions/mean_length": 209.14453125, "completions/mean_terminated_length": 209.14453125, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.224, "grad_norm": 0.029939748346805573, "learning_rate": 8.629893238434164e-07, "loss": -0.0057, "num_tokens": 68631727.0, "reward": 1.4235725402832031, "reward_std": 0.14265938103199005, "rewards/accuracy_reward_long_step": 0.55078125, "rewards/final_brier_reward_long_step": 0.720478892326355, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7706866264343262, "step": 140 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 362.0, "completions/max_terminated_length": 362.0, "completions/mean_length": 209.578125, "completions/mean_terminated_length": 209.578125, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.2256, "grad_norm": 0.030273519456386566, "learning_rate": 8.612099644128114e-07, "loss": -0.0046, "num_tokens": 69117867.0, "reward": 1.3343067169189453, "reward_std": 0.16028451919555664, "rewards/accuracy_reward_long_step": 0.453125, "rewards/final_brier_reward_long_step": 0.7540902495384216, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7784492373466492, "step": 141 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 426.0, "completions/max_terminated_length": 426.0, "completions/mean_length": 220.03515625, "completions/mean_terminated_length": 220.03515625, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.2272, "grad_norm": 0.03172041475772858, "learning_rate": 8.594306049822063e-07, "loss": 0.0148, "num_tokens": 69616820.0, "reward": 1.3687831163406372, "reward_std": 0.12923522293567657, "rewards/accuracy_reward_long_step": 0.48828125, "rewards/final_brier_reward_long_step": 0.7290538549423218, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7929534316062927, "step": 142 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 428.0, "completions/max_terminated_length": 428.0, "completions/mean_length": 218.36328125, "completions/mean_terminated_length": 218.36328125, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.2288, "grad_norm": 0.03149149566888809, "learning_rate": 8.576512455516014e-07, "loss": 0.0306, "num_tokens": 70114945.0, "reward": 1.3903582096099854, "reward_std": 0.25830644369125366, "rewards/accuracy_reward_long_step": 0.52734375, "rewards/final_brier_reward_long_step": 0.6927156448364258, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7593424320220947, "step": 143 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 450.0, "completions/max_terminated_length": 450.0, "completions/mean_length": 208.25390625, "completions/mean_terminated_length": 208.25390625, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.2304, "grad_norm": 0.03169076144695282, "learning_rate": 8.558718861209963e-07, "loss": 0.0027, "num_tokens": 70590714.0, "reward": 1.319934606552124, "reward_std": 0.18163828551769257, "rewards/accuracy_reward_long_step": 0.4296875, "rewards/final_brier_reward_long_step": 0.7742601633071899, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7867279052734375, "step": 144 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 407.0, "completions/max_terminated_length": 407.0, "completions/mean_length": 207.875, "completions/mean_terminated_length": 207.875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.232, "grad_norm": 0.055429354310035706, "learning_rate": 8.540925266903915e-07, "loss": 0.0016, "num_tokens": 71085394.0, "reward": 1.3640680313110352, "reward_std": 0.20486664772033691, "rewards/accuracy_reward_long_step": 0.484375, "rewards/final_brier_reward_long_step": 0.7340711355209351, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7847013473510742, "step": 145 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 375.0, "completions/max_terminated_length": 375.0, "completions/mean_length": 218.2109375, "completions/mean_terminated_length": 218.2109375, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.2336, "grad_norm": 0.03302358463406563, "learning_rate": 8.523131672597864e-07, "loss": 0.0093, "num_tokens": 71574600.0, "reward": 1.4761652946472168, "reward_std": 0.2185746729373932, "rewards/accuracy_reward_long_step": 0.61328125, "rewards/final_brier_reward_long_step": 0.6924906373023987, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7668579816818237, "step": 146 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 477.0, "completions/max_terminated_length": 477.0, "completions/mean_length": 209.34765625, "completions/mean_terminated_length": 209.34765625, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.2352, "grad_norm": 0.03392917290329933, "learning_rate": 8.505338078291815e-07, "loss": -0.0052, "num_tokens": 72047361.0, "reward": 1.4971990585327148, "reward_std": 0.17058077454566956, "rewards/accuracy_reward_long_step": 0.6328125, "rewards/final_brier_reward_long_step": 0.6820136904716492, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7755322456359863, "step": 147 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 396.0, "completions/max_terminated_length": 396.0, "completions/mean_length": 208.20703125, "completions/mean_terminated_length": 208.20703125, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.2368, "grad_norm": 0.03449048101902008, "learning_rate": 8.487544483985765e-07, "loss": 0.0062, "num_tokens": 72528174.0, "reward": 1.5363779067993164, "reward_std": 0.17959806323051453, "rewards/accuracy_reward_long_step": 0.671875, "rewards/final_brier_reward_long_step": 0.7153710722923279, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7426407337188721, "step": 148 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/max_terminated_length": 439.0, "completions/mean_length": 208.875, "completions/mean_terminated_length": 208.875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.2384, "grad_norm": 0.030924499034881592, "learning_rate": 8.469750889679715e-07, "loss": 0.0016, "num_tokens": 73014590.0, "reward": 1.2735717296600342, "reward_std": 0.16099971532821655, "rewards/accuracy_reward_long_step": 0.38671875, "rewards/final_brier_reward_long_step": 0.7689594030380249, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7784522771835327, "step": 149 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 430.0, "completions/max_terminated_length": 430.0, "completions/mean_length": 222.2109375, "completions/mean_terminated_length": 222.2109375, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.24, "grad_norm": 0.0321771465241909, "learning_rate": 8.451957295373665e-07, "loss": -0.0022, "num_tokens": 73481692.0, "reward": 1.3676480054855347, "reward_std": 0.2422865331172943, "rewards/accuracy_reward_long_step": 0.48046875, "rewards/final_brier_reward_long_step": 0.7435883283615112, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8051284551620483, "step": 150 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 500.0, "completions/max_terminated_length": 500.0, "completions/mean_length": 215.85546875, "completions/mean_terminated_length": 215.85546875, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.2416, "grad_norm": 0.03657348453998566, "learning_rate": 8.434163701067614e-07, "loss": 0.0096, "num_tokens": 73961575.0, "reward": 1.4443353414535522, "reward_std": 0.21228715777397156, "rewards/accuracy_reward_long_step": 0.55859375, "rewards/final_brier_reward_long_step": 0.7566316723823547, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7863348126411438, "step": 151 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 398.0, "completions/max_terminated_length": 398.0, "completions/mean_length": 213.53515625, "completions/mean_terminated_length": 213.53515625, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.2432, "grad_norm": 0.03434426710009575, "learning_rate": 8.416370106761566e-07, "loss": 0.0015, "num_tokens": 74427848.0, "reward": 1.223260521888733, "reward_std": 0.18570977449417114, "rewards/accuracy_reward_long_step": 0.33203125, "rewards/final_brier_reward_long_step": 0.781054675579071, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7838626503944397, "step": 152 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 469.0, "completions/max_terminated_length": 469.0, "completions/mean_length": 219.49609375, "completions/mean_terminated_length": 219.49609375, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.2448, "grad_norm": 0.029247252270579338, "learning_rate": 8.398576512455516e-07, "loss": 0.0116, "num_tokens": 74910703.0, "reward": 1.4385360479354858, "reward_std": 0.24247096478939056, "rewards/accuracy_reward_long_step": 0.56640625, "rewards/final_brier_reward_long_step": 0.7055359482765198, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7829832434654236, "step": 153 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 376.0, "completions/max_terminated_length": 376.0, "completions/mean_length": 220.2734375, "completions/mean_terminated_length": 220.2734375, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.2464, "grad_norm": 0.02955593541264534, "learning_rate": 8.380782918149466e-07, "loss": -0.0045, "num_tokens": 75393821.0, "reward": 1.454419732093811, "reward_std": 0.15751710534095764, "rewards/accuracy_reward_long_step": 0.5703125, "rewards/final_brier_reward_long_step": 0.7552148699760437, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7812142968177795, "step": 154 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 474.0, "completions/max_terminated_length": 474.0, "completions/mean_length": 218.16796875, "completions/mean_terminated_length": 218.16796875, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.248, "grad_norm": 0.03182348608970642, "learning_rate": 8.362989323843416e-07, "loss": 0.0059, "num_tokens": 75881664.0, "reward": 1.4293267726898193, "reward_std": 0.20279854536056519, "rewards/accuracy_reward_long_step": 0.5390625, "rewards/final_brier_reward_long_step": 0.7694789171218872, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7915782928466797, "step": 155 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 496.0, "completions/max_terminated_length": 496.0, "completions/mean_length": 238.328125, "completions/mean_terminated_length": 238.328125, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.2496, "grad_norm": 0.030359633266925812, "learning_rate": 8.345195729537366e-07, "loss": 0.0028, "num_tokens": 76368372.0, "reward": 1.3188002109527588, "reward_std": 0.2175511121749878, "rewards/accuracy_reward_long_step": 0.43359375, "rewards/final_brier_reward_long_step": 0.7726074457168579, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7682181596755981, "step": 156 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 508.0, "completions/max_terminated_length": 508.0, "completions/mean_length": 225.96484375, "completions/mean_terminated_length": 225.96484375, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.2512, "grad_norm": 0.03001994453370571, "learning_rate": 8.327402135231316e-07, "loss": 0.0162, "num_tokens": 76846323.0, "reward": 1.509331226348877, "reward_std": 0.18848416209220886, "rewards/accuracy_reward_long_step": 0.6171875, "rewards/final_brier_reward_long_step": 0.7627733945846558, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.805801272392273, "step": 157 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 431.0, "completions/max_terminated_length": 431.0, "completions/mean_length": 245.515625, "completions/mean_terminated_length": 245.515625, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.2528, "grad_norm": 0.02940976247191429, "learning_rate": 8.309608540925266e-07, "loss": 0.0073, "num_tokens": 77351735.0, "reward": 1.2970399856567383, "reward_std": 0.19197387993335724, "rewards/accuracy_reward_long_step": 0.4140625, "rewards/final_brier_reward_long_step": 0.7378246188163757, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8018975853919983, "step": 158 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 602.0, "completions/max_terminated_length": 602.0, "completions/mean_length": 238.7890625, "completions/mean_terminated_length": 238.7890625, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.2544, "grad_norm": 0.030583331361413002, "learning_rate": 8.291814946619217e-07, "loss": 0.0032, "num_tokens": 77840273.0, "reward": 1.191973090171814, "reward_std": 0.15967227518558502, "rewards/accuracy_reward_long_step": 0.29296875, "rewards/final_brier_reward_long_step": 0.7833398580551147, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8126777410507202, "step": 159 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 447.0, "completions/max_terminated_length": 447.0, "completions/mean_length": 222.33984375, "completions/mean_terminated_length": 222.33984375, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.256, "grad_norm": 0.02875317633152008, "learning_rate": 8.274021352313167e-07, "loss": 0.001, "num_tokens": 78315032.0, "reward": 1.4728080034255981, "reward_std": 0.1814574897289276, "rewards/accuracy_reward_long_step": 0.578125, "rewards/final_brier_reward_long_step": 0.7983136773109436, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7804182767868042, "step": 160 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 396.0, "completions/max_terminated_length": 396.0, "completions/mean_length": 243.73828125, "completions/mean_terminated_length": 243.73828125, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.2576, "grad_norm": 0.028153013437986374, "learning_rate": 8.256227758007117e-07, "loss": -0.0008, "num_tokens": 78800285.0, "reward": 1.3710644245147705, "reward_std": 0.21033860743045807, "rewards/accuracy_reward_long_step": 0.484375, "rewards/final_brier_reward_long_step": 0.7794238328933716, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7829592227935791, "step": 161 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 539.0, "completions/max_terminated_length": 539.0, "completions/mean_length": 247.23828125, "completions/mean_terminated_length": 247.23828125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.2592, "grad_norm": 0.02903878502547741, "learning_rate": 8.238434163701067e-07, "loss": 0.0002, "num_tokens": 79278586.0, "reward": 1.3124269247055054, "reward_std": 0.22732782363891602, "rewards/accuracy_reward_long_step": 0.42578125, "rewards/final_brier_reward_long_step": 0.7707054615020752, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7758771181106567, "step": 162 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 574.0, "completions/max_terminated_length": 574.0, "completions/mean_length": 249.76171875, "completions/mean_terminated_length": 249.76171875, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.2608, "grad_norm": 0.028138713911175728, "learning_rate": 8.220640569395017e-07, "loss": -0.0073, "num_tokens": 79765109.0, "reward": 1.4664216041564941, "reward_std": 0.16438095271587372, "rewards/accuracy_reward_long_step": 0.5703125, "rewards/final_brier_reward_long_step": 0.7759047150611877, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.808531641960144, "step": 163 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 516.0, "completions/max_terminated_length": 516.0, "completions/mean_length": 241.08984375, "completions/mean_terminated_length": 241.08984375, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.2624, "grad_norm": 0.028392404317855835, "learning_rate": 8.202846975088967e-07, "loss": 0.0032, "num_tokens": 80258676.0, "reward": 1.501215934753418, "reward_std": 0.17474979162216187, "rewards/accuracy_reward_long_step": 0.6171875, "rewards/final_brier_reward_long_step": 0.7508887052536011, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7852252721786499, "step": 164 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 460.0, "completions/max_terminated_length": 460.0, "completions/mean_length": 231.37109375, "completions/mean_terminated_length": 231.37109375, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.264, "grad_norm": 0.028890179470181465, "learning_rate": 8.185053380782919e-07, "loss": 0.0041, "num_tokens": 80732043.0, "reward": 1.4701359272003174, "reward_std": 0.1824515014886856, "rewards/accuracy_reward_long_step": 0.58203125, "rewards/final_brier_reward_long_step": 0.7417089939117432, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8107101321220398, "step": 165 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 447.0, "completions/max_terminated_length": 447.0, "completions/mean_length": 238.31640625, "completions/mean_terminated_length": 238.31640625, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.2656, "grad_norm": 0.04968814551830292, "learning_rate": 8.167259786476868e-07, "loss": 0.0087, "num_tokens": 81219316.0, "reward": 1.3115195035934448, "reward_std": 0.1865576058626175, "rewards/accuracy_reward_long_step": 0.421875, "rewards/final_brier_reward_long_step": 0.7827734351158142, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7758046984672546, "step": 166 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 473.0, "completions/max_terminated_length": 473.0, "completions/mean_length": 246.90625, "completions/mean_terminated_length": 246.90625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.2672, "grad_norm": 0.02806948870420456, "learning_rate": 8.149466192170819e-07, "loss": -0.0126, "num_tokens": 81722972.0, "reward": 1.287018060684204, "reward_std": 0.17807143926620483, "rewards/accuracy_reward_long_step": 0.41015625, "rewards/final_brier_reward_long_step": 0.7411332130432129, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.766313910484314, "step": 167 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 430.0, "completions/max_terminated_length": 430.0, "completions/mean_length": 242.71875, "completions/mean_terminated_length": 242.71875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.2688, "grad_norm": 0.02786392532289028, "learning_rate": 8.131672597864768e-07, "loss": -0.0143, "num_tokens": 82208676.0, "reward": 1.3826302289962769, "reward_std": 0.16845399141311646, "rewards/accuracy_reward_long_step": 0.4765625, "rewards/final_brier_reward_long_step": 0.7937402129173279, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8305305242538452, "step": 168 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 408.0, "completions/max_terminated_length": 408.0, "completions/mean_length": 248.65234375, "completions/mean_terminated_length": 248.65234375, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.2704, "grad_norm": 0.02810235507786274, "learning_rate": 8.113879003558719e-07, "loss": -0.0028, "num_tokens": 82703459.0, "reward": 1.2465064525604248, "reward_std": 0.1827697902917862, "rewards/accuracy_reward_long_step": 0.375, "rewards/final_brier_reward_long_step": 0.701416015625, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7846096754074097, "step": 169 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 616.0, "completions/max_terminated_length": 616.0, "completions/mean_length": 256.98046875, "completions/mean_terminated_length": 256.98046875, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.272, "grad_norm": 0.03040560707449913, "learning_rate": 8.096085409252668e-07, "loss": -0.0086, "num_tokens": 83174350.0, "reward": 1.3326289653778076, "reward_std": 0.19962584972381592, "rewards/accuracy_reward_long_step": 0.44921875, "rewards/final_brier_reward_long_step": 0.7591210603713989, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7823324203491211, "step": 170 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 595.0, "completions/max_terminated_length": 595.0, "completions/mean_length": 250.66015625, "completions/mean_terminated_length": 250.66015625, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.2736, "grad_norm": 0.02771839126944542, "learning_rate": 8.078291814946618e-07, "loss": -0.0059, "num_tokens": 83664551.0, "reward": 1.3717570304870605, "reward_std": 0.20381051301956177, "rewards/accuracy_reward_long_step": 0.48046875, "rewards/final_brier_reward_long_step": 0.7613476514816284, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8038052320480347, "step": 171 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 438.0, "completions/max_terminated_length": 438.0, "completions/mean_length": 245.0703125, "completions/mean_terminated_length": 245.0703125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.2752, "grad_norm": 0.03148366138339043, "learning_rate": 8.06049822064057e-07, "loss": 0.0026, "num_tokens": 84163441.0, "reward": 1.2595324516296387, "reward_std": 0.21818827092647552, "rewards/accuracy_reward_long_step": 0.37890625, "rewards/final_brier_reward_long_step": 0.7574383020401001, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7650665044784546, "step": 172 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 392.0, "completions/max_terminated_length": 392.0, "completions/mean_length": 241.73828125, "completions/mean_terminated_length": 241.73828125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.2768, "grad_norm": 0.027540108188986778, "learning_rate": 8.042704626334519e-07, "loss": 0.0026, "num_tokens": 84651926.0, "reward": 1.3201969861984253, "reward_std": 0.23394638299942017, "rewards/accuracy_reward_long_step": 0.4296875, "rewards/final_brier_reward_long_step": 0.7422363758087158, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8198016285896301, "step": 173 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 639.0, "completions/max_terminated_length": 639.0, "completions/mean_length": 245.08984375, "completions/mean_terminated_length": 245.08984375, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "epoch": 0.2784, "grad_norm": 0.02940957434475422, "learning_rate": 8.02491103202847e-07, "loss": -0.0074, "num_tokens": 85147357.0, "reward": 1.2906839847564697, "reward_std": 0.2153157889842987, "rewards/accuracy_reward_long_step": 0.4140625, "rewards/final_brier_reward_long_step": 0.7498632669448853, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7644352912902832, "step": 174 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 442.0, "completions/max_terminated_length": 442.0, "completions/mean_length": 245.8515625, "completions/mean_terminated_length": 245.8515625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.28, "grad_norm": 0.03165048733353615, "learning_rate": 8.007117437722419e-07, "loss": 0.0137, "num_tokens": 85624735.0, "reward": 1.4603183269500732, "reward_std": 0.22203630208969116, "rewards/accuracy_reward_long_step": 0.5625, "rewards/final_brier_reward_long_step": 0.7952343821525574, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.796039342880249, "step": 175 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 407.0, "completions/max_terminated_length": 407.0, "completions/mean_length": 250.11328125, "completions/mean_terminated_length": 251.09413146972656, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.2816, "grad_norm": 0.0285260621458292, "learning_rate": 7.98932384341637e-07, "loss": -0.0097, "num_tokens": 86101692.0, "reward": 1.4544804096221924, "reward_std": 0.21490904688835144, "rewards/accuracy_reward_long_step": 0.5703125, "rewards/final_brier_reward_long_step": 0.7437987923622131, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.8084980249404907, "step": 176 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 387.0, "completions/max_terminated_length": 387.0, "completions/mean_length": 247.91015625, "completions/mean_terminated_length": 247.91015625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.2832, "grad_norm": 0.02845529466867447, "learning_rate": 7.97153024911032e-07, "loss": 0.0112, "num_tokens": 86593685.0, "reward": 1.3518202304840088, "reward_std": 0.15090304613113403, "rewards/accuracy_reward_long_step": 0.46875, "rewards/final_brier_reward_long_step": 0.7558691501617432, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7764118909835815, "step": 177 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 719.0, "completions/max_terminated_length": 719.0, "completions/mean_length": 253.98046875, "completions/mean_terminated_length": 253.98046875, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.2848, "grad_norm": 0.029682578518986702, "learning_rate": 7.95373665480427e-07, "loss": -0.0156, "num_tokens": 87076488.0, "reward": 1.3396403789520264, "reward_std": 0.1541176736354828, "rewards/accuracy_reward_long_step": 0.4375, "rewards/final_brier_reward_long_step": 0.8080171346664429, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8005446195602417, "step": 178 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/max_terminated_length": 439.0, "completions/mean_length": 248.99609375, "completions/mean_terminated_length": 248.99609375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.2864, "grad_norm": 0.028529809787869453, "learning_rate": 7.935943060498221e-07, "loss": 0.0108, "num_tokens": 87564831.0, "reward": 1.3985368013381958, "reward_std": 0.15740060806274414, "rewards/accuracy_reward_long_step": 0.49609375, "rewards/final_brier_reward_long_step": 0.7856543064117432, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8241176605224609, "step": 179 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/max_terminated_length": 439.0, "completions/mean_length": 242.43359375, "completions/mean_terminated_length": 242.43359375, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.288, "grad_norm": 0.029515286907553673, "learning_rate": 7.91814946619217e-07, "loss": -0.0121, "num_tokens": 88038206.0, "reward": 1.4421117305755615, "reward_std": 0.245658278465271, "rewards/accuracy_reward_long_step": 0.56640625, "rewards/final_brier_reward_long_step": 0.7302929759025574, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7725290060043335, "step": 180 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 479.0, "completions/max_terminated_length": 479.0, "completions/mean_length": 247.80859375, "completions/mean_terminated_length": 247.80859375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.2896, "grad_norm": 0.028533408418297768, "learning_rate": 7.900355871886121e-07, "loss": -0.0021, "num_tokens": 88526117.0, "reward": 1.4392614364624023, "reward_std": 0.1886727213859558, "rewards/accuracy_reward_long_step": 0.5703125, "rewards/final_brier_reward_long_step": 0.7036230564117432, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7721726894378662, "step": 181 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 402.0, "completions/max_terminated_length": 402.0, "completions/mean_length": 247.95703125, "completions/mean_terminated_length": 247.95703125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.2912, "grad_norm": 0.03011815994977951, "learning_rate": 7.88256227758007e-07, "loss": 0.0105, "num_tokens": 89007906.0, "reward": 1.3243916034698486, "reward_std": 0.23409831523895264, "rewards/accuracy_reward_long_step": 0.44921875, "rewards/final_brier_reward_long_step": 0.7340039014816284, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7666873931884766, "step": 182 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 520.0, "completions/max_terminated_length": 520.0, "completions/mean_length": 257.90234375, "completions/mean_terminated_length": 257.90234375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.2928, "grad_norm": 0.02743351273238659, "learning_rate": 7.864768683274021e-07, "loss": 0.0068, "num_tokens": 89487889.0, "reward": 1.3753552436828613, "reward_std": 0.13086272776126862, "rewards/accuracy_reward_long_step": 0.484375, "rewards/final_brier_reward_long_step": 0.7642577886581421, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7996633648872375, "step": 183 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 714.0, "completions/max_terminated_length": 714.0, "completions/mean_length": 260.5, "completions/mean_terminated_length": 260.5, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.2944, "grad_norm": 0.02766694501042366, "learning_rate": 7.846975088967971e-07, "loss": 0.0127, "num_tokens": 89978625.0, "reward": 1.4390387535095215, "reward_std": 0.17982302606105804, "rewards/accuracy_reward_long_step": 0.55859375, "rewards/final_brier_reward_long_step": 0.7528809309005737, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7688993811607361, "step": 184 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 419.0, "completions/max_terminated_length": 419.0, "completions/mean_length": 271.25390625, "completions/mean_terminated_length": 271.25390625, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.296, "grad_norm": 0.0269797183573246, "learning_rate": 7.829181494661921e-07, "loss": -0.0066, "num_tokens": 90473290.0, "reward": 1.3175511360168457, "reward_std": 0.13507431745529175, "rewards/accuracy_reward_long_step": 0.453125, "rewards/final_brier_reward_long_step": 0.7091602087020874, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7485443353652954, "step": 185 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.0, "completions/max_terminated_length": 471.0, "completions/mean_length": 268.40625, "completions/mean_terminated_length": 268.40625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.2976, "grad_norm": 0.027505241334438324, "learning_rate": 7.811387900355872e-07, "loss": -0.0114, "num_tokens": 90958874.0, "reward": 1.517210602760315, "reward_std": 0.18232710659503937, "rewards/accuracy_reward_long_step": 0.6171875, "rewards/final_brier_reward_long_step": 0.8131054639816284, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7869866490364075, "step": 186 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 481.0, "completions/max_terminated_length": 481.0, "completions/mean_length": 275.80859375, "completions/mean_terminated_length": 275.80859375, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.2992, "grad_norm": 0.0265911016613245, "learning_rate": 7.793594306049822e-07, "loss": -0.0118, "num_tokens": 91450809.0, "reward": 1.4153180122375488, "reward_std": 0.20222672820091248, "rewards/accuracy_reward_long_step": 0.52734375, "rewards/final_brier_reward_long_step": 0.724365234375, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8275318145751953, "step": 187 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 516.0, "completions/max_terminated_length": 516.0, "completions/mean_length": 282.4375, "completions/mean_terminated_length": 282.4375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.3008, "grad_norm": 0.02835831232368946, "learning_rate": 7.775800711743772e-07, "loss": 0.0055, "num_tokens": 91962913.0, "reward": 1.3732486963272095, "reward_std": 0.21462617814540863, "rewards/accuracy_reward_long_step": 0.484375, "rewards/final_brier_reward_long_step": 0.7563574314117432, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.799137532711029, "step": 188 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 505.0, "completions/max_terminated_length": 505.0, "completions/mean_length": 282.69140625, "completions/mean_terminated_length": 282.69140625, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.3024, "grad_norm": 0.026161570101976395, "learning_rate": 7.758007117437722e-07, "loss": 0.0141, "num_tokens": 92451986.0, "reward": 1.4073671102523804, "reward_std": 0.12184424698352814, "rewards/accuracy_reward_long_step": 0.5, "rewards/final_brier_reward_long_step": 0.825976550579071, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8034918308258057, "step": 189 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/max_terminated_length": 459.0, "completions/mean_length": 286.796875, "completions/mean_terminated_length": 286.796875, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.304, "grad_norm": 0.027012880891561508, "learning_rate": 7.740213523131672e-07, "loss": 0.0021, "num_tokens": 92948694.0, "reward": 1.2427858114242554, "reward_std": 0.235196053981781, "rewards/accuracy_reward_long_step": 0.37109375, "rewards/final_brier_reward_long_step": 0.6898242235183716, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7969439625740051, "step": 190 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 517.0, "completions/max_terminated_length": 517.0, "completions/mean_length": 282.46484375, "completions/mean_terminated_length": 282.46484375, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.3056, "grad_norm": 0.037487372756004333, "learning_rate": 7.722419928825622e-07, "loss": 0.0056, "num_tokens": 93449965.0, "reward": 1.3565409183502197, "reward_std": 0.14568164944648743, "rewards/accuracy_reward_long_step": 0.46875, "rewards/final_brier_reward_long_step": 0.733447253704071, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8177168369293213, "step": 191 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 547.0, "completions/max_terminated_length": 547.0, "completions/mean_length": 294.59375, "completions/mean_terminated_length": 294.59375, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.3072, "grad_norm": 0.026451628655195236, "learning_rate": 7.704626334519572e-07, "loss": 0.0008, "num_tokens": 93958261.0, "reward": 1.1881611347198486, "reward_std": 0.18037152290344238, "rewards/accuracy_reward_long_step": 0.296875, "rewards/final_brier_reward_long_step": 0.7727734446525574, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.792371392250061, "step": 192 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 505.0, "completions/max_terminated_length": 505.0, "completions/mean_length": 290.57421875, "completions/mean_terminated_length": 290.57421875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.3088, "grad_norm": 0.026738133281469345, "learning_rate": 7.686832740213523e-07, "loss": 0.0111, "num_tokens": 94465464.0, "reward": 1.466090202331543, "reward_std": 0.17279371619224548, "rewards/accuracy_reward_long_step": 0.56640625, "rewards/final_brier_reward_long_step": 0.7865039110183716, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8122318983078003, "step": 193 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 511.0, "completions/max_terminated_length": 511.0, "completions/mean_length": 282.94921875, "completions/mean_terminated_length": 282.94921875, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.3104, "grad_norm": 0.02935073897242546, "learning_rate": 7.669039145907473e-07, "loss": 0.0057, "num_tokens": 94968371.0, "reward": 1.3700523376464844, "reward_std": 0.21206629276275635, "rewards/accuracy_reward_long_step": 0.48046875, "rewards/final_brier_reward_long_step": 0.7314281463623047, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8269064426422119, "step": 194 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 532.0, "completions/max_terminated_length": 532.0, "completions/mean_length": 276.2578125, "completions/mean_terminated_length": 276.2578125, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.312, "grad_norm": 0.028218043968081474, "learning_rate": 7.651245551601423e-07, "loss": -0.0055, "num_tokens": 95464117.0, "reward": 1.5009515285491943, "reward_std": 0.16311804950237274, "rewards/accuracy_reward_long_step": 0.60546875, "rewards/final_brier_reward_long_step": 0.7480566501617432, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8338742256164551, "step": 195 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 539.0, "completions/max_terminated_length": 539.0, "completions/mean_length": 277.2578125, "completions/mean_terminated_length": 277.2578125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.3136, "grad_norm": 0.029587451368570328, "learning_rate": 7.633451957295374e-07, "loss": 0.004, "num_tokens": 95963519.0, "reward": 1.6074358224868774, "reward_std": 0.1871250867843628, "rewards/accuracy_reward_long_step": 0.6953125, "rewards/final_brier_reward_long_step": 0.8149710893630981, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8335224390029907, "step": 196 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 495.0, "completions/max_terminated_length": 495.0, "completions/mean_length": 273.578125, "completions/mean_terminated_length": 273.578125, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.3152, "grad_norm": 0.029551653191447258, "learning_rate": 7.615658362989323e-07, "loss": 0.008, "num_tokens": 96461403.0, "reward": 1.3781944513320923, "reward_std": 0.17420879006385803, "rewards/accuracy_reward_long_step": 0.46875, "rewards/final_brier_reward_long_step": 0.8260058164596558, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8117718696594238, "step": 197 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 516.0, "completions/max_terminated_length": 516.0, "completions/mean_length": 274.83984375, "completions/mean_terminated_length": 274.83984375, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.3168, "grad_norm": 0.02734399028122425, "learning_rate": 7.597864768683274e-07, "loss": 0.0036, "num_tokens": 96960098.0, "reward": 1.3913518190383911, "reward_std": 0.20948463678359985, "rewards/accuracy_reward_long_step": 0.50390625, "rewards/final_brier_reward_long_step": 0.7432616949081421, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8065208792686462, "step": 198 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 586.0, "completions/max_terminated_length": 586.0, "completions/mean_length": 285.73046875, "completions/mean_terminated_length": 285.73046875, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.3184, "grad_norm": 0.025479217991232872, "learning_rate": 7.580071174377223e-07, "loss": -0.0064, "num_tokens": 97462677.0, "reward": 1.3470879793167114, "reward_std": 0.17569580674171448, "rewards/accuracy_reward_long_step": 0.46484375, "rewards/final_brier_reward_long_step": 0.7766375541687012, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7523394823074341, "step": 199 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 487.0, "completions/max_terminated_length": 487.0, "completions/mean_length": 276.9609375, "completions/mean_terminated_length": 276.9609375, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.32, "grad_norm": 0.026502054184675217, "learning_rate": 7.562277580071174e-07, "loss": 0.003, "num_tokens": 97933083.0, "reward": 1.3517411947250366, "reward_std": 0.1127757877111435, "rewards/accuracy_reward_long_step": 0.46875, "rewards/final_brier_reward_long_step": 0.7498577833175659, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7821072340011597, "step": 200 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 440.0, "completions/max_terminated_length": 440.0, "completions/mean_length": 277.74609375, "completions/mean_terminated_length": 277.74609375, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.3216, "grad_norm": 0.026896316558122635, "learning_rate": 7.544483985765125e-07, "loss": 0.0045, "num_tokens": 98407330.0, "reward": 1.240262508392334, "reward_std": 0.16174383461475372, "rewards/accuracy_reward_long_step": 0.35546875, "rewards/final_brier_reward_long_step": 0.798291027545929, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7408840656280518, "step": 201 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 553.0, "completions/max_terminated_length": 553.0, "completions/mean_length": 261.98828125, "completions/mean_terminated_length": 261.98828125, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.3232, "grad_norm": 0.027860237285494804, "learning_rate": 7.526690391459074e-07, "loss": 0.0018, "num_tokens": 98894503.0, "reward": 1.223290205001831, "reward_std": 0.15445607900619507, "rewards/accuracy_reward_long_step": 0.3515625, "rewards/final_brier_reward_long_step": 0.7443945407867432, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7425163984298706, "step": 202 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 481.0, "completions/max_terminated_length": 481.0, "completions/mean_length": 274.48828125, "completions/mean_terminated_length": 274.48828125, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.3248, "grad_norm": 0.02505328133702278, "learning_rate": 7.508896797153025e-07, "loss": -0.0032, "num_tokens": 99377636.0, "reward": 1.4042761325836182, "reward_std": 0.2108316421508789, "rewards/accuracy_reward_long_step": 0.5078125, "rewards/final_brier_reward_long_step": 0.7737988233566284, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8120555877685547, "step": 203 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 528.0, "completions/max_terminated_length": 528.0, "completions/mean_length": 271.3203125, "completions/mean_terminated_length": 271.3203125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.3264, "grad_norm": 0.027994032949209213, "learning_rate": 7.491103202846974e-07, "loss": 0.0018, "num_tokens": 99877070.0, "reward": 1.4199368953704834, "reward_std": 0.1661403775215149, "rewards/accuracy_reward_long_step": 0.53125, "rewards/final_brier_reward_long_step": 0.7513816356658936, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8033663034439087, "step": 204 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 485.0, "completions/max_terminated_length": 485.0, "completions/mean_length": 265.5625, "completions/mean_terminated_length": 266.60394287109375, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.328, "grad_norm": 0.02644249238073826, "learning_rate": 7.473309608540925e-07, "loss": -0.0023, "num_tokens": 100362006.0, "reward": 1.4836362600326538, "reward_std": 0.2127230167388916, "rewards/accuracy_reward_long_step": 0.5859375, "rewards/final_brier_reward_long_step": 0.7705457210540771, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.828061580657959, "step": 205 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 429.0, "completions/max_terminated_length": 429.0, "completions/mean_length": 260.26953125, "completions/mean_terminated_length": 260.26953125, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.3296, "grad_norm": 0.02795090340077877, "learning_rate": 7.455516014234874e-07, "loss": 0.0014, "num_tokens": 100836259.0, "reward": 1.4013100862503052, "reward_std": 0.17857375741004944, "rewards/accuracy_reward_long_step": 0.53125, "rewards/final_brier_reward_long_step": 0.7368500232696533, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7433903217315674, "step": 206 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 463.0, "completions/max_terminated_length": 463.0, "completions/mean_length": 257.23828125, "completions/mean_terminated_length": 257.23828125, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.3312, "grad_norm": 0.028254900127649307, "learning_rate": 7.437722419928826e-07, "loss": 0.0146, "num_tokens": 101316480.0, "reward": 1.4756114482879639, "reward_std": 0.21930184960365295, "rewards/accuracy_reward_long_step": 0.60546875, "rewards/final_brier_reward_long_step": 0.7034816741943359, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7770892381668091, "step": 207 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 799.0, "completions/max_terminated_length": 799.0, "completions/mean_length": 275.65625, "completions/mean_terminated_length": 275.65625, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 0.3328, "grad_norm": 0.02601105347275734, "learning_rate": 7.419928825622776e-07, "loss": -0.0054, "num_tokens": 101820640.0, "reward": 1.4046986103057861, "reward_std": 0.1485091894865036, "rewards/accuracy_reward_long_step": 0.53125, "rewards/final_brier_reward_long_step": 0.7158496379852295, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7779449224472046, "step": 208 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 547.0, "completions/max_terminated_length": 547.0, "completions/mean_length": 275.76953125, "completions/mean_terminated_length": 275.76953125, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 0.3344, "grad_norm": 0.027420159429311752, "learning_rate": 7.402135231316725e-07, "loss": 0.0148, "num_tokens": 102301669.0, "reward": 1.3544528484344482, "reward_std": 0.12338382005691528, "rewards/accuracy_reward_long_step": 0.45703125, "rewards/final_brier_reward_long_step": 0.750058650970459, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8396276235580444, "step": 209 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 421.0, "completions/max_terminated_length": 421.0, "completions/mean_length": 260.109375, "completions/mean_terminated_length": 260.109375, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.336, "grad_norm": 0.027533669024705887, "learning_rate": 7.384341637010676e-07, "loss": -0.0076, "num_tokens": 102791329.0, "reward": 1.4349637031555176, "reward_std": 0.23317797482013702, "rewards/accuracy_reward_long_step": 0.53125, "rewards/final_brier_reward_long_step": 0.7997035384178162, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8151513338088989, "step": 210 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 522.0, "completions/max_terminated_length": 522.0, "completions/mean_length": 262.828125, "completions/mean_terminated_length": 262.828125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.3376, "grad_norm": 0.028309568762779236, "learning_rate": 7.366548042704625e-07, "loss": 0.0238, "num_tokens": 103289013.0, "reward": 1.3645201921463013, "reward_std": 0.20173460245132446, "rewards/accuracy_reward_long_step": 0.4609375, "rewards/final_brier_reward_long_step": 0.8263086080551147, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.788021981716156, "step": 211 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 422.0, "completions/max_terminated_length": 422.0, "completions/mean_length": 257.796875, "completions/mean_terminated_length": 257.796875, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.3392, "grad_norm": 0.02732851170003414, "learning_rate": 7.348754448398576e-07, "loss": -0.005, "num_tokens": 103776305.0, "reward": 1.3025028705596924, "reward_std": 0.19672125577926636, "rewards/accuracy_reward_long_step": 0.41796875, "rewards/final_brier_reward_long_step": 0.7558465003967285, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7822898626327515, "step": 212 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/max_terminated_length": 427.0, "completions/mean_length": 252.21484375, "completions/mean_terminated_length": 252.21484375, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.3408, "grad_norm": 0.02799782156944275, "learning_rate": 7.330960854092527e-07, "loss": -0.0066, "num_tokens": 104269640.0, "reward": 1.3634313344955444, "reward_std": 0.23930571973323822, "rewards/accuracy_reward_long_step": 0.48046875, "rewards/final_brier_reward_long_step": 0.72954922914505, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.810113787651062, "step": 213 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 478.0, "completions/max_terminated_length": 478.0, "completions/mean_length": 251.56640625, "completions/mean_terminated_length": 251.56640625, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.3424, "grad_norm": 0.029079895466566086, "learning_rate": 7.313167259786477e-07, "loss": -0.0028, "num_tokens": 104767105.0, "reward": 1.3156641721725464, "reward_std": 0.15963619947433472, "rewards/accuracy_reward_long_step": 0.4140625, "rewards/final_brier_reward_long_step": 0.7907624840736389, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8156442642211914, "step": 214 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 440.0, "completions/max_terminated_length": 440.0, "completions/mean_length": 248.34375, "completions/mean_terminated_length": 248.34375, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.344, "grad_norm": 0.029702944681048393, "learning_rate": 7.295373665480427e-07, "loss": 0.0191, "num_tokens": 105264905.0, "reward": 1.2511861324310303, "reward_std": 0.13276691734790802, "rewards/accuracy_reward_long_step": 0.375, "rewards/final_brier_reward_long_step": 0.7566503286361694, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7559068202972412, "step": 215 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 382.0, "completions/max_terminated_length": 382.0, "completions/mean_length": 240.96875, "completions/mean_terminated_length": 240.96875, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.3456, "grad_norm": 0.02834567055106163, "learning_rate": 7.277580071174377e-07, "loss": -0.0024, "num_tokens": 105756457.0, "reward": 1.4394803047180176, "reward_std": 0.2180296927690506, "rewards/accuracy_reward_long_step": 0.5625, "rewards/final_brier_reward_long_step": 0.7011808753013611, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8067399859428406, "step": 216 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 489.0, "completions/max_terminated_length": 489.0, "completions/mean_length": 247.52734375, "completions/mean_terminated_length": 247.52734375, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.3472, "grad_norm": 0.027301594614982605, "learning_rate": 7.259786476868327e-07, "loss": 0.0056, "num_tokens": 106254728.0, "reward": 1.3793138265609741, "reward_std": 0.16131410002708435, "rewards/accuracy_reward_long_step": 0.48046875, "rewards/final_brier_reward_long_step": 0.7766991853713989, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.818681001663208, "step": 217 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/max_terminated_length": 444.0, "completions/mean_length": 246.4140625, "completions/mean_terminated_length": 246.4140625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.3488, "grad_norm": 0.02674778178334236, "learning_rate": 7.241992882562277e-07, "loss": -0.0079, "num_tokens": 106746986.0, "reward": 1.3636713027954102, "reward_std": 0.15767797827720642, "rewards/accuracy_reward_long_step": 0.484375, "rewards/final_brier_reward_long_step": 0.743729293346405, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7734558582305908, "step": 218 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.0, "completions/max_terminated_length": 461.0, "completions/mean_length": 250.6796875, "completions/mean_terminated_length": 250.6796875, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.3504, "grad_norm": 0.028994156047701836, "learning_rate": 7.224199288256227e-07, "loss": -0.015, "num_tokens": 107246088.0, "reward": 1.3627524375915527, "reward_std": 0.21599024534225464, "rewards/accuracy_reward_long_step": 0.46875, "rewards/final_brier_reward_long_step": 0.7811144590377808, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8027076721191406, "step": 219 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 400.0, "completions/max_terminated_length": 400.0, "completions/mean_length": 238.0390625, "completions/mean_terminated_length": 238.0390625, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.352, "grad_norm": 0.03368399292230606, "learning_rate": 7.206405693950178e-07, "loss": -0.0043, "num_tokens": 107737026.0, "reward": 1.3865300416946411, "reward_std": 0.20244070887565613, "rewards/accuracy_reward_long_step": 0.484375, "rewards/final_brier_reward_long_step": 0.7735304236412048, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8350895047187805, "step": 220 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 420.0, "completions/max_terminated_length": 420.0, "completions/mean_length": 234.5234375, "completions/mean_terminated_length": 234.5234375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.3536, "grad_norm": 0.030342837795615196, "learning_rate": 7.188612099644128e-07, "loss": 0.0076, "num_tokens": 108207696.0, "reward": 1.3147838115692139, "reward_std": 0.14057135581970215, "rewards/accuracy_reward_long_step": 0.4453125, "rewards/final_brier_reward_long_step": 0.7470492124557495, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7386487722396851, "step": 221 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 432.0, "completions/max_terminated_length": 432.0, "completions/mean_length": 243.98046875, "completions/mean_terminated_length": 243.98046875, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.3552, "grad_norm": 0.028509140014648438, "learning_rate": 7.170818505338078e-07, "loss": 0.0073, "num_tokens": 108685563.0, "reward": 1.4801634550094604, "reward_std": 0.2189689725637436, "rewards/accuracy_reward_long_step": 0.57421875, "rewards/final_brier_reward_long_step": 0.7850944995880127, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8386842608451843, "step": 222 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 393.0, "completions/max_terminated_length": 393.0, "completions/mean_length": 234.9921875, "completions/mean_terminated_length": 234.9921875, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.3568, "grad_norm": 0.02966553531587124, "learning_rate": 7.153024911032028e-07, "loss": 0.0007, "num_tokens": 109167289.0, "reward": 1.377956509590149, "reward_std": 0.15985409915447235, "rewards/accuracy_reward_long_step": 0.48046875, "rewards/final_brier_reward_long_step": 0.7817855477333069, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8081655502319336, "step": 223 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 383.0, "completions/max_terminated_length": 383.0, "completions/mean_length": 231.79296875, "completions/mean_terminated_length": 231.79296875, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.3584, "grad_norm": 0.030611420050263405, "learning_rate": 7.135231316725978e-07, "loss": 0.0122, "num_tokens": 109655324.0, "reward": 1.3960628509521484, "reward_std": 0.11526073515415192, "rewards/accuracy_reward_long_step": 0.4609375, "rewards/final_brier_reward_long_step": 0.865734338760376, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8747667074203491, "step": 224 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 424.0, "completions/max_terminated_length": 424.0, "completions/mean_length": 239.73828125, "completions/mean_terminated_length": 239.73828125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.36, "grad_norm": 0.032648514956235886, "learning_rate": 7.117437722419929e-07, "loss": 0.0134, "num_tokens": 110142809.0, "reward": 1.300749659538269, "reward_std": 0.20763415098190308, "rewards/accuracy_reward_long_step": 0.41796875, "rewards/final_brier_reward_long_step": 0.7577574253082275, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7733659744262695, "step": 225 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 480.0, "completions/max_terminated_length": 480.0, "completions/mean_length": 251.1875, "completions/mean_terminated_length": 251.1875, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.3616, "grad_norm": 0.027327539399266243, "learning_rate": 7.099644128113878e-07, "loss": 0.0023, "num_tokens": 110646393.0, "reward": 1.5032904148101807, "reward_std": 0.1392737776041031, "rewards/accuracy_reward_long_step": 0.5859375, "rewards/final_brier_reward_long_step": 0.8319687843322754, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8374428749084473, "step": 226 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 406.0, "completions/max_terminated_length": 406.0, "completions/mean_length": 248.3046875, "completions/mean_terminated_length": 248.3046875, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.3632, "grad_norm": 0.028683941811323166, "learning_rate": 7.08185053380783e-07, "loss": -0.0104, "num_tokens": 111139431.0, "reward": 1.2084152698516846, "reward_std": 0.1405543088912964, "rewards/accuracy_reward_long_step": 0.30859375, "rewards/final_brier_reward_long_step": 0.7831394672393799, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8161464929580688, "step": 227 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 369.0, "completions/max_terminated_length": 369.0, "completions/mean_length": 248.3671875, "completions/mean_terminated_length": 248.3671875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.3648, "grad_norm": 0.02712222747504711, "learning_rate": 7.064056939501779e-07, "loss": -0.006, "num_tokens": 111635245.0, "reward": 1.341683030128479, "reward_std": 0.18412762880325317, "rewards/accuracy_reward_long_step": 0.44140625, "rewards/final_brier_reward_long_step": 0.7527234554290771, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8483837842941284, "step": 228 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 457.0, "completions/max_terminated_length": 457.0, "completions/mean_length": 242.3046875, "completions/mean_terminated_length": 242.3046875, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.3664, "grad_norm": 0.028868133202195168, "learning_rate": 7.046263345195729e-07, "loss": 0.0127, "num_tokens": 112127091.0, "reward": 1.4517192840576172, "reward_std": 0.16360458731651306, "rewards/accuracy_reward_long_step": 0.578125, "rewards/final_brier_reward_long_step": 0.7802172303199768, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7219727039337158, "step": 229 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/max_terminated_length": 444.0, "completions/mean_length": 249.1953125, "completions/mean_terminated_length": 249.1953125, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.368, "grad_norm": 0.030503099784255028, "learning_rate": 7.028469750889679e-07, "loss": 0.0012, "num_tokens": 112615557.0, "reward": 1.2901397943496704, "reward_std": 0.1938847005367279, "rewards/accuracy_reward_long_step": 0.43359375, "rewards/final_brier_reward_long_step": 0.6676468849182129, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7585374116897583, "step": 230 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 422.0, "completions/max_terminated_length": 422.0, "completions/mean_length": 236.74609375, "completions/mean_terminated_length": 236.74609375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.3696, "grad_norm": 0.031112175434827805, "learning_rate": 7.010676156583629e-07, "loss": 0.0072, "num_tokens": 113107308.0, "reward": 1.3856728076934814, "reward_std": 0.1376914530992508, "rewards/accuracy_reward_long_step": 0.546875, "rewards/final_brier_reward_long_step": 0.6620769500732422, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.693114161491394, "step": 231 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/max_terminated_length": 427.0, "completions/mean_length": 235.6640625, "completions/mean_terminated_length": 235.6640625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.3712, "grad_norm": 0.028059890493750572, "learning_rate": 6.99288256227758e-07, "loss": -0.0005, "num_tokens": 113587182.0, "reward": 1.4213546514511108, "reward_std": 0.2364693284034729, "rewards/accuracy_reward_long_step": 0.5546875, "rewards/final_brier_reward_long_step": 0.7457855343818665, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7208831310272217, "step": 232 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 428.0, "completions/max_terminated_length": 428.0, "completions/mean_length": 235.29296875, "completions/mean_terminated_length": 235.29296875, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 0.3728, "grad_norm": 0.027861230075359344, "learning_rate": 6.975088967971529e-07, "loss": 0.0032, "num_tokens": 114059153.0, "reward": 1.3009544610977173, "reward_std": 0.12013030052185059, "rewards/accuracy_reward_long_step": 0.38671875, "rewards/final_brier_reward_long_step": 0.8454800844192505, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8192753195762634, "step": 233 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 429.0, "completions/max_terminated_length": 429.0, "completions/mean_length": 238.9921875, "completions/mean_terminated_length": 238.9921875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.3744, "grad_norm": 0.02792465314269066, "learning_rate": 6.957295373665481e-07, "loss": -0.008, "num_tokens": 114529591.0, "reward": 1.4602546691894531, "reward_std": 0.13010551035404205, "rewards/accuracy_reward_long_step": 0.57421875, "rewards/final_brier_reward_long_step": 0.7742418050765991, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7699018716812134, "step": 234 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 378.0, "completions/max_terminated_length": 378.0, "completions/mean_length": 234.44140625, "completions/mean_terminated_length": 234.44140625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.376, "grad_norm": 0.029247693717479706, "learning_rate": 6.93950177935943e-07, "loss": 0.0089, "num_tokens": 115003176.0, "reward": 1.4750906229019165, "reward_std": 0.14190274477005005, "rewards/accuracy_reward_long_step": 0.5625, "rewards/final_brier_reward_long_step": 0.8098050951957703, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8405575752258301, "step": 235 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 391.0, "completions/max_terminated_length": 391.0, "completions/mean_length": 234.3203125, "completions/mean_terminated_length": 234.3203125, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.3776, "grad_norm": 0.04604804888367653, "learning_rate": 6.921708185053381e-07, "loss": -0.0091, "num_tokens": 115499370.0, "reward": 1.253650426864624, "reward_std": 0.1829456090927124, "rewards/accuracy_reward_long_step": 0.36328125, "rewards/final_brier_reward_long_step": 0.8057793378829956, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7556971311569214, "step": 236 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 449.0, "completions/max_terminated_length": 449.0, "completions/mean_length": 244.71875, "completions/mean_terminated_length": 244.71875, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.3792, "grad_norm": 0.031333666294813156, "learning_rate": 6.903914590747331e-07, "loss": 0.0144, "num_tokens": 115977570.0, "reward": 1.4031198024749756, "reward_std": 0.1869141310453415, "rewards/accuracy_reward_long_step": 0.50390625, "rewards/final_brier_reward_long_step": 0.771310567855835, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8255437612533569, "step": 237 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 420.0, "completions/max_terminated_length": 420.0, "completions/mean_length": 249.234375, "completions/mean_terminated_length": 249.234375, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.3808, "grad_norm": 0.028639167547225952, "learning_rate": 6.88612099644128e-07, "loss": 0.0002, "num_tokens": 116465086.0, "reward": 1.3048759698867798, "reward_std": 0.18162578344345093, "rewards/accuracy_reward_long_step": 0.43359375, "rewards/final_brier_reward_long_step": 0.7125644683837891, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7725646495819092, "step": 238 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 452.0, "completions/max_terminated_length": 452.0, "completions/mean_length": 246.1953125, "completions/mean_terminated_length": 246.1953125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.3824, "grad_norm": 0.029338406398892403, "learning_rate": 6.868327402135231e-07, "loss": 0.0053, "num_tokens": 116931320.0, "reward": 1.4342849254608154, "reward_std": 0.1952168196439743, "rewards/accuracy_reward_long_step": 0.5234375, "rewards/final_brier_reward_long_step": 0.8456206917762756, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.79776930809021, "step": 239 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 494.0, "completions/max_terminated_length": 494.0, "completions/mean_length": 244.49609375, "completions/mean_terminated_length": 244.49609375, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.384, "grad_norm": 0.02827758900821209, "learning_rate": 6.85053380782918e-07, "loss": -0.0059, "num_tokens": 117435631.0, "reward": 1.448837399482727, "reward_std": 0.13833093643188477, "rewards/accuracy_reward_long_step": 0.55078125, "rewards/final_brier_reward_long_step": 0.8106515407562256, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7815728187561035, "step": 240 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 408.0, "completions/max_terminated_length": 408.0, "completions/mean_length": 245.53515625, "completions/mean_terminated_length": 245.53515625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.3856, "grad_norm": 0.04907617345452309, "learning_rate": 6.832740213523132e-07, "loss": 0.0111, "num_tokens": 117927904.0, "reward": 1.2637048959732056, "reward_std": 0.19578373432159424, "rewards/accuracy_reward_long_step": 0.375, "rewards/final_brier_reward_long_step": 0.7642871141433716, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7905321717262268, "step": 241 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 395.0, "completions/max_terminated_length": 395.0, "completions/mean_length": 247.26953125, "completions/mean_terminated_length": 247.26953125, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.3872, "grad_norm": 0.02862401306629181, "learning_rate": 6.814946619217081e-07, "loss": 0.0034, "num_tokens": 118409677.0, "reward": 1.4095513820648193, "reward_std": 0.16092461347579956, "rewards/accuracy_reward_long_step": 0.5078125, "rewards/final_brier_reward_long_step": 0.7932863235473633, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8136688470840454, "step": 242 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 401.0, "completions/max_terminated_length": 401.0, "completions/mean_length": 245.3828125, "completions/mean_terminated_length": 245.3828125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.3888, "grad_norm": 0.02867737039923668, "learning_rate": 6.797153024911032e-07, "loss": -0.0028, "num_tokens": 118886583.0, "reward": 1.4647985696792603, "reward_std": 0.22731342911720276, "rewards/accuracy_reward_long_step": 0.58984375, "rewards/final_brier_reward_long_step": 0.7210452556610107, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7787743210792542, "step": 243 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 423.0, "completions/max_terminated_length": 423.0, "completions/mean_length": 241.7578125, "completions/mean_terminated_length": 241.7578125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.3904, "grad_norm": 0.029156368225812912, "learning_rate": 6.779359430604982e-07, "loss": -0.005, "num_tokens": 119362457.0, "reward": 1.4701181650161743, "reward_std": 0.18407407402992249, "rewards/accuracy_reward_long_step": 0.58984375, "rewards/final_brier_reward_long_step": 0.7508969306945801, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7702009081840515, "step": 244 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 452.0, "completions/max_terminated_length": 452.0, "completions/mean_length": 255.76171875, "completions/mean_terminated_length": 255.76171875, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.392, "grad_norm": 0.02922751009464264, "learning_rate": 6.761565836298932e-07, "loss": -0.0079, "num_tokens": 119859300.0, "reward": 1.2958691120147705, "reward_std": 0.19874346256256104, "rewards/accuracy_reward_long_step": 0.390625, "rewards/final_brier_reward_long_step": 0.7741097807884216, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.84686678647995, "step": 245 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 452.0, "completions/max_terminated_length": 452.0, "completions/mean_length": 253.25390625, "completions/mean_terminated_length": 253.25390625, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.3936, "grad_norm": 0.029055269435048103, "learning_rate": 6.743772241992882e-07, "loss": 0.0013, "num_tokens": 120346845.0, "reward": 1.3145337104797363, "reward_std": 0.1577182412147522, "rewards/accuracy_reward_long_step": 0.42578125, "rewards/final_brier_reward_long_step": 0.7263835668563843, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.828626275062561, "step": 246 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 455.0, "completions/max_terminated_length": 455.0, "completions/mean_length": 247.15625, "completions/mean_terminated_length": 247.15625, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.3952, "grad_norm": 0.030021749436855316, "learning_rate": 6.725978647686833e-07, "loss": -0.0072, "num_tokens": 120849013.0, "reward": 1.3015249967575073, "reward_std": 0.15772968530654907, "rewards/accuracy_reward_long_step": 0.41015625, "rewards/final_brier_reward_long_step": 0.793542206287384, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7797452211380005, "step": 247 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.0, "completions/max_terminated_length": 461.0, "completions/mean_length": 246.453125, "completions/mean_terminated_length": 246.453125, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.3968, "grad_norm": 0.03100651502609253, "learning_rate": 6.708185053380783e-07, "loss": -0.0017, "num_tokens": 121339545.0, "reward": 1.3638386726379395, "reward_std": 0.18921023607254028, "rewards/accuracy_reward_long_step": 0.46484375, "rewards/final_brier_reward_long_step": 0.7788769602775574, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8171026110649109, "step": 248 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/max_terminated_length": 459.0, "completions/mean_length": 252.19140625, "completions/mean_terminated_length": 252.19140625, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.3984, "grad_norm": 0.029122378677129745, "learning_rate": 6.690391459074733e-07, "loss": 0.0026, "num_tokens": 121826858.0, "reward": 1.3321928977966309, "reward_std": 0.18050938844680786, "rewards/accuracy_reward_long_step": 0.4296875, "rewards/final_brier_reward_long_step": 0.7863625288009644, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.82365882396698, "step": 249 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 401.0, "completions/max_terminated_length": 401.0, "completions/mean_length": 255.10546875, "completions/mean_terminated_length": 255.10546875, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.4, "grad_norm": 0.02999301441013813, "learning_rate": 6.672597864768683e-07, "loss": -0.0024, "num_tokens": 122323669.0, "reward": 1.5447840690612793, "reward_std": 0.1723225712776184, "rewards/accuracy_reward_long_step": 0.66015625, "rewards/final_brier_reward_long_step": 0.7203683853149414, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8181428909301758, "step": 250 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 440.0, "completions/max_terminated_length": 440.0, "completions/mean_length": 263.68359375, "completions/mean_terminated_length": 263.68359375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.4016, "grad_norm": 0.03101922571659088, "learning_rate": 6.654804270462633e-07, "loss": 0.0001, "num_tokens": 122815420.0, "reward": 1.2982118129730225, "reward_std": 0.22702577710151672, "rewards/accuracy_reward_long_step": 0.4140625, "rewards/final_brier_reward_long_step": 0.7307562828063965, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8058412075042725, "step": 251 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 432.0, "completions/max_terminated_length": 432.0, "completions/mean_length": 269.63671875, "completions/mean_terminated_length": 269.63671875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.4032, "grad_norm": 0.028656797483563423, "learning_rate": 6.637010676156583e-07, "loss": -0.0024, "num_tokens": 123311063.0, "reward": 1.2810194492340088, "reward_std": 0.16011501848697662, "rewards/accuracy_reward_long_step": 0.37109375, "rewards/final_brier_reward_long_step": 0.7987828254699707, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8409198522567749, "step": 252 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 425.0, "completions/max_terminated_length": 425.0, "completions/mean_length": 253.25, "completions/mean_terminated_length": 253.25, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.4048, "grad_norm": 0.030914753675460815, "learning_rate": 6.619217081850533e-07, "loss": 0.0152, "num_tokens": 123774055.0, "reward": 1.3488011360168457, "reward_std": 0.12081344425678253, "rewards/accuracy_reward_long_step": 0.44140625, "rewards/final_brier_reward_long_step": 0.810867190361023, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8187124729156494, "step": 253 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 429.0, "completions/max_terminated_length": 429.0, "completions/mean_length": 264.98046875, "completions/mean_terminated_length": 264.98046875, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.4064, "grad_norm": 0.028002172708511353, "learning_rate": 6.601423487544484e-07, "loss": -0.0013, "num_tokens": 124260066.0, "reward": 1.4030770063400269, "reward_std": 0.14950624108314514, "rewards/accuracy_reward_long_step": 0.53125, "rewards/final_brier_reward_long_step": 0.7564605474472046, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7308475971221924, "step": 254 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 550.0, "completions/max_terminated_length": 550.0, "completions/mean_length": 267.8203125, "completions/mean_terminated_length": 267.8203125, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.408, "grad_norm": 0.027425022795796394, "learning_rate": 6.583629893238434e-07, "loss": -0.0113, "num_tokens": 124759276.0, "reward": 1.3036949634552002, "reward_std": 0.22329337894916534, "rewards/accuracy_reward_long_step": 0.4140625, "rewards/final_brier_reward_long_step": 0.7808917760848999, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7776377201080322, "step": 255 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 430.0, "completions/max_terminated_length": 430.0, "completions/mean_length": 256.765625, "completions/mean_terminated_length": 256.765625, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.4096, "grad_norm": 0.02852526493370533, "learning_rate": 6.565836298932385e-07, "loss": -0.0086, "num_tokens": 125243288.0, "reward": 1.462049961090088, "reward_std": 0.17608040571212769, "rewards/accuracy_reward_long_step": 0.6328125, "rewards/final_brier_reward_long_step": 0.6412858963012695, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.6756638288497925, "step": 256 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 489.0, "completions/max_terminated_length": 489.0, "completions/mean_length": 267.80078125, "completions/mean_terminated_length": 267.80078125, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.4112, "grad_norm": 0.030284898355603218, "learning_rate": 6.548042704626334e-07, "loss": -0.0072, "num_tokens": 125741869.0, "reward": 1.3154691457748413, "reward_std": 0.2600463032722473, "rewards/accuracy_reward_long_step": 0.44140625, "rewards/final_brier_reward_long_step": 0.7620574235916138, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7341942191123962, "step": 257 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 418.0, "completions/max_terminated_length": 418.0, "completions/mean_length": 262.48828125, "completions/mean_terminated_length": 262.48828125, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.4128, "grad_norm": 0.03752259910106659, "learning_rate": 6.530249110320284e-07, "loss": -0.0087, "num_tokens": 126234514.0, "reward": 1.4143836498260498, "reward_std": 0.14952951669692993, "rewards/accuracy_reward_long_step": 0.51171875, "rewards/final_brier_reward_long_step": 0.7889230251312256, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8217366933822632, "step": 258 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 437.0, "completions/max_terminated_length": 437.0, "completions/mean_length": 274.88671875, "completions/mean_terminated_length": 274.88671875, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.4144, "grad_norm": 0.028582880273461342, "learning_rate": 6.512455516014234e-07, "loss": -0.0036, "num_tokens": 126730069.0, "reward": 1.4206815958023071, "reward_std": 0.19087818264961243, "rewards/accuracy_reward_long_step": 0.5390625, "rewards/final_brier_reward_long_step": 0.7297155857086182, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8045732975006104, "step": 259 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 478.0, "completions/max_terminated_length": 478.0, "completions/mean_length": 272.7578125, "completions/mean_terminated_length": 272.7578125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.416, "grad_norm": 0.026567399501800537, "learning_rate": 6.494661921708184e-07, "loss": -0.0099, "num_tokens": 127214927.0, "reward": 1.3398463726043701, "reward_std": 0.12067516893148422, "rewards/accuracy_reward_long_step": 0.44921875, "rewards/final_brier_reward_long_step": 0.860762894153595, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7017475366592407, "step": 260 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 553.0, "completions/max_terminated_length": 553.0, "completions/mean_length": 277.7265625, "completions/mean_terminated_length": 277.7265625, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.4176, "grad_norm": 0.02695128507912159, "learning_rate": 6.476868327402136e-07, "loss": -0.001, "num_tokens": 127710617.0, "reward": 1.4976187944412231, "reward_std": 0.14346104860305786, "rewards/accuracy_reward_long_step": 0.60546875, "rewards/final_brier_reward_long_step": 0.7943031191825867, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7742971181869507, "step": 261 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.0, "completions/max_terminated_length": 461.0, "completions/mean_length": 273.38671875, "completions/mean_terminated_length": 273.38671875, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.4192, "grad_norm": 0.02583600953221321, "learning_rate": 6.459074733096085e-07, "loss": -0.0104, "num_tokens": 128212556.0, "reward": 1.436043620109558, "reward_std": 0.11342111974954605, "rewards/accuracy_reward_long_step": 0.5390625, "rewards/final_brier_reward_long_step": 0.8090195655822754, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.778904914855957, "step": 262 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 486.0, "completions/max_terminated_length": 486.0, "completions/mean_length": 275.109375, "completions/mean_terminated_length": 275.109375, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.4208, "grad_norm": 0.027538571506738663, "learning_rate": 6.441281138790036e-07, "loss": -0.0113, "num_tokens": 128707720.0, "reward": 1.2951858043670654, "reward_std": 0.15389752388000488, "rewards/accuracy_reward_long_step": 0.4140625, "rewards/final_brier_reward_long_step": 0.7856941223144531, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.738798975944519, "step": 263 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 587.0, "completions/max_terminated_length": 587.0, "completions/mean_length": 292.70703125, "completions/mean_terminated_length": 292.70703125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.4224, "grad_norm": 0.0262598879635334, "learning_rate": 6.423487544483985e-07, "loss": 0.017, "num_tokens": 129207389.0, "reward": 1.3416748046875, "reward_std": 0.23134978115558624, "rewards/accuracy_reward_long_step": 0.4375, "rewards/final_brier_reward_long_step": 0.7901312112808228, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8343803882598877, "step": 264 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 561.0, "completions/max_terminated_length": 561.0, "completions/mean_length": 288.3515625, "completions/mean_terminated_length": 288.3515625, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.424, "grad_norm": 0.030151214450597763, "learning_rate": 6.405693950177936e-07, "loss": 0.0096, "num_tokens": 129708127.0, "reward": 1.5116443634033203, "reward_std": 0.15138523280620575, "rewards/accuracy_reward_long_step": 0.61328125, "rewards/final_brier_reward_long_step": 0.8109831809997559, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7824693918228149, "step": 265 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 530.0, "completions/max_terminated_length": 530.0, "completions/mean_length": 285.359375, "completions/mean_terminated_length": 285.359375, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.4256, "grad_norm": 0.030536562204360962, "learning_rate": 6.387900355871885e-07, "loss": 0.0139, "num_tokens": 130207523.0, "reward": 1.5717109441757202, "reward_std": 0.1531601846218109, "rewards/accuracy_reward_long_step": 0.671875, "rewards/final_brier_reward_long_step": 0.7941410541534424, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.805202841758728, "step": 266 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 512.0, "completions/max_terminated_length": 512.0, "completions/mean_length": 285.46875, "completions/mean_terminated_length": 285.46875, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.4272, "grad_norm": 0.028177211061120033, "learning_rate": 6.370106761565835e-07, "loss": 0.0141, "num_tokens": 130672219.0, "reward": 1.3481889963150024, "reward_std": 0.1336720734834671, "rewards/accuracy_reward_long_step": 0.4453125, "rewards/final_brier_reward_long_step": 0.836502730846405, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7750029563903809, "step": 267 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 454.0, "completions/max_terminated_length": 454.0, "completions/mean_length": 289.078125, "completions/mean_terminated_length": 289.078125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.4288, "grad_norm": 0.027222031727433205, "learning_rate": 6.352313167259787e-07, "loss": -0.0063, "num_tokens": 131182759.0, "reward": 1.3185360431671143, "reward_std": 0.17626741528511047, "rewards/accuracy_reward_long_step": 0.44140625, "rewards/final_brier_reward_long_step": 0.7190519571304321, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7894670963287354, "step": 268 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 533.0, "completions/max_terminated_length": 533.0, "completions/mean_length": 280.86328125, "completions/mean_terminated_length": 280.86328125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.4304, "grad_norm": 0.03042900562286377, "learning_rate": 6.334519572953736e-07, "loss": -0.0005, "num_tokens": 131680284.0, "reward": 1.4278100728988647, "reward_std": 0.10464347898960114, "rewards/accuracy_reward_long_step": 0.54296875, "rewards/final_brier_reward_long_step": 0.7685461044311523, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7708194255828857, "step": 269 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 501.0, "completions/max_terminated_length": 501.0, "completions/mean_length": 287.1796875, "completions/mean_terminated_length": 288.305908203125, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.432, "grad_norm": 0.027735978364944458, "learning_rate": 6.316725978647687e-07, "loss": 0.0066, "num_tokens": 132185850.0, "reward": 1.4755172729492188, "reward_std": 0.208018958568573, "rewards/accuracy_reward_long_step": 0.59375, "rewards/final_brier_reward_long_step": 0.7717519402503967, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7631298303604126, "step": 270 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 598.0, "completions/max_terminated_length": 598.0, "completions/mean_length": 296.0703125, "completions/mean_terminated_length": 296.0703125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.4336, "grad_norm": 0.0286890659481287, "learning_rate": 6.298932384341636e-07, "loss": -0.0039, "num_tokens": 132682476.0, "reward": 1.224784255027771, "reward_std": 0.1470840871334076, "rewards/accuracy_reward_long_step": 0.36328125, "rewards/final_brier_reward_long_step": 0.7217913866043091, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7242205142974854, "step": 271 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 502.0, "completions/max_terminated_length": 502.0, "completions/mean_length": 290.5546875, "completions/mean_terminated_length": 290.5546875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.4352, "grad_norm": 0.028832513839006424, "learning_rate": 6.281138790035587e-07, "loss": 0.0023, "num_tokens": 133175010.0, "reward": 1.4400919675827026, "reward_std": 0.13745911419391632, "rewards/accuracy_reward_long_step": 0.54296875, "rewards/final_brier_reward_long_step": 0.8417414426803589, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.746751606464386, "step": 272 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 567.0, "completions/max_terminated_length": 567.0, "completions/mean_length": 294.5546875, "completions/mean_terminated_length": 294.5546875, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.4368, "grad_norm": 0.02651878260076046, "learning_rate": 6.263345195729537e-07, "loss": 0.005, "num_tokens": 133671872.0, "reward": 1.4299688339233398, "reward_std": 0.17098167538642883, "rewards/accuracy_reward_long_step": 0.54296875, "rewards/final_brier_reward_long_step": 0.7499749660491943, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7980256676673889, "step": 273 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 543.0, "completions/max_terminated_length": 543.0, "completions/mean_length": 294.70703125, "completions/mean_terminated_length": 294.70703125, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.4384, "grad_norm": 0.026192937046289444, "learning_rate": 6.245551601423488e-07, "loss": 0.0083, "num_tokens": 134164181.0, "reward": 1.3008532524108887, "reward_std": 0.15340715646743774, "rewards/accuracy_reward_long_step": 0.3984375, "rewards/final_brier_reward_long_step": 0.8331218957901001, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7765412330627441, "step": 274 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 677.0, "completions/max_terminated_length": 677.0, "completions/mean_length": 295.03515625, "completions/mean_terminated_length": 295.03515625, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.44, "grad_norm": 0.027292657643556595, "learning_rate": 6.227758007117438e-07, "loss": 0.0172, "num_tokens": 134653262.0, "reward": 1.4425451755523682, "reward_std": 0.19112396240234375, "rewards/accuracy_reward_long_step": 0.55859375, "rewards/final_brier_reward_long_step": 0.7454347610473633, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7981832027435303, "step": 275 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 475.0, "completions/max_terminated_length": 475.0, "completions/mean_length": 280.5625, "completions/mean_terminated_length": 280.5625, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.4416, "grad_norm": 0.03567759320139885, "learning_rate": 6.209964412811388e-07, "loss": -0.001, "num_tokens": 135144110.0, "reward": 1.4016071557998657, "reward_std": 0.180876225233078, "rewards/accuracy_reward_long_step": 0.53125, "rewards/final_brier_reward_long_step": 0.7937929630279541, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.6876357793807983, "step": 276 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 615.0, "completions/max_terminated_length": 615.0, "completions/mean_length": 296.69921875, "completions/mean_terminated_length": 296.69921875, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.4432, "grad_norm": 0.027701787650585175, "learning_rate": 6.192170818505338e-07, "loss": 0.0152, "num_tokens": 135657897.0, "reward": 1.2873167991638184, "reward_std": 0.15553465485572815, "rewards/accuracy_reward_long_step": 0.40625, "rewards/final_brier_reward_long_step": 0.7762769460678101, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7479904890060425, "step": 277 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 567.0, "completions/max_terminated_length": 567.0, "completions/mean_length": 288.46484375, "completions/mean_terminated_length": 288.46484375, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.4448, "grad_norm": 0.027518663555383682, "learning_rate": 6.174377224199287e-07, "loss": 0.0105, "num_tokens": 136161664.0, "reward": 1.4033050537109375, "reward_std": 0.16255879402160645, "rewards/accuracy_reward_long_step": 0.5078125, "rewards/final_brier_reward_long_step": 0.807449996471405, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7745203971862793, "step": 278 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 601.0, "completions/max_terminated_length": 601.0, "completions/mean_length": 289.26171875, "completions/mean_terminated_length": 289.26171875, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.4464, "grad_norm": 0.030349889770150185, "learning_rate": 6.156583629893238e-07, "loss": 0.0083, "num_tokens": 136661251.0, "reward": 1.3778247833251953, "reward_std": 0.22678440809249878, "rewards/accuracy_reward_long_step": 0.5078125, "rewards/final_brier_reward_long_step": 0.7540500164031982, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7259989976882935, "step": 279 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 492.0, "completions/max_terminated_length": 492.0, "completions/mean_length": 292.5859375, "completions/mean_terminated_length": 292.5859375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.448, "grad_norm": 0.026998843997716904, "learning_rate": 6.138790035587188e-07, "loss": 0.0038, "num_tokens": 137162705.0, "reward": 1.552132248878479, "reward_std": 0.09305281937122345, "rewards/accuracy_reward_long_step": 0.640625, "rewards/final_brier_reward_long_step": 0.8342460989952087, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.811782956123352, "step": 280 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 539.0, "completions/max_terminated_length": 539.0, "completions/mean_length": 301.2265625, "completions/mean_terminated_length": 301.2265625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.4496, "grad_norm": 0.02617485634982586, "learning_rate": 6.120996441281139e-07, "loss": -0.0058, "num_tokens": 137672107.0, "reward": 1.3473117351531982, "reward_std": 0.1873582899570465, "rewards/accuracy_reward_long_step": 0.4765625, "rewards/final_brier_reward_long_step": 0.6998116970062256, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.783184826374054, "step": 281 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 560.0, "completions/max_terminated_length": 560.0, "completions/mean_length": 291.34375, "completions/mean_terminated_length": 291.34375, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.4512, "grad_norm": 0.02765418216586113, "learning_rate": 6.103202846975089e-07, "loss": 0.0093, "num_tokens": 138184259.0, "reward": 1.489346981048584, "reward_std": 0.16857793927192688, "rewards/accuracy_reward_long_step": 0.59375, "rewards/final_brier_reward_long_step": 0.8282409906387329, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7541468143463135, "step": 282 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 581.0, "completions/max_terminated_length": 581.0, "completions/mean_length": 286.015625, "completions/mean_terminated_length": 286.015625, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.4528, "grad_norm": 0.029208846390247345, "learning_rate": 6.085409252669039e-07, "loss": -0.0042, "num_tokens": 138692327.0, "reward": 1.3559755086898804, "reward_std": 0.20295041799545288, "rewards/accuracy_reward_long_step": 0.5, "rewards/final_brier_reward_long_step": 0.7214418053627014, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7024602293968201, "step": 283 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 609.0, "completions/max_terminated_length": 609.0, "completions/mean_length": 301.9296875, "completions/mean_terminated_length": 301.9296875, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.4544, "grad_norm": 0.030748292803764343, "learning_rate": 6.067615658362989e-07, "loss": -0.0041, "num_tokens": 139194445.0, "reward": 1.1845823526382446, "reward_std": 0.12908682227134705, "rewards/accuracy_reward_long_step": 0.30859375, "rewards/final_brier_reward_long_step": 0.7271432876586914, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7768109440803528, "step": 284 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 529.0, "completions/max_terminated_length": 529.0, "completions/mean_length": 282.84765625, "completions/mean_terminated_length": 282.84765625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.456, "grad_norm": 0.0292031429708004, "learning_rate": 6.04982206405694e-07, "loss": 0.0071, "num_tokens": 139705542.0, "reward": 1.32478928565979, "reward_std": 0.21371236443519592, "rewards/accuracy_reward_long_step": 0.43359375, "rewards/final_brier_reward_long_step": 0.7342382073402405, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8305443525314331, "step": 285 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 494.0, "completions/max_terminated_length": 494.0, "completions/mean_length": 290.70703125, "completions/mean_terminated_length": 290.70703125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.4576, "grad_norm": 0.027747957035899162, "learning_rate": 6.032028469750889e-07, "loss": -0.0002, "num_tokens": 140203851.0, "reward": 1.3524994850158691, "reward_std": 0.12304867804050446, "rewards/accuracy_reward_long_step": 0.44921875, "rewards/final_brier_reward_long_step": 0.7922519445419312, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8208708167076111, "step": 286 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 534.0, "completions/max_terminated_length": 534.0, "completions/mean_length": 293.1640625, "completions/mean_terminated_length": 293.1640625, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.4592, "grad_norm": 0.027378322556614876, "learning_rate": 6.014234875444839e-07, "loss": 0.013, "num_tokens": 140702597.0, "reward": 1.173850178718567, "reward_std": 0.15952864289283752, "rewards/accuracy_reward_long_step": 0.31640625, "rewards/final_brier_reward_long_step": 0.7225792407989502, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7071964144706726, "step": 287 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 671.0, "completions/max_terminated_length": 671.0, "completions/mean_length": 285.3203125, "completions/mean_terminated_length": 285.3203125, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.4608, "grad_norm": 0.02744467370212078, "learning_rate": 5.99644128113879e-07, "loss": 0.0087, "num_tokens": 141206695.0, "reward": 1.2383294105529785, "reward_std": 0.14181900024414062, "rewards/accuracy_reward_long_step": 0.359375, "rewards/final_brier_reward_long_step": 0.7510156631469727, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7648018598556519, "step": 288 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 440.0, "completions/max_terminated_length": 440.0, "completions/mean_length": 272.34375, "completions/mean_terminated_length": 272.34375, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.4624, "grad_norm": 0.02806474268436432, "learning_rate": 5.97864768683274e-07, "loss": 0.0099, "num_tokens": 141695007.0, "reward": 1.2011268138885498, "reward_std": 0.14036910235881805, "rewards/accuracy_reward_long_step": 0.3359375, "rewards/final_brier_reward_long_step": 0.7097039222717285, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7510532736778259, "step": 289 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 470.0, "completions/max_terminated_length": 470.0, "completions/mean_length": 266.53125, "completions/mean_terminated_length": 266.53125, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.464, "grad_norm": 0.028714032843708992, "learning_rate": 5.96085409252669e-07, "loss": 0.0078, "num_tokens": 142182015.0, "reward": 1.4051401615142822, "reward_std": 0.19630657136440277, "rewards/accuracy_reward_long_step": 0.515625, "rewards/final_brier_reward_long_step": 0.7274429798126221, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8306175470352173, "step": 290 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 440.0, "completions/max_terminated_length": 440.0, "completions/mean_length": 264.8203125, "completions/mean_terminated_length": 264.8203125, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.4656, "grad_norm": 0.02966773696243763, "learning_rate": 5.94306049822064e-07, "loss": -0.0106, "num_tokens": 142663985.0, "reward": 1.3878998756408691, "reward_std": 0.13589531183242798, "rewards/accuracy_reward_long_step": 0.515625, "rewards/final_brier_reward_long_step": 0.7204523086547852, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7764595746994019, "step": 291 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 501.0, "completions/max_terminated_length": 501.0, "completions/mean_length": 279.4765625, "completions/mean_terminated_length": 279.4765625, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.4672, "grad_norm": 0.02832869067788124, "learning_rate": 5.925266903914591e-07, "loss": 0.005, "num_tokens": 143170555.0, "reward": 1.2539631128311157, "reward_std": 0.18463820219039917, "rewards/accuracy_reward_long_step": 0.3984375, "rewards/final_brier_reward_long_step": 0.6404902338981628, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7816122174263, "step": 292 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 558.0, "completions/max_terminated_length": 558.0, "completions/mean_length": 261.421875, "completions/mean_terminated_length": 261.421875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.4688, "grad_norm": 0.03127996623516083, "learning_rate": 5.90747330960854e-07, "loss": 0.0174, "num_tokens": 143668759.0, "reward": 1.3849046230316162, "reward_std": 0.14204376935958862, "rewards/accuracy_reward_long_step": 0.48828125, "rewards/final_brier_reward_long_step": 0.8194859027862549, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.767007052898407, "step": 293 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 525.0, "completions/max_terminated_length": 525.0, "completions/mean_length": 280.4375, "completions/mean_terminated_length": 280.4375, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.4704, "grad_norm": 0.030284756794571877, "learning_rate": 5.889679715302491e-07, "loss": -0.0029, "num_tokens": 144175575.0, "reward": 1.3805763721466064, "reward_std": 0.18626053631305695, "rewards/accuracy_reward_long_step": 0.48828125, "rewards/final_brier_reward_long_step": 0.7643499970436096, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8048302531242371, "step": 294 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 481.0, "completions/max_terminated_length": 481.0, "completions/mean_length": 267.71484375, "completions/mean_terminated_length": 267.71484375, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.472, "grad_norm": 0.02768511138856411, "learning_rate": 5.871886120996441e-07, "loss": -0.0029, "num_tokens": 144659334.0, "reward": 1.3032722473144531, "reward_std": 0.13138622045516968, "rewards/accuracy_reward_long_step": 0.4140625, "rewards/final_brier_reward_long_step": 0.7461843490600586, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8106545805931091, "step": 295 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 436.0, "completions/max_terminated_length": 436.0, "completions/mean_length": 271.9453125, "completions/mean_terminated_length": 273.01177978515625, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.4736, "grad_norm": 0.03377687931060791, "learning_rate": 5.854092526690391e-07, "loss": 0.0064, "num_tokens": 145164320.0, "reward": 1.291682243347168, "reward_std": 0.21950051188468933, "rewards/accuracy_reward_long_step": 0.3984375, "rewards/final_brier_reward_long_step": 0.7516234517097473, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8291677236557007, "step": 296 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 491.0, "completions/max_terminated_length": 491.0, "completions/mean_length": 261.671875, "completions/mean_terminated_length": 261.671875, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.4752, "grad_norm": 0.0278344564139843, "learning_rate": 5.836298932384342e-07, "loss": -0.0083, "num_tokens": 145661492.0, "reward": 1.4372165203094482, "reward_std": 0.19561487436294556, "rewards/accuracy_reward_long_step": 0.52734375, "rewards/final_brier_reward_long_step": 0.7994953393936157, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8478083610534668, "step": 297 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/max_terminated_length": 459.0, "completions/mean_length": 260.375, "completions/mean_terminated_length": 260.375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.4768, "grad_norm": 0.030550826340913773, "learning_rate": 5.818505338078291e-07, "loss": 0.0046, "num_tokens": 146145276.0, "reward": 1.3957126140594482, "reward_std": 0.1581364870071411, "rewards/accuracy_reward_long_step": 0.53125, "rewards/final_brier_reward_long_step": 0.7615882754325867, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.6962625980377197, "step": 298 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/max_terminated_length": 439.0, "completions/mean_length": 262.453125, "completions/mean_terminated_length": 262.453125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.4784, "grad_norm": 0.02928241901099682, "learning_rate": 5.800711743772242e-07, "loss": -0.0049, "num_tokens": 146631200.0, "reward": 1.560309648513794, "reward_std": 0.15518754720687866, "rewards/accuracy_reward_long_step": 0.68359375, "rewards/final_brier_reward_long_step": 0.7499921321868896, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.756871223449707, "step": 299 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 585.0, "completions/max_terminated_length": 585.0, "completions/mean_length": 270.3203125, "completions/mean_terminated_length": 270.3203125, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.48, "grad_norm": 0.032593853771686554, "learning_rate": 5.782918149466191e-07, "loss": 0.0044, "num_tokens": 147121786.0, "reward": 1.4181792736053467, "reward_std": 0.17730304598808289, "rewards/accuracy_reward_long_step": 0.51953125, "rewards/final_brier_reward_long_step": 0.7884241342544556, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8061679601669312, "step": 300 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.0, "completions/max_terminated_length": 471.0, "completions/mean_length": 275.15625, "completions/mean_terminated_length": 275.15625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.4816, "grad_norm": 0.028711630031466484, "learning_rate": 5.765124555160142e-07, "loss": 0.0037, "num_tokens": 147618706.0, "reward": 1.3091957569122314, "reward_std": 0.17498339712619781, "rewards/accuracy_reward_long_step": 0.42578125, "rewards/final_brier_reward_long_step": 0.6745136976242065, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.859144389629364, "step": 301 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 469.0, "completions/max_terminated_length": 469.0, "completions/mean_length": 272.421875, "completions/mean_terminated_length": 272.421875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.4832, "grad_norm": 0.02766992151737213, "learning_rate": 5.747330960854092e-07, "loss": -0.0112, "num_tokens": 148128422.0, "reward": 1.4776198863983154, "reward_std": 0.13255634903907776, "rewards/accuracy_reward_long_step": 0.58984375, "rewards/final_brier_reward_long_step": 0.7580232620239258, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7930811643600464, "step": 302 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 464.0, "completions/max_terminated_length": 464.0, "completions/mean_length": 270.1875, "completions/mean_terminated_length": 270.1875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.4848, "grad_norm": 0.03053920716047287, "learning_rate": 5.729537366548043e-07, "loss": 0.0108, "num_tokens": 148619142.0, "reward": 1.3393454551696777, "reward_std": 0.1206967830657959, "rewards/accuracy_reward_long_step": 0.44921875, "rewards/final_brier_reward_long_step": 0.7705498933792114, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7899569272994995, "step": 303 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 578.0, "completions/max_terminated_length": 578.0, "completions/mean_length": 275.53125, "completions/mean_terminated_length": 275.53125, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.4864, "grad_norm": 0.0303883645683527, "learning_rate": 5.711743772241993e-07, "loss": 0.0186, "num_tokens": 149120358.0, "reward": 1.4402146339416504, "reward_std": 0.17118000984191895, "rewards/accuracy_reward_long_step": 0.5234375, "rewards/final_brier_reward_long_step": 0.8297659158706665, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8373425006866455, "step": 304 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 565.0, "completions/max_terminated_length": 565.0, "completions/mean_length": 282.84765625, "completions/mean_terminated_length": 282.84765625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.488, "grad_norm": 0.02846652828156948, "learning_rate": 5.693950177935943e-07, "loss": 0.0182, "num_tokens": 149610279.0, "reward": 1.5291298627853394, "reward_std": 0.18615154922008514, "rewards/accuracy_reward_long_step": 0.62890625, "rewards/final_brier_reward_long_step": 0.8304492235183716, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7860701084136963, "step": 305 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 509.0, "completions/max_terminated_length": 509.0, "completions/mean_length": 264.23046875, "completions/mean_terminated_length": 264.23046875, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.4896, "grad_norm": 0.03294990211725235, "learning_rate": 5.676156583629893e-07, "loss": 0.0049, "num_tokens": 150121194.0, "reward": 1.437424659729004, "reward_std": 0.1931784749031067, "rewards/accuracy_reward_long_step": 0.55078125, "rewards/final_brier_reward_long_step": 0.7560929656028748, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7904808521270752, "step": 306 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 466.0, "completions/max_terminated_length": 466.0, "completions/mean_length": 270.6640625, "completions/mean_terminated_length": 270.6640625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.4912, "grad_norm": 0.028145214542746544, "learning_rate": 5.658362989323842e-07, "loss": 0.0125, "num_tokens": 150638356.0, "reward": 1.3298184871673584, "reward_std": 0.15343712270259857, "rewards/accuracy_reward_long_step": 0.45703125, "rewards/final_brier_reward_long_step": 0.7142324447631836, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.77691650390625, "step": 307 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 506.0, "completions/max_terminated_length": 506.0, "completions/mean_length": 265.69140625, "completions/mean_terminated_length": 265.69140625, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.4928, "grad_norm": 0.03243206813931465, "learning_rate": 5.640569395017794e-07, "loss": -0.0123, "num_tokens": 151144573.0, "reward": 1.4519245624542236, "reward_std": 0.1609051525592804, "rewards/accuracy_reward_long_step": 0.5859375, "rewards/final_brier_reward_long_step": 0.7463042736053467, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7176437377929688, "step": 308 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 525.0, "completions/max_terminated_length": 525.0, "completions/mean_length": 253.2734375, "completions/mean_terminated_length": 253.2734375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.4944, "grad_norm": 0.028735455125570297, "learning_rate": 5.622775800711744e-07, "loss": 0.0077, "num_tokens": 151640987.0, "reward": 1.323132038116455, "reward_std": 0.12191449105739594, "rewards/accuracy_reward_long_step": 0.4453125, "rewards/final_brier_reward_long_step": 0.7362834215164185, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7749943733215332, "step": 309 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 517.0, "completions/max_terminated_length": 517.0, "completions/mean_length": 267.671875, "completions/mean_terminated_length": 267.671875, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 0.496, "grad_norm": 0.03202186897397041, "learning_rate": 5.604982206405694e-07, "loss": 0.0031, "num_tokens": 152130871.0, "reward": 1.4370348453521729, "reward_std": 0.1757480651140213, "rewards/accuracy_reward_long_step": 0.57421875, "rewards/final_brier_reward_long_step": 0.7055065631866455, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.745758056640625, "step": 310 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 476.0, "completions/max_terminated_length": 476.0, "completions/mean_length": 268.640625, "completions/mean_terminated_length": 268.640625, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.4976, "grad_norm": 0.029871582984924316, "learning_rate": 5.587188612099644e-07, "loss": 0.0021, "num_tokens": 152631003.0, "reward": 1.5090341567993164, "reward_std": 0.16095715761184692, "rewards/accuracy_reward_long_step": 0.625, "rewards/final_brier_reward_long_step": 0.764398455619812, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7717381715774536, "step": 311 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 524.0, "completions/max_terminated_length": 524.0, "completions/mean_length": 259.47265625, "completions/mean_terminated_length": 259.47265625, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.4992, "grad_norm": 0.03202946111559868, "learning_rate": 5.569395017793594e-07, "loss": -0.006, "num_tokens": 153126428.0, "reward": 1.4881701469421387, "reward_std": 0.14949887990951538, "rewards/accuracy_reward_long_step": 0.5859375, "rewards/final_brier_reward_long_step": 0.7662238478660583, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.842707097530365, "step": 312 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 515.0, "completions/max_terminated_length": 515.0, "completions/mean_length": 249.61328125, "completions/mean_terminated_length": 249.61328125, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.5008, "grad_norm": 0.03125175088644028, "learning_rate": 5.551601423487544e-07, "loss": 0.0167, "num_tokens": 153607737.0, "reward": 1.461435079574585, "reward_std": 0.16793528199195862, "rewards/accuracy_reward_long_step": 0.5625, "rewards/final_brier_reward_long_step": 0.8274839520454407, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7682562470436096, "step": 313 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 547.0, "completions/max_terminated_length": 547.0, "completions/mean_length": 260.56640625, "completions/mean_terminated_length": 260.56640625, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.5024, "grad_norm": 0.03080155700445175, "learning_rate": 5.533807829181495e-07, "loss": -0.0045, "num_tokens": 154100658.0, "reward": 1.3649942874908447, "reward_std": 0.20213352143764496, "rewards/accuracy_reward_long_step": 0.48828125, "rewards/final_brier_reward_long_step": 0.7633898258209229, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7434619665145874, "step": 314 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 465.0, "completions/max_terminated_length": 465.0, "completions/mean_length": 260.87109375, "completions/mean_terminated_length": 260.87109375, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.504, "grad_norm": 0.030286213383078575, "learning_rate": 5.516014234875445e-07, "loss": 0.0064, "num_tokens": 154586593.0, "reward": 1.4095053672790527, "reward_std": 0.19581949710845947, "rewards/accuracy_reward_long_step": 0.5390625, "rewards/final_brier_reward_long_step": 0.7196574211120605, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7621144652366638, "step": 315 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 575.0, "completions/max_terminated_length": 575.0, "completions/mean_length": 282.4765625, "completions/mean_terminated_length": 282.4765625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.5056, "grad_norm": 0.03086298704147339, "learning_rate": 5.498220640569395e-07, "loss": -0.0094, "num_tokens": 155107683.0, "reward": 1.233945608139038, "reward_std": 0.2137664556503296, "rewards/accuracy_reward_long_step": 0.40234375, "rewards/final_brier_reward_long_step": 0.5642339587211609, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7621732950210571, "step": 316 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 646.0, "completions/max_terminated_length": 646.0, "completions/mean_length": 273.9296875, "completions/mean_terminated_length": 273.9296875, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.5072, "grad_norm": 0.029751170426607132, "learning_rate": 5.480427046263345e-07, "loss": 0.0058, "num_tokens": 155611881.0, "reward": 1.3837661743164062, "reward_std": 0.1578415334224701, "rewards/accuracy_reward_long_step": 0.484375, "rewards/final_brier_reward_long_step": 0.7951062917709351, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8024587631225586, "step": 317 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 655.0, "completions/max_terminated_length": 655.0, "completions/mean_length": 270.0625, "completions/mean_terminated_length": 270.0625, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.5088, "grad_norm": 0.030748968943953514, "learning_rate": 5.462633451957295e-07, "loss": 0.0082, "num_tokens": 156108097.0, "reward": 1.4630606174468994, "reward_std": 0.1447874754667282, "rewards/accuracy_reward_long_step": 0.5546875, "rewards/final_brier_reward_long_step": 0.8417631983757019, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7917290329933167, "step": 318 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 568.0, "completions/max_terminated_length": 568.0, "completions/mean_length": 261.6328125, "completions/mean_terminated_length": 261.6328125, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.5104, "grad_norm": 0.03255579620599747, "learning_rate": 5.444839857651245e-07, "loss": 0.0189, "num_tokens": 156600459.0, "reward": 1.453078031539917, "reward_std": 0.21487998962402344, "rewards/accuracy_reward_long_step": 0.55859375, "rewards/final_brier_reward_long_step": 0.7861437797546387, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7917930483818054, "step": 319 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 482.0, "completions/max_terminated_length": 482.0, "completions/mean_length": 255.890625, "completions/mean_terminated_length": 255.890625, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.512, "grad_norm": 0.03229495882987976, "learning_rate": 5.427046263345195e-07, "loss": 0.0071, "num_tokens": 157085455.0, "reward": 1.3672808408737183, "reward_std": 0.13604342937469482, "rewards/accuracy_reward_long_step": 0.48046875, "rewards/final_brier_reward_long_step": 0.7324371337890625, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8148113489151001, "step": 320 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 508.0, "completions/max_terminated_length": 508.0, "completions/mean_length": 260.43359375, "completions/mean_terminated_length": 260.43359375, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.5136, "grad_norm": 0.03101903200149536, "learning_rate": 5.409252669039146e-07, "loss": 0.0156, "num_tokens": 157573070.0, "reward": 1.4484410285949707, "reward_std": 0.19034847617149353, "rewards/accuracy_reward_long_step": 0.5625, "rewards/final_brier_reward_long_step": 0.7853777408599854, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.758386492729187, "step": 321 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 582.0, "completions/max_terminated_length": 582.0, "completions/mean_length": 259.2109375, "completions/mean_terminated_length": 259.2109375, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.5152, "grad_norm": 0.032188545912504196, "learning_rate": 5.391459074733096e-07, "loss": 0.0058, "num_tokens": 158069828.0, "reward": 1.2921218872070312, "reward_std": 0.1610349416732788, "rewards/accuracy_reward_long_step": 0.4453125, "rewards/final_brier_reward_long_step": 0.6915820837020874, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.703468382358551, "step": 322 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 503.0, "completions/max_terminated_length": 503.0, "completions/mean_length": 253.05859375, "completions/mean_terminated_length": 253.05859375, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.5168, "grad_norm": 0.030405355617403984, "learning_rate": 5.373665480427047e-07, "loss": 0.0005, "num_tokens": 158552475.0, "reward": 1.4460304975509644, "reward_std": 0.18330608308315277, "rewards/accuracy_reward_long_step": 0.5390625, "rewards/final_brier_reward_long_step": 0.8409663438796997, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7869055271148682, "step": 323 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 450.0, "completions/max_terminated_length": 450.0, "completions/mean_length": 249.57421875, "completions/mean_terminated_length": 249.57421875, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.5184, "grad_norm": 0.033257272094488144, "learning_rate": 5.355871886120996e-07, "loss": -0.0137, "num_tokens": 159043982.0, "reward": 1.4014875888824463, "reward_std": 0.10161018371582031, "rewards/accuracy_reward_long_step": 0.49609375, "rewards/final_brier_reward_long_step": 0.8111592531204224, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8104158043861389, "step": 324 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 517.0, "completions/max_terminated_length": 517.0, "completions/mean_length": 254.00390625, "completions/mean_terminated_length": 254.00390625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.52, "grad_norm": 0.03563224524259567, "learning_rate": 5.338078291814946e-07, "loss": -0.0095, "num_tokens": 159536455.0, "reward": 1.3375245332717896, "reward_std": 0.16401183605194092, "rewards/accuracy_reward_long_step": 0.46875, "rewards/final_brier_reward_long_step": 0.6796140670776367, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7954840660095215, "step": 325 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.0, "completions/max_terminated_length": 461.0, "completions/mean_length": 242.38671875, "completions/mean_terminated_length": 242.38671875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.5216, "grad_norm": 0.034420643001794815, "learning_rate": 5.320284697508896e-07, "loss": 0.0041, "num_tokens": 160011394.0, "reward": 1.3240625858306885, "reward_std": 0.15140679478645325, "rewards/accuracy_reward_long_step": 0.4609375, "rewards/final_brier_reward_long_step": 0.7299777269363403, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7225224375724792, "step": 326 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 497.0, "completions/max_terminated_length": 497.0, "completions/mean_length": 248.5546875, "completions/mean_terminated_length": 248.5546875, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.5232, "grad_norm": 0.03537153825163841, "learning_rate": 5.302491103202846e-07, "loss": -0.012, "num_tokens": 160502680.0, "reward": 1.5624842643737793, "reward_std": 0.18345743417739868, "rewards/accuracy_reward_long_step": 0.69921875, "rewards/final_brier_reward_long_step": 0.7545043230056763, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.6985577344894409, "step": 327 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 608.0, "completions/max_terminated_length": 608.0, "completions/mean_length": 255.44921875, "completions/mean_terminated_length": 255.44921875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.5248, "grad_norm": 0.038355033844709396, "learning_rate": 5.284697508896797e-07, "loss": 0.0104, "num_tokens": 160992083.0, "reward": 1.406353235244751, "reward_std": 0.15258005261421204, "rewards/accuracy_reward_long_step": 0.51953125, "rewards/final_brier_reward_long_step": 0.7742776274681091, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7730100750923157, "step": 328 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 478.0, "completions/max_terminated_length": 478.0, "completions/mean_length": 249.23046875, "completions/mean_terminated_length": 249.23046875, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.5264, "grad_norm": 0.03090524673461914, "learning_rate": 5.266903914590747e-07, "loss": 0.0061, "num_tokens": 161497718.0, "reward": 1.4268403053283691, "reward_std": 0.11180461198091507, "rewards/accuracy_reward_long_step": 0.49609375, "rewards/final_brier_reward_long_step": 0.8800667524337769, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8429189324378967, "step": 329 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 442.0, "completions/max_terminated_length": 442.0, "completions/mean_length": 250.3515625, "completions/mean_terminated_length": 250.3515625, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.528, "grad_norm": 0.034447081387043, "learning_rate": 5.249110320284698e-07, "loss": 0.0033, "num_tokens": 161980056.0, "reward": 1.3684167861938477, "reward_std": 0.18932107090950012, "rewards/accuracy_reward_long_step": 0.484375, "rewards/final_brier_reward_long_step": 0.7735214829444885, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7626460790634155, "step": 330 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 541.0, "completions/max_terminated_length": 541.0, "completions/mean_length": 249.62109375, "completions/mean_terminated_length": 249.62109375, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.5296, "grad_norm": 0.034051742404699326, "learning_rate": 5.231316725978647e-07, "loss": -0.0067, "num_tokens": 162477343.0, "reward": 1.403045654296875, "reward_std": 0.18791253864765167, "rewards/accuracy_reward_long_step": 0.5234375, "rewards/final_brier_reward_long_step": 0.7827702760696411, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7434748411178589, "step": 331 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 618.0, "completions/max_terminated_length": 618.0, "completions/mean_length": 243.046875, "completions/mean_terminated_length": 243.046875, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.5312, "grad_norm": 0.03361974656581879, "learning_rate": 5.213523131672598e-07, "loss": 0.0114, "num_tokens": 162956923.0, "reward": 1.544013500213623, "reward_std": 0.20560047030448914, "rewards/accuracy_reward_long_step": 0.66796875, "rewards/final_brier_reward_long_step": 0.7412210702896118, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7629580497741699, "step": 332 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 578.0, "completions/max_terminated_length": 578.0, "completions/mean_length": 256.22265625, "completions/mean_terminated_length": 256.22265625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.5328, "grad_norm": 0.03932815417647362, "learning_rate": 5.195729537366548e-07, "loss": -0.0093, "num_tokens": 163447412.0, "reward": 1.455832839012146, "reward_std": 0.1748843789100647, "rewards/accuracy_reward_long_step": 0.546875, "rewards/final_brier_reward_long_step": 0.825259804725647, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.810571551322937, "step": 333 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 538.0, "completions/max_terminated_length": 538.0, "completions/mean_length": 245.2265625, "completions/mean_terminated_length": 245.2265625, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.5344, "grad_norm": 0.03335999324917793, "learning_rate": 5.177935943060498e-07, "loss": -0.0032, "num_tokens": 163941862.0, "reward": 1.3817181587219238, "reward_std": 0.17352280020713806, "rewards/accuracy_reward_long_step": 0.515625, "rewards/final_brier_reward_long_step": 0.7708051204681396, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.6935676336288452, "step": 334 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 484.0, "completions/max_terminated_length": 484.0, "completions/mean_length": 236.14453125, "completions/mean_terminated_length": 236.14453125, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.536, "grad_norm": 0.03453889861702919, "learning_rate": 5.160142348754448e-07, "loss": 0.0059, "num_tokens": 164414147.0, "reward": 1.4293192625045776, "reward_std": 0.1838047057390213, "rewards/accuracy_reward_long_step": 0.52734375, "rewards/final_brier_reward_long_step": 0.8181675672531128, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7897346019744873, "step": 335 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 448.0, "completions/max_terminated_length": 448.0, "completions/mean_length": 247.52734375, "completions/mean_terminated_length": 247.52734375, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.5376, "grad_norm": 0.03347809612751007, "learning_rate": 5.142348754448398e-07, "loss": -0.0082, "num_tokens": 164904322.0, "reward": 1.4061800241470337, "reward_std": 0.15439936518669128, "rewards/accuracy_reward_long_step": 0.5546875, "rewards/final_brier_reward_long_step": 0.740646481513977, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.6653236746788025, "step": 336 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 547.0, "completions/max_terminated_length": 547.0, "completions/mean_length": 246.3828125, "completions/mean_terminated_length": 246.3828125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.5392, "grad_norm": 0.03456057235598564, "learning_rate": 5.124555160142349e-07, "loss": 0.002, "num_tokens": 165401044.0, "reward": 1.3541990518569946, "reward_std": 0.1308155059814453, "rewards/accuracy_reward_long_step": 0.515625, "rewards/final_brier_reward_long_step": 0.6761799454689026, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.6781162023544312, "step": 337 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 693.0, "completions/max_terminated_length": 693.0, "completions/mean_length": 259.3046875, "completions/mean_terminated_length": 259.3046875, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.5408, "grad_norm": 0.029813647270202637, "learning_rate": 5.106761565836298e-07, "loss": 0.0009, "num_tokens": 165906058.0, "reward": 1.3256361484527588, "reward_std": 0.19530092179775238, "rewards/accuracy_reward_long_step": 0.46484375, "rewards/final_brier_reward_long_step": 0.670098066329956, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7730719447135925, "step": 338 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/max_terminated_length": 439.0, "completions/mean_length": 246.8046875, "completions/mean_terminated_length": 246.8046875, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.5424, "grad_norm": 0.03796224668622017, "learning_rate": 5.088967971530249e-07, "loss": 0.0075, "num_tokens": 166401920.0, "reward": 1.4465839862823486, "reward_std": 0.207666277885437, "rewards/accuracy_reward_long_step": 0.625, "rewards/final_brier_reward_long_step": 0.7033705711364746, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.5829657316207886, "step": 339 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 522.0, "completions/max_terminated_length": 522.0, "completions/mean_length": 260.37109375, "completions/mean_terminated_length": 260.37109375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.544, "grad_norm": 0.03487522527575493, "learning_rate": 5.071174377224199e-07, "loss": -0.0028, "num_tokens": 166900079.0, "reward": 1.364084243774414, "reward_std": 0.12331333756446838, "rewards/accuracy_reward_long_step": 0.50390625, "rewards/final_brier_reward_long_step": 0.7763662934303284, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.6643457412719727, "step": 340 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 882.0, "completions/max_terminated_length": 882.0, "completions/mean_length": 253.23828125, "completions/mean_terminated_length": 253.23828125, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.5456, "grad_norm": 0.03571302443742752, "learning_rate": 5.053380782918149e-07, "loss": 0.0008, "num_tokens": 167388084.0, "reward": 1.4308526515960693, "reward_std": 0.14994728565216064, "rewards/accuracy_reward_long_step": 0.55078125, "rewards/final_brier_reward_long_step": 0.7553993463516235, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7648867964744568, "step": 341 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 547.0, "completions/max_terminated_length": 547.0, "completions/mean_length": 243.5703125, "completions/mean_terminated_length": 243.5703125, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.5472, "grad_norm": 0.034076888114213943, "learning_rate": 5.0355871886121e-07, "loss": -0.0049, "num_tokens": 167867318.0, "reward": 1.4327844381332397, "reward_std": 0.17568854987621307, "rewards/accuracy_reward_long_step": 0.5625, "rewards/final_brier_reward_long_step": 0.7377663850784302, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7433711290359497, "step": 342 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 492.0, "completions/max_terminated_length": 492.0, "completions/mean_length": 231.46875, "completions/mean_terminated_length": 231.46875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.5488, "grad_norm": 0.03265485167503357, "learning_rate": 5.01779359430605e-07, "loss": 0.0058, "num_tokens": 168334662.0, "reward": 1.5790760517120361, "reward_std": 0.20140470564365387, "rewards/accuracy_reward_long_step": 0.69921875, "rewards/final_brier_reward_long_step": 0.7507095336914062, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7765324115753174, "step": 343 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/max_terminated_length": 459.0, "completions/mean_length": 242.12890625, "completions/mean_terminated_length": 242.12890625, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.5504, "grad_norm": 0.0313512459397316, "learning_rate": 5e-07, "loss": -0.0061, "num_tokens": 168823871.0, "reward": 1.3449064493179321, "reward_std": 0.16971346735954285, "rewards/accuracy_reward_long_step": 0.47265625, "rewards/final_brier_reward_long_step": 0.7380057573318481, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7509950995445251, "step": 344 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 432.0, "completions/max_terminated_length": 432.0, "completions/mean_length": 240.40625, "completions/mean_terminated_length": 240.40625, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.552, "grad_norm": 0.035064004361629486, "learning_rate": 4.98220640569395e-07, "loss": 0.0009, "num_tokens": 169308207.0, "reward": 1.447171688079834, "reward_std": 0.1690702587366104, "rewards/accuracy_reward_long_step": 0.5703125, "rewards/final_brier_reward_long_step": 0.766781210899353, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7406556606292725, "step": 345 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 542.0, "completions/max_terminated_length": 542.0, "completions/mean_length": 255.90234375, "completions/mean_terminated_length": 255.90234375, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.5536, "grad_norm": 0.03382926061749458, "learning_rate": 4.9644128113879e-07, "loss": 0.0094, "num_tokens": 169790286.0, "reward": 1.3020596504211426, "reward_std": 0.1404658854007721, "rewards/accuracy_reward_long_step": 0.4375, "rewards/final_brier_reward_long_step": 0.7163107991218567, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.741927981376648, "step": 346 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 472.0, "completions/max_terminated_length": 472.0, "completions/mean_length": 238.2265625, "completions/mean_terminated_length": 238.2265625, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.5552, "grad_norm": 0.04365375638008118, "learning_rate": 4.94661921708185e-07, "loss": 0.0012, "num_tokens": 170253896.0, "reward": 1.3717888593673706, "reward_std": 0.159200519323349, "rewards/accuracy_reward_long_step": 0.46875, "rewards/final_brier_reward_long_step": 0.7934491634368896, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8187063932418823, "step": 347 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 448.0, "completions/max_terminated_length": 448.0, "completions/mean_length": 253.41796875, "completions/mean_terminated_length": 253.41796875, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.5568, "grad_norm": 0.034737542271614075, "learning_rate": 4.9288256227758e-07, "loss": 0.0096, "num_tokens": 170751355.0, "reward": 1.2173829078674316, "reward_std": 0.1510533094406128, "rewards/accuracy_reward_long_step": 0.375, "rewards/final_brier_reward_long_step": 0.6595523357391357, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7099793553352356, "step": 348 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 456.0, "completions/max_terminated_length": 456.0, "completions/mean_length": 239.58203125, "completions/mean_terminated_length": 239.58203125, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.5584, "grad_norm": 0.04001186043024063, "learning_rate": 4.91103202846975e-07, "loss": -0.002, "num_tokens": 171235744.0, "reward": 1.4833966493606567, "reward_std": 0.1600235551595688, "rewards/accuracy_reward_long_step": 0.5859375, "rewards/final_brier_reward_long_step": 0.8168105483055115, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7730263471603394, "step": 349 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 571.0, "completions/max_terminated_length": 571.0, "completions/mean_length": 255.8125, "completions/mean_terminated_length": 255.8125, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.56, "grad_norm": 0.03591832518577576, "learning_rate": 4.893238434163701e-07, "loss": -0.0068, "num_tokens": 171725112.0, "reward": 1.219707727432251, "reward_std": 0.14950095117092133, "rewards/accuracy_reward_long_step": 0.37109375, "rewards/final_brier_reward_long_step": 0.6841285228729248, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7103271484375, "step": 350 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 608.0, "completions/max_terminated_length": 608.0, "completions/mean_length": 253.22265625, "completions/mean_terminated_length": 253.22265625, "completions/min_length": 80.0, "completions/min_terminated_length": 80.0, "epoch": 0.5616, "grad_norm": 0.033379342406988144, "learning_rate": 4.875444839857651e-07, "loss": 0.0029, "num_tokens": 172224321.0, "reward": 1.3003888130187988, "reward_std": 0.14580082893371582, "rewards/accuracy_reward_long_step": 0.41015625, "rewards/final_brier_reward_long_step": 0.7715871334075928, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7893432974815369, "step": 351 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 505.0, "completions/max_terminated_length": 505.0, "completions/mean_length": 247.00390625, "completions/mean_terminated_length": 247.00390625, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.5632, "grad_norm": 0.03563377261161804, "learning_rate": 4.857651245551601e-07, "loss": -0.0141, "num_tokens": 172723978.0, "reward": 1.2515490055084229, "reward_std": 0.20609885454177856, "rewards/accuracy_reward_long_step": 0.42578125, "rewards/final_brier_reward_long_step": 0.5958093404769897, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7072615027427673, "step": 352 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/max_terminated_length": 444.0, "completions/mean_length": 240.86328125, "completions/mean_terminated_length": 240.86328125, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.5648, "grad_norm": 0.030260441824793816, "learning_rate": 4.839857651245551e-07, "loss": 0.0111, "num_tokens": 173228511.0, "reward": 1.430063009262085, "reward_std": 0.10446056723594666, "rewards/accuracy_reward_long_step": 0.5234375, "rewards/final_brier_reward_long_step": 0.7712934017181396, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8552085757255554, "step": 353 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 423.0, "completions/max_terminated_length": 423.0, "completions/mean_length": 246.74609375, "completions/mean_terminated_length": 246.74609375, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.5664, "grad_norm": 0.03534315153956413, "learning_rate": 4.822064056939501e-07, "loss": 0.007, "num_tokens": 173716982.0, "reward": 1.3442493677139282, "reward_std": 0.20917916297912598, "rewards/accuracy_reward_long_step": 0.48046875, "rewards/final_brier_reward_long_step": 0.7205374836921692, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7345851063728333, "step": 354 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 458.0, "completions/max_terminated_length": 458.0, "completions/mean_length": 243.53125, "completions/mean_terminated_length": 243.53125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.568, "grad_norm": 0.046947211027145386, "learning_rate": 4.804270462633451e-07, "loss": -0.0176, "num_tokens": 174209438.0, "reward": 1.4447617530822754, "reward_std": 0.1805291771888733, "rewards/accuracy_reward_long_step": 0.57421875, "rewards/final_brier_reward_long_step": 0.7230343818664551, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7591376304626465, "step": 355 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 611.0, "completions/max_terminated_length": 611.0, "completions/mean_length": 244.32421875, "completions/mean_terminated_length": 244.32421875, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.5696, "grad_norm": 0.03166520223021507, "learning_rate": 4.786476868327403e-07, "loss": 0.0097, "num_tokens": 174683969.0, "reward": 1.4561214447021484, "reward_std": 0.1352284848690033, "rewards/accuracy_reward_long_step": 0.57421875, "rewards/final_brier_reward_long_step": 0.7665960788726807, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7610146999359131, "step": 356 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 540.0, "completions/max_terminated_length": 540.0, "completions/mean_length": 265.96875, "completions/mean_terminated_length": 265.96875, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.5712, "grad_norm": 0.03730427846312523, "learning_rate": 4.768683274021353e-07, "loss": -0.0075, "num_tokens": 175174505.0, "reward": 1.296769142150879, "reward_std": 0.15302547812461853, "rewards/accuracy_reward_long_step": 0.4140625, "rewards/final_brier_reward_long_step": 0.7661605477333069, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7646663188934326, "step": 357 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 571.0, "completions/max_terminated_length": 571.0, "completions/mean_length": 258.87890625, "completions/mean_terminated_length": 258.87890625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.5728, "grad_norm": 0.038667719811201096, "learning_rate": 4.7508896797153023e-07, "loss": 0.01, "num_tokens": 175667706.0, "reward": 1.4412386417388916, "reward_std": 0.21622003614902496, "rewards/accuracy_reward_long_step": 0.546875, "rewards/final_brier_reward_long_step": 0.7790859341621399, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8061808347702026, "step": 358 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 449.0, "completions/max_terminated_length": 449.0, "completions/mean_length": 249.40234375, "completions/mean_terminated_length": 249.40234375, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.5744, "grad_norm": 0.03672811761498451, "learning_rate": 4.733096085409252e-07, "loss": 0.0064, "num_tokens": 176151705.0, "reward": 1.4096336364746094, "reward_std": 0.14016187191009521, "rewards/accuracy_reward_long_step": 0.51171875, "rewards/final_brier_reward_long_step": 0.8072555065155029, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7844040393829346, "step": 359 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 491.0, "completions/max_terminated_length": 491.0, "completions/mean_length": 256.26953125, "completions/mean_terminated_length": 256.26953125, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.576, "grad_norm": 0.031908176839351654, "learning_rate": 4.7153024911032026e-07, "loss": 0.0018, "num_tokens": 176654878.0, "reward": 1.2949192523956299, "reward_std": 0.1339997947216034, "rewards/accuracy_reward_long_step": 0.41015625, "rewards/final_brier_reward_long_step": 0.7606054544448853, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7784462571144104, "step": 360 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 586.0, "completions/max_terminated_length": 586.0, "completions/mean_length": 260.42578125, "completions/mean_terminated_length": 260.42578125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.5776, "grad_norm": 0.03353104740381241, "learning_rate": 4.697508896797153e-07, "loss": -0.0028, "num_tokens": 177158483.0, "reward": 1.3936898708343506, "reward_std": 0.17937517166137695, "rewards/accuracy_reward_long_step": 0.51953125, "rewards/final_brier_reward_long_step": 0.763106644153595, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7335278391838074, "step": 361 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 533.0, "completions/max_terminated_length": 533.0, "completions/mean_length": 250.55078125, "completions/mean_terminated_length": 250.55078125, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.5792, "grad_norm": 0.0516292005777359, "learning_rate": 4.679715302491103e-07, "loss": 0.002, "num_tokens": 177665080.0, "reward": 1.3700807094573975, "reward_std": 0.1468803435564041, "rewards/accuracy_reward_long_step": 0.5234375, "rewards/final_brier_reward_long_step": 0.6796808242797852, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7068922519683838, "step": 362 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 593.0, "completions/max_terminated_length": 593.0, "completions/mean_length": 257.5625, "completions/mean_terminated_length": 257.5625, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.5808, "grad_norm": 0.036485347896814346, "learning_rate": 4.661921708185053e-07, "loss": -0.0097, "num_tokens": 178161496.0, "reward": 1.1560263633728027, "reward_std": 0.13470560312271118, "rewards/accuracy_reward_long_step": 0.2890625, "rewards/final_brier_reward_long_step": 0.6383723020553589, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.829483687877655, "step": 363 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 399.0, "completions/max_terminated_length": 399.0, "completions/mean_length": 231.03515625, "completions/mean_terminated_length": 231.03515625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.5824, "grad_norm": 0.039585795253515244, "learning_rate": 4.644128113879003e-07, "loss": 0.0097, "num_tokens": 178643185.0, "reward": 1.4548025131225586, "reward_std": 0.1070779412984848, "rewards/accuracy_reward_long_step": 0.5703125, "rewards/final_brier_reward_long_step": 0.7518347501754761, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7861253023147583, "step": 364 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 505.0, "completions/max_terminated_length": 505.0, "completions/mean_length": 240.80078125, "completions/mean_terminated_length": 240.80078125, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.584, "grad_norm": 0.037114016711711884, "learning_rate": 4.626334519572954e-07, "loss": -0.0042, "num_tokens": 179134550.0, "reward": 1.3695037364959717, "reward_std": 0.16490252315998077, "rewards/accuracy_reward_long_step": 0.4765625, "rewards/final_brier_reward_long_step": 0.7435758113861084, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8281888365745544, "step": 365 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 478.0, "completions/max_terminated_length": 478.0, "completions/mean_length": 236.41796875, "completions/mean_terminated_length": 236.41796875, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.5856, "grad_norm": 0.03539412468671799, "learning_rate": 4.608540925266904e-07, "loss": 0.0036, "num_tokens": 179618601.0, "reward": 1.3902171850204468, "reward_std": 0.11728814244270325, "rewards/accuracy_reward_long_step": 0.51953125, "rewards/final_brier_reward_long_step": 0.767492949962616, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7152509689331055, "step": 366 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 467.0, "completions/max_terminated_length": 467.0, "completions/mean_length": 250.51953125, "completions/mean_terminated_length": 250.51953125, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.5872, "grad_norm": 0.03185239061713219, "learning_rate": 4.590747330960854e-07, "loss": 0.0033, "num_tokens": 180097414.0, "reward": 1.4346997737884521, "reward_std": 0.12333646416664124, "rewards/accuracy_reward_long_step": 0.51953125, "rewards/final_brier_reward_long_step": 0.8788655996322632, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7818087339401245, "step": 367 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 491.0, "completions/max_terminated_length": 491.0, "completions/mean_length": 243.2421875, "completions/mean_terminated_length": 243.2421875, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.5888, "grad_norm": 0.05108208209276199, "learning_rate": 4.5729537366548043e-07, "loss": 0.0105, "num_tokens": 180588972.0, "reward": 1.4830061197280884, "reward_std": 0.13395658135414124, "rewards/accuracy_reward_long_step": 0.59375, "rewards/final_brier_reward_long_step": 0.7892441749572754, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7677804231643677, "step": 368 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 488.0, "completions/max_terminated_length": 488.0, "completions/mean_length": 250.55859375, "completions/mean_terminated_length": 250.55859375, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.5904, "grad_norm": 0.033352505415678024, "learning_rate": 4.555160142348754e-07, "loss": -0.0026, "num_tokens": 181083883.0, "reward": 1.5408804416656494, "reward_std": 0.17830899357795715, "rewards/accuracy_reward_long_step": 0.62109375, "rewards/final_brier_reward_long_step": 0.8529410362243652, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.826205849647522, "step": 369 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 660.0, "completions/max_terminated_length": 660.0, "completions/mean_length": 249.2734375, "completions/mean_terminated_length": 249.2734375, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.592, "grad_norm": 0.03348240256309509, "learning_rate": 4.537366548042704e-07, "loss": 0.006, "num_tokens": 181570745.0, "reward": 1.3028314113616943, "reward_std": 0.21476979553699493, "rewards/accuracy_reward_long_step": 0.44140625, "rewards/final_brier_reward_long_step": 0.7177179455757141, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7357947826385498, "step": 370 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 470.0, "completions/max_terminated_length": 470.0, "completions/mean_length": 235.62109375, "completions/mean_terminated_length": 235.62109375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.5936, "grad_norm": 0.029231376945972443, "learning_rate": 4.519572953736655e-07, "loss": -0.0131, "num_tokens": 182061952.0, "reward": 1.4700736999511719, "reward_std": 0.0870005190372467, "rewards/accuracy_reward_long_step": 0.5625, "rewards/final_brier_reward_long_step": 0.7968558669090271, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.833439290523529, "step": 371 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 489.0, "completions/max_terminated_length": 489.0, "completions/mean_length": 244.97265625, "completions/mean_terminated_length": 244.97265625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.5952, "grad_norm": 0.0341234989464283, "learning_rate": 4.501779359430605e-07, "loss": -0.003, "num_tokens": 182547297.0, "reward": 1.5000258684158325, "reward_std": 0.12720796465873718, "rewards/accuracy_reward_long_step": 0.609375, "rewards/final_brier_reward_long_step": 0.8284410238265991, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7341622710227966, "step": 372 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 557.0, "completions/max_terminated_length": 557.0, "completions/mean_length": 272.23046875, "completions/mean_terminated_length": 272.23046875, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.5968, "grad_norm": 0.032760150730609894, "learning_rate": 4.483985765124555e-07, "loss": 0.0008, "num_tokens": 183047828.0, "reward": 1.2245934009552002, "reward_std": 0.14938510954380035, "rewards/accuracy_reward_long_step": 0.33984375, "rewards/final_brier_reward_long_step": 0.738226592540741, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.800771951675415, "step": 373 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 484.0, "completions/max_terminated_length": 484.0, "completions/mean_length": 236.62109375, "completions/mean_terminated_length": 236.62109375, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.5984, "grad_norm": 0.035608965903520584, "learning_rate": 4.466192170818505e-07, "loss": -0.0005, "num_tokens": 183518363.0, "reward": 1.4495090246200562, "reward_std": 0.1940980851650238, "rewards/accuracy_reward_long_step": 0.58984375, "rewards/final_brier_reward_long_step": 0.6856546401977539, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7530063390731812, "step": 374 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 480.0, "completions/max_terminated_length": 480.0, "completions/mean_length": 253.03125, "completions/mean_terminated_length": 253.03125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.6, "grad_norm": 0.03018086962401867, "learning_rate": 4.4483985765124553e-07, "loss": -0.0038, "num_tokens": 184007499.0, "reward": 1.4177751541137695, "reward_std": 0.1175907552242279, "rewards/accuracy_reward_long_step": 0.5234375, "rewards/final_brier_reward_long_step": 0.830146849155426, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7550168037414551, "step": 375 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 414.0, "completions/max_terminated_length": 414.0, "completions/mean_length": 231.6640625, "completions/mean_terminated_length": 231.6640625, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.6016, "grad_norm": 0.03529973700642586, "learning_rate": 4.4306049822064055e-07, "loss": -0.005, "num_tokens": 184481933.0, "reward": 1.4107120037078857, "reward_std": 0.17283451557159424, "rewards/accuracy_reward_long_step": 0.55078125, "rewards/final_brier_reward_long_step": 0.6837781667709351, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7559454441070557, "step": 376 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 490.0, "completions/max_terminated_length": 490.0, "completions/mean_length": 248.328125, "completions/mean_terminated_length": 248.328125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.6032, "grad_norm": 0.032051555812358856, "learning_rate": 4.412811387900356e-07, "loss": 0.0033, "num_tokens": 184971921.0, "reward": 1.4520785808563232, "reward_std": 0.23111991584300995, "rewards/accuracy_reward_long_step": 0.5703125, "rewards/final_brier_reward_long_step": 0.7146296501159668, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8124346733093262, "step": 377 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 507.0, "completions/max_terminated_length": 507.0, "completions/mean_length": 234.9921875, "completions/mean_terminated_length": 234.9921875, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.6048, "grad_norm": 0.03357694298028946, "learning_rate": 4.395017793594306e-07, "loss": 0.0024, "num_tokens": 185440015.0, "reward": 1.429476261138916, "reward_std": 0.119395412504673, "rewards/accuracy_reward_long_step": 0.53125, "rewards/final_brier_reward_long_step": 0.7937347888946533, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7991704940795898, "step": 378 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 560.0, "completions/max_terminated_length": 560.0, "completions/mean_length": 260.4453125, "completions/mean_terminated_length": 260.4453125, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.6064, "grad_norm": 0.029874242842197418, "learning_rate": 4.377224199288256e-07, "loss": 0.0054, "num_tokens": 185943985.0, "reward": 1.489135980606079, "reward_std": 0.123079814016819, "rewards/accuracy_reward_long_step": 0.5546875, "rewards/final_brier_reward_long_step": 0.8251116871833801, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.9126821160316467, "step": 379 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 592.0, "completions/max_terminated_length": 592.0, "completions/mean_length": 249.58203125, "completions/mean_terminated_length": 249.58203125, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.608, "grad_norm": 0.03442827984690666, "learning_rate": 4.359430604982206e-07, "loss": -0.0082, "num_tokens": 186443814.0, "reward": 1.2993438243865967, "reward_std": 0.12685778737068176, "rewards/accuracy_reward_long_step": 0.40625, "rewards/final_brier_reward_long_step": 0.7798289060592651, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7925466299057007, "step": 380 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 572.0, "completions/max_terminated_length": 572.0, "completions/mean_length": 254.453125, "completions/mean_terminated_length": 254.453125, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.6096, "grad_norm": 0.032972000539302826, "learning_rate": 4.341637010676156e-07, "loss": -0.002, "num_tokens": 186948906.0, "reward": 1.3653643131256104, "reward_std": 0.1496572494506836, "rewards/accuracy_reward_long_step": 0.484375, "rewards/final_brier_reward_long_step": 0.7358413934707642, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7881159782409668, "step": 381 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 460.0, "completions/max_terminated_length": 460.0, "completions/mean_length": 243.390625, "completions/mean_terminated_length": 243.390625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.6112, "grad_norm": 0.03480248898267746, "learning_rate": 4.3238434163701063e-07, "loss": 0.0027, "num_tokens": 187445558.0, "reward": 1.545555830001831, "reward_std": 0.17457936704158783, "rewards/accuracy_reward_long_step": 0.6171875, "rewards/final_brier_reward_long_step": 0.865576982498169, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8478966355323792, "step": 382 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 564.0, "completions/max_terminated_length": 564.0, "completions/mean_length": 252.15234375, "completions/mean_terminated_length": 252.15234375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.6128, "grad_norm": 0.03224232792854309, "learning_rate": 4.306049822064057e-07, "loss": 0.0002, "num_tokens": 187939005.0, "reward": 1.5401490926742554, "reward_std": 0.1554555892944336, "rewards/accuracy_reward_long_step": 0.625, "rewards/final_brier_reward_long_step": 0.8096957206726074, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8509008288383484, "step": 383 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 557.0, "completions/max_terminated_length": 557.0, "completions/mean_length": 249.1328125, "completions/mean_terminated_length": 249.1328125, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.6144, "grad_norm": 0.029847221449017525, "learning_rate": 4.288256227758007e-07, "loss": 0.0013, "num_tokens": 188446527.0, "reward": 1.3999953269958496, "reward_std": 0.09057177603244781, "rewards/accuracy_reward_long_step": 0.5078125, "rewards/final_brier_reward_long_step": 0.7741625308990479, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.794569194316864, "step": 384 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 580.0, "completions/max_terminated_length": 580.0, "completions/mean_length": 264.25390625, "completions/mean_terminated_length": 264.25390625, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.616, "grad_norm": 0.031437478959560394, "learning_rate": 4.2704626334519573e-07, "loss": 0.0145, "num_tokens": 188934968.0, "reward": 1.4329785108566284, "reward_std": 0.14642232656478882, "rewards/accuracy_reward_long_step": 0.54296875, "rewards/final_brier_reward_long_step": 0.7519199252128601, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8081189393997192, "step": 385 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 450.0, "completions/max_terminated_length": 450.0, "completions/mean_length": 246.9140625, "completions/mean_terminated_length": 246.9140625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.6176, "grad_norm": 0.04302318021655083, "learning_rate": 4.2526690391459074e-07, "loss": -0.0079, "num_tokens": 189434826.0, "reward": 1.3921318054199219, "reward_std": 0.17010337114334106, "rewards/accuracy_reward_long_step": 0.54296875, "rewards/final_brier_reward_long_step": 0.7150309085845947, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.6816216111183167, "step": 386 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 590.0, "completions/max_terminated_length": 590.0, "completions/mean_length": 254.37890625, "completions/mean_terminated_length": 254.37890625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.6192, "grad_norm": 0.03536539152264595, "learning_rate": 4.2348754448398576e-07, "loss": -0.0079, "num_tokens": 189913979.0, "reward": 1.4037388563156128, "reward_std": 0.11961972713470459, "rewards/accuracy_reward_long_step": 0.515625, "rewards/final_brier_reward_long_step": 0.738335907459259, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8141195178031921, "step": 387 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 506.0, "completions/max_terminated_length": 506.0, "completions/mean_length": 266.3203125, "completions/mean_terminated_length": 266.3203125, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.6208, "grad_norm": 0.03417327627539635, "learning_rate": 4.217081850533807e-07, "loss": -0.0023, "num_tokens": 190421693.0, "reward": 1.4262065887451172, "reward_std": 0.10677627474069595, "rewards/accuracy_reward_long_step": 0.53515625, "rewards/final_brier_reward_long_step": 0.7934492230415344, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7707524299621582, "step": 388 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 442.0, "completions/max_terminated_length": 442.0, "completions/mean_length": 267.72265625, "completions/mean_terminated_length": 267.72265625, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.6224, "grad_norm": 0.03453484922647476, "learning_rate": 4.199288256227758e-07, "loss": 0.0004, "num_tokens": 190935686.0, "reward": 1.2190345525741577, "reward_std": 0.14494457840919495, "rewards/accuracy_reward_long_step": 0.33984375, "rewards/final_brier_reward_long_step": 0.7603257894515991, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.756437361240387, "step": 389 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 447.0, "completions/max_terminated_length": 447.0, "completions/mean_length": 248.13671875, "completions/mean_terminated_length": 248.13671875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.624, "grad_norm": 0.033618371933698654, "learning_rate": 4.181494661921708e-07, "loss": 0.0001, "num_tokens": 191423305.0, "reward": 1.367628812789917, "reward_std": 0.13067224621772766, "rewards/accuracy_reward_long_step": 0.4765625, "rewards/final_brier_reward_long_step": 0.7947276830673218, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7695374488830566, "step": 390 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 564.0, "completions/max_terminated_length": 564.0, "completions/mean_length": 263.71875, "completions/mean_terminated_length": 263.71875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.6256, "grad_norm": 0.0348045788705349, "learning_rate": 4.163701067615658e-07, "loss": 0.0067, "num_tokens": 191928945.0, "reward": 1.6183103322982788, "reward_std": 0.16025137901306152, "rewards/accuracy_reward_long_step": 0.71875, "rewards/final_brier_reward_long_step": 0.8521628379821777, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7538907527923584, "step": 391 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 599.0, "completions/max_terminated_length": 599.0, "completions/mean_length": 265.98828125, "completions/mean_terminated_length": 265.98828125, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.6272, "grad_norm": 0.028777770698070526, "learning_rate": 4.1459074733096083e-07, "loss": 0.0073, "num_tokens": 192431814.0, "reward": 1.4733824729919434, "reward_std": 0.14600692689418793, "rewards/accuracy_reward_long_step": 0.5859375, "rewards/final_brier_reward_long_step": 0.7584691047668457, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7913106679916382, "step": 392 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 462.0, "completions/max_terminated_length": 462.0, "completions/mean_length": 263.95703125, "completions/mean_terminated_length": 263.95703125, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.6288, "grad_norm": 0.03086771070957184, "learning_rate": 4.1281138790035585e-07, "loss": -0.014, "num_tokens": 192932539.0, "reward": 1.2810169458389282, "reward_std": 0.08168387413024902, "rewards/accuracy_reward_long_step": 0.43359375, "rewards/final_brier_reward_long_step": 0.6735238432884216, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7239813804626465, "step": 393 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 504.0, "completions/max_terminated_length": 504.0, "completions/mean_length": 252.90625, "completions/mean_terminated_length": 253.89805603027344, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.6304, "grad_norm": 0.033842138946056366, "learning_rate": 4.1103202846975086e-07, "loss": -0.0147, "num_tokens": 193433083.0, "reward": 1.4617502689361572, "reward_std": 0.16944709420204163, "rewards/accuracy_reward_long_step": 0.5703125, "rewards/final_brier_reward_long_step": 0.8041574358940125, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7694060206413269, "step": 394 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 527.0, "completions/max_terminated_length": 527.0, "completions/mean_length": 252.5546875, "completions/mean_terminated_length": 252.5546875, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.632, "grad_norm": 0.035002097487449646, "learning_rate": 4.0925266903914593e-07, "loss": 0.0012, "num_tokens": 193924961.0, "reward": 1.5998687744140625, "reward_std": 0.14062157273292542, "rewards/accuracy_reward_long_step": 0.69140625, "rewards/final_brier_reward_long_step": 0.7885218858718872, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8453285694122314, "step": 395 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 537.0, "completions/max_terminated_length": 537.0, "completions/mean_length": 264.9765625, "completions/mean_terminated_length": 264.9765625, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.6336, "grad_norm": 0.0325743593275547, "learning_rate": 4.0747330960854094e-07, "loss": 0.0099, "num_tokens": 194417523.0, "reward": 1.5281362533569336, "reward_std": 0.14003178477287292, "rewards/accuracy_reward_long_step": 0.6171875, "rewards/final_brier_reward_long_step": 0.816343367099762, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8274516463279724, "step": 396 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 524.0, "completions/max_terminated_length": 524.0, "completions/mean_length": 266.8125, "completions/mean_terminated_length": 266.8125, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 0.6352, "grad_norm": 0.044015269726514816, "learning_rate": 4.0569395017793596e-07, "loss": -0.0169, "num_tokens": 194911235.0, "reward": 1.1511602401733398, "reward_std": 0.0747460424900055, "rewards/accuracy_reward_long_step": 0.27734375, "rewards/final_brier_reward_long_step": 0.7376371622085571, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7576285004615784, "step": 397 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 502.0, "completions/max_terminated_length": 502.0, "completions/mean_length": 258.0625, "completions/mean_terminated_length": 258.0625, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.6368, "grad_norm": 0.035533856600522995, "learning_rate": 4.039145907473309e-07, "loss": 0.0121, "num_tokens": 195414379.0, "reward": 1.45721435546875, "reward_std": 0.18415778875350952, "rewards/accuracy_reward_long_step": 0.56640625, "rewards/final_brier_reward_long_step": 0.7342562675476074, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8289762735366821, "step": 398 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 487.0, "completions/max_terminated_length": 487.0, "completions/mean_length": 260.1484375, "completions/mean_terminated_length": 260.1484375, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.6384, "grad_norm": 0.04100572690367699, "learning_rate": 4.0213523131672593e-07, "loss": 0.0142, "num_tokens": 195921505.0, "reward": 1.4241199493408203, "reward_std": 0.12187729775905609, "rewards/accuracy_reward_long_step": 0.53515625, "rewards/final_brier_reward_long_step": 0.7385929822921753, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8172620534896851, "step": 399 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 545.0, "completions/max_terminated_length": 545.0, "completions/mean_length": 260.5234375, "completions/mean_terminated_length": 260.5234375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.64, "grad_norm": 0.03168834373354912, "learning_rate": 4.0035587188612095e-07, "loss": 0.015, "num_tokens": 196418567.0, "reward": 1.468137264251709, "reward_std": 0.12736788392066956, "rewards/accuracy_reward_long_step": 0.546875, "rewards/final_brier_reward_long_step": 0.796457052230835, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8885919451713562, "step": 400 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 451.0, "completions/max_terminated_length": 451.0, "completions/mean_length": 259.34765625, "completions/mean_terminated_length": 259.34765625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.6416, "grad_norm": 0.07957521826028824, "learning_rate": 3.98576512455516e-07, "loss": 0.0156, "num_tokens": 196899368.0, "reward": 1.3013485670089722, "reward_std": 0.1320360153913498, "rewards/accuracy_reward_long_step": 0.41796875, "rewards/final_brier_reward_long_step": 0.7306581735610962, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8028608560562134, "step": 401 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 417.0, "completions/max_terminated_length": 417.0, "completions/mean_length": 258.40234375, "completions/mean_terminated_length": 258.40234375, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 0.6432, "grad_norm": 0.04176841303706169, "learning_rate": 3.9679715302491103e-07, "loss": 0.0149, "num_tokens": 197393599.0, "reward": 1.4583325386047363, "reward_std": 0.17372997105121613, "rewards/accuracy_reward_long_step": 0.5625, "rewards/final_brier_reward_long_step": 0.7547035217285156, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8286267518997192, "step": 402 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 430.0, "completions/max_terminated_length": 430.0, "completions/mean_length": 262.3828125, "completions/mean_terminated_length": 262.3828125, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.6448, "grad_norm": 0.031080788001418114, "learning_rate": 3.9501779359430604e-07, "loss": -0.008, "num_tokens": 197880809.0, "reward": 1.3304669857025146, "reward_std": 0.11997392773628235, "rewards/accuracy_reward_long_step": 0.4453125, "rewards/final_brier_reward_long_step": 0.7705242037773132, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7700934410095215, "step": 403 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 539.0, "completions/max_terminated_length": 539.0, "completions/mean_length": 257.9296875, "completions/mean_terminated_length": 257.9296875, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.6464, "grad_norm": 0.0383228063583374, "learning_rate": 3.9323843416370106e-07, "loss": 0.0168, "num_tokens": 198364775.0, "reward": 1.4343302249908447, "reward_std": 0.13839168846607208, "rewards/accuracy_reward_long_step": 0.5625, "rewards/final_brier_reward_long_step": 0.7040433883666992, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7832778692245483, "step": 404 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 505.0, "completions/max_terminated_length": 505.0, "completions/mean_length": 245.3828125, "completions/mean_terminated_length": 245.3828125, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.648, "grad_norm": 0.04407874867320061, "learning_rate": 3.9145907473309607e-07, "loss": -0.0001, "num_tokens": 198837905.0, "reward": 1.4450054168701172, "reward_std": 0.15000846982002258, "rewards/accuracy_reward_long_step": 0.5703125, "rewards/final_brier_reward_long_step": 0.7329072952270508, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.765864372253418, "step": 405 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 488.0, "completions/max_terminated_length": 488.0, "completions/mean_length": 248.64453125, "completions/mean_terminated_length": 248.64453125, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.6496, "grad_norm": 0.035016220062971115, "learning_rate": 3.896797153024911e-07, "loss": -0.0174, "num_tokens": 199319566.0, "reward": 1.3514586687088013, "reward_std": 0.09576141834259033, "rewards/accuracy_reward_long_step": 0.47265625, "rewards/final_brier_reward_long_step": 0.7985478639602661, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7166616916656494, "step": 406 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 486.0, "completions/max_terminated_length": 486.0, "completions/mean_length": 270.6796875, "completions/mean_terminated_length": 270.6796875, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.6512, "grad_norm": 0.03493834286928177, "learning_rate": 3.879003558718861e-07, "loss": 0.005, "num_tokens": 199798012.0, "reward": 1.2670437097549438, "reward_std": 0.1575869619846344, "rewards/accuracy_reward_long_step": 0.375, "rewards/final_brier_reward_long_step": 0.7614452242851257, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8067296743392944, "step": 407 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 603.0, "completions/max_terminated_length": 603.0, "completions/mean_length": 257.234375, "completions/mean_terminated_length": 257.234375, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.6528, "grad_norm": 0.17604607343673706, "learning_rate": 3.861209964412811e-07, "loss": -0.0137, "num_tokens": 200299744.0, "reward": 1.2874679565429688, "reward_std": 0.11275781691074371, "rewards/accuracy_reward_long_step": 0.3984375, "rewards/final_brier_reward_long_step": 0.7716461420059204, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7844761610031128, "step": 408 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 490.0, "completions/max_terminated_length": 490.0, "completions/mean_length": 245.10546875, "completions/mean_terminated_length": 245.10546875, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.6544, "grad_norm": 0.03474588319659233, "learning_rate": 3.8434163701067613e-07, "loss": 0.0102, "num_tokens": 200789587.0, "reward": 1.3615117073059082, "reward_std": 0.13387925922870636, "rewards/accuracy_reward_long_step": 0.51171875, "rewards/final_brier_reward_long_step": 0.6371433734893799, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7620280385017395, "step": 409 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 431.0, "completions/max_terminated_length": 431.0, "completions/mean_length": 238.515625, "completions/mean_terminated_length": 238.515625, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.656, "grad_norm": 0.036233462393283844, "learning_rate": 3.8256227758007115e-07, "loss": 0.0044, "num_tokens": 201275391.0, "reward": 1.3896780014038086, "reward_std": 0.13047534227371216, "rewards/accuracy_reward_long_step": 0.515625, "rewards/final_brier_reward_long_step": 0.6849026679992676, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8113091588020325, "step": 410 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 504.0, "completions/max_terminated_length": 504.0, "completions/mean_length": 241.09375, "completions/mean_terminated_length": 241.09375, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.6576, "grad_norm": 0.03837813064455986, "learning_rate": 3.8078291814946616e-07, "loss": -0.0032, "num_tokens": 201756703.0, "reward": 1.3157711029052734, "reward_std": 0.10341217368841171, "rewards/accuracy_reward_long_step": 0.45703125, "rewards/final_brier_reward_long_step": 0.7142456769943237, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7207139134407043, "step": 411 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 591.0, "completions/max_terminated_length": 591.0, "completions/mean_length": 238.15234375, "completions/mean_terminated_length": 238.15234375, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.6592, "grad_norm": 0.0354132242500782, "learning_rate": 3.790035587188612e-07, "loss": 0.0055, "num_tokens": 202225950.0, "reward": 1.4732050895690918, "reward_std": 0.1469092220067978, "rewards/accuracy_reward_long_step": 0.58984375, "rewards/final_brier_reward_long_step": 0.7194160223007202, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8218415975570679, "step": 412 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 491.0, "completions/max_terminated_length": 491.0, "completions/mean_length": 247.93359375, "completions/mean_terminated_length": 247.93359375, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.6608, "grad_norm": 0.046107884496450424, "learning_rate": 3.7722419928825624e-07, "loss": -0.0093, "num_tokens": 202709477.0, "reward": 1.3391033411026, "reward_std": 0.08157768845558167, "rewards/accuracy_reward_long_step": 0.46484375, "rewards/final_brier_reward_long_step": 0.6804527044296265, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8165856599807739, "step": 413 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 441.0, "completions/max_terminated_length": 441.0, "completions/mean_length": 225.625, "completions/mean_terminated_length": 225.625, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.6624, "grad_norm": 0.04497173801064491, "learning_rate": 3.7544483985765126e-07, "loss": 0.0046, "num_tokens": 203200421.0, "reward": 1.4665465354919434, "reward_std": 0.10675959289073944, "rewards/accuracy_reward_long_step": 0.6015625, "rewards/final_brier_reward_long_step": 0.7531781196594238, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7067579030990601, "step": 414 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 511.0, "completions/max_terminated_length": 511.0, "completions/mean_length": 254.66015625, "completions/mean_terminated_length": 254.66015625, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.664, "grad_norm": 0.037333909422159195, "learning_rate": 3.7366548042704627e-07, "loss": -0.005, "num_tokens": 203701798.0, "reward": 1.374776840209961, "reward_std": 0.12094822525978088, "rewards/accuracy_reward_long_step": 0.5, "rewards/final_brier_reward_long_step": 0.7455586194992065, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.753548264503479, "step": 415 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 482.0, "completions/max_terminated_length": 482.0, "completions/mean_length": 226.84375, "completions/mean_terminated_length": 226.84375, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.6656, "grad_norm": 0.039919789880514145, "learning_rate": 3.718861209964413e-07, "loss": 0.025, "num_tokens": 204175030.0, "reward": 1.562011480331421, "reward_std": 0.06771315634250641, "rewards/accuracy_reward_long_step": 0.64453125, "rewards/final_brier_reward_long_step": 0.8121625185012817, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8577582836151123, "step": 416 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 409.0, "completions/max_terminated_length": 409.0, "completions/mean_length": 243.203125, "completions/mean_terminated_length": 243.203125, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.6672, "grad_norm": 0.04114522784948349, "learning_rate": 3.7010676156583625e-07, "loss": 0.0153, "num_tokens": 204655778.0, "reward": 1.4633723497390747, "reward_std": 0.0920601338148117, "rewards/accuracy_reward_long_step": 0.54296875, "rewards/final_brier_reward_long_step": 0.8287187218666077, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8528955578804016, "step": 417 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 606.0, "completions/max_terminated_length": 606.0, "completions/mean_length": 244.66015625, "completions/mean_terminated_length": 244.66015625, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.6688, "grad_norm": 0.0328848697245121, "learning_rate": 3.6832740213523126e-07, "loss": 0.005, "num_tokens": 205149811.0, "reward": 1.3774842023849487, "reward_std": 0.13209398090839386, "rewards/accuracy_reward_long_step": 0.48828125, "rewards/final_brier_reward_long_step": 0.805209755897522, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.751602292060852, "step": 418 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 447.0, "completions/max_terminated_length": 447.0, "completions/mean_length": 235.80859375, "completions/mean_terminated_length": 235.80859375, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.6704, "grad_norm": 0.04273487254977226, "learning_rate": 3.6654804270462633e-07, "loss": 0.0065, "num_tokens": 205628714.0, "reward": 1.306333303451538, "reward_std": 0.1178286001086235, "rewards/accuracy_reward_long_step": 0.41796875, "rewards/final_brier_reward_long_step": 0.7348085641860962, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8186495304107666, "step": 419 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 581.0, "completions/max_terminated_length": 581.0, "completions/mean_length": 244.96484375, "completions/mean_terminated_length": 244.96484375, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.672, "grad_norm": 0.04026668146252632, "learning_rate": 3.6476868327402134e-07, "loss": 0.0007, "num_tokens": 206110425.0, "reward": 1.3591866493225098, "reward_std": 0.13395720720291138, "rewards/accuracy_reward_long_step": 0.47265625, "rewards/final_brier_reward_long_step": 0.7895034551620483, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7566181421279907, "step": 420 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 590.0, "completions/max_terminated_length": 590.0, "completions/mean_length": 244.67578125, "completions/mean_terminated_length": 244.67578125, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.6736, "grad_norm": 0.040121398866176605, "learning_rate": 3.6298932384341636e-07, "loss": -0.0165, "num_tokens": 206586750.0, "reward": 1.3462982177734375, "reward_std": 0.140256866812706, "rewards/accuracy_reward_long_step": 0.4765625, "rewards/final_brier_reward_long_step": 0.6518319845199585, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8271111249923706, "step": 421 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 382.0, "completions/max_terminated_length": 382.0, "completions/mean_length": 234.125, "completions/mean_terminated_length": 234.125, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.6752, "grad_norm": 0.04478037729859352, "learning_rate": 3.6120996441281137e-07, "loss": 0.0001, "num_tokens": 207062502.0, "reward": 1.5341248512268066, "reward_std": 0.12208271771669388, "rewards/accuracy_reward_long_step": 0.63671875, "rewards/final_brier_reward_long_step": 0.7614644765853882, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8281601071357727, "step": 422 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 460.0, "completions/max_terminated_length": 460.0, "completions/mean_length": 246.85546875, "completions/mean_terminated_length": 246.85546875, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.6768, "grad_norm": 0.041672661900520325, "learning_rate": 3.594306049822064e-07, "loss": 0.0102, "num_tokens": 207558433.0, "reward": 1.2478289604187012, "reward_std": 0.09173645079135895, "rewards/accuracy_reward_long_step": 0.34375, "rewards/final_brier_reward_long_step": 0.8001093864440918, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8162060976028442, "step": 423 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 406.0, "completions/max_terminated_length": 406.0, "completions/mean_length": 235.1484375, "completions/mean_terminated_length": 235.1484375, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.6784, "grad_norm": 0.04561910033226013, "learning_rate": 3.576512455516014e-07, "loss": -0.0036, "num_tokens": 208031183.0, "reward": 1.43558931350708, "reward_std": 0.09829960763454437, "rewards/accuracy_reward_long_step": 0.51171875, "rewards/final_brier_reward_long_step": 0.8206312656402588, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8748506903648376, "step": 424 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 493.0, "completions/max_terminated_length": 493.0, "completions/mean_length": 252.4140625, "completions/mean_terminated_length": 252.4140625, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.68, "grad_norm": 0.039236631244421005, "learning_rate": 3.5587188612099647e-07, "loss": -0.0036, "num_tokens": 208527737.0, "reward": 1.433516263961792, "reward_std": 0.17410725355148315, "rewards/accuracy_reward_long_step": 0.53515625, "rewards/final_brier_reward_long_step": 0.7744226455688477, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8268297910690308, "step": 425 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 464.0, "completions/max_terminated_length": 464.0, "completions/mean_length": 223.62890625, "completions/mean_terminated_length": 223.62890625, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.6816, "grad_norm": 0.034077707678079605, "learning_rate": 3.540925266903915e-07, "loss": -0.0108, "num_tokens": 209000154.0, "reward": 1.3932843208312988, "reward_std": 0.08521192520856857, "rewards/accuracy_reward_long_step": 0.453125, "rewards/final_brier_reward_long_step": 0.9108027219772339, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8498345017433167, "step": 426 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 882.0, "completions/max_terminated_length": 882.0, "completions/mean_length": 240.484375, "completions/mean_terminated_length": 240.484375, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.6832, "grad_norm": 0.04128441587090492, "learning_rate": 3.5231316725978644e-07, "loss": -0.0074, "num_tokens": 209489150.0, "reward": 1.565014123916626, "reward_std": 0.15992087125778198, "rewards/accuracy_reward_long_step": 0.671875, "rewards/final_brier_reward_long_step": 0.7421960830688477, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8381730318069458, "step": 427 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.0, "completions/max_terminated_length": 471.0, "completions/mean_length": 244.76953125, "completions/mean_terminated_length": 244.76953125, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.6848, "grad_norm": 0.043866030871868134, "learning_rate": 3.5053380782918146e-07, "loss": -0.0145, "num_tokens": 209985339.0, "reward": 1.3970236778259277, "reward_std": 0.16603073477745056, "rewards/accuracy_reward_long_step": 0.5, "rewards/final_brier_reward_long_step": 0.7696589827537537, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8262485861778259, "step": 428 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 520.0, "completions/max_terminated_length": 520.0, "completions/mean_length": 234.33984375, "completions/mean_terminated_length": 234.33984375, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.6864, "grad_norm": 0.042403049767017365, "learning_rate": 3.4875444839857647e-07, "loss": 0.0113, "num_tokens": 210472786.0, "reward": 1.5308232307434082, "reward_std": 0.12255808711051941, "rewards/accuracy_reward_long_step": 0.63671875, "rewards/final_brier_reward_long_step": 0.7184535264968872, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8579643964767456, "step": 429 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 550.0, "completions/max_terminated_length": 550.0, "completions/mean_length": 230.375, "completions/mean_terminated_length": 230.375, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 0.688, "grad_norm": 0.040890295058488846, "learning_rate": 3.469750889679715e-07, "loss": 0.0102, "num_tokens": 210951450.0, "reward": 1.3568546772003174, "reward_std": 0.20398034155368805, "rewards/accuracy_reward_long_step": 0.47265625, "rewards/final_brier_reward_long_step": 0.7493456602096558, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7952607274055481, "step": 430 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 497.0, "completions/max_terminated_length": 497.0, "completions/mean_length": 240.0, "completions/mean_terminated_length": 240.0, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.6896, "grad_norm": 0.03938218578696251, "learning_rate": 3.4519572953736656e-07, "loss": 0.0071, "num_tokens": 211431050.0, "reward": 1.369812250137329, "reward_std": 0.16209545731544495, "rewards/accuracy_reward_long_step": 0.4921875, "rewards/final_brier_reward_long_step": 0.7736667990684509, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7368321418762207, "step": 431 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 464.0, "completions/max_terminated_length": 464.0, "completions/mean_length": 238.2109375, "completions/mean_terminated_length": 238.2109375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.6912, "grad_norm": 0.04615631699562073, "learning_rate": 3.4341637010676157e-07, "loss": 0.0087, "num_tokens": 211919552.0, "reward": 1.415741205215454, "reward_std": 0.0865730568766594, "rewards/accuracy_reward_long_step": 0.5078125, "rewards/final_brier_reward_long_step": 0.7965086102485657, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8352065682411194, "step": 432 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 500.0, "completions/max_terminated_length": 500.0, "completions/mean_length": 230.30078125, "completions/mean_terminated_length": 230.30078125, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 0.6928, "grad_norm": 0.045928288251161575, "learning_rate": 3.416370106761566e-07, "loss": 0.0084, "num_tokens": 212409581.0, "reward": 1.4080349206924438, "reward_std": 0.10854038596153259, "rewards/accuracy_reward_long_step": 0.49609375, "rewards/final_brier_reward_long_step": 0.8253128528594971, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8224518895149231, "step": 433 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 513.0, "completions/max_terminated_length": 513.0, "completions/mean_length": 243.75390625, "completions/mean_terminated_length": 243.75390625, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.6944, "grad_norm": 0.036884017288684845, "learning_rate": 3.398576512455516e-07, "loss": -0.01, "num_tokens": 212874262.0, "reward": 1.3945372104644775, "reward_std": 0.1757126748561859, "rewards/accuracy_reward_long_step": 0.50390625, "rewards/final_brier_reward_long_step": 0.8027616143226624, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7597622275352478, "step": 434 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 457.0, "completions/max_terminated_length": 457.0, "completions/mean_length": 226.0, "completions/mean_terminated_length": 226.0, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 0.696, "grad_norm": 0.03966812789440155, "learning_rate": 3.380782918149466e-07, "loss": 0.0137, "num_tokens": 213355310.0, "reward": 1.436761736869812, "reward_std": 0.10320307314395905, "rewards/accuracy_reward_long_step": 0.55078125, "rewards/final_brier_reward_long_step": 0.7525194883346558, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7914024591445923, "step": 435 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 600.0, "completions/max_terminated_length": 600.0, "completions/mean_length": 240.4921875, "completions/mean_terminated_length": 240.4921875, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.6976, "grad_norm": 0.04616158828139305, "learning_rate": 3.3629893238434163e-07, "loss": -0.0162, "num_tokens": 213835492.0, "reward": 1.345383882522583, "reward_std": 0.14034321904182434, "rewards/accuracy_reward_long_step": 0.4453125, "rewards/final_brier_reward_long_step": 0.783481240272522, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8168047666549683, "step": 436 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 565.0, "completions/max_terminated_length": 565.0, "completions/mean_length": 225.7578125, "completions/mean_terminated_length": 225.7578125, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.6992, "grad_norm": 0.04164176806807518, "learning_rate": 3.3451957295373664e-07, "loss": 0.0035, "num_tokens": 214322966.0, "reward": 1.3548498153686523, "reward_std": 0.14276200532913208, "rewards/accuracy_reward_long_step": 0.453125, "rewards/final_brier_reward_long_step": 0.8077800273895264, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7991191744804382, "step": 437 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 502.0, "completions/max_terminated_length": 502.0, "completions/mean_length": 226.65625, "completions/mean_terminated_length": 226.65625, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.7008, "grad_norm": 0.03757995367050171, "learning_rate": 3.3274021352313166e-07, "loss": 0.0176, "num_tokens": 214794966.0, "reward": 1.5220391750335693, "reward_std": 0.1383122354745865, "rewards/accuracy_reward_long_step": 0.62109375, "rewards/final_brier_reward_long_step": 0.7596441507339478, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8441376686096191, "step": 438 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 592.0, "completions/max_terminated_length": 592.0, "completions/mean_length": 237.671875, "completions/mean_terminated_length": 237.671875, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.7024, "grad_norm": 0.04206470027565956, "learning_rate": 3.3096085409252667e-07, "loss": 0.0228, "num_tokens": 215274386.0, "reward": 1.4969501495361328, "reward_std": 0.1166161373257637, "rewards/accuracy_reward_long_step": 0.5859375, "rewards/final_brier_reward_long_step": 0.8239851593971252, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.820065438747406, "step": 439 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 520.0, "completions/max_terminated_length": 520.0, "completions/mean_length": 241.5390625, "completions/mean_terminated_length": 241.5390625, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.704, "grad_norm": 0.03900426998734474, "learning_rate": 3.291814946619217e-07, "loss": -0.0161, "num_tokens": 215761204.0, "reward": 1.4888627529144287, "reward_std": 0.07008583098649979, "rewards/accuracy_reward_long_step": 0.61328125, "rewards/final_brier_reward_long_step": 0.7496683597564697, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7526575326919556, "step": 440 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 416.0, "completions/max_terminated_length": 416.0, "completions/mean_length": 226.73828125, "completions/mean_terminated_length": 226.73828125, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.7056, "grad_norm": 0.04039409011602402, "learning_rate": 3.274021352313167e-07, "loss": 0.0156, "num_tokens": 216251281.0, "reward": 1.4942941665649414, "reward_std": 0.14357957243919373, "rewards/accuracy_reward_long_step": 0.60546875, "rewards/final_brier_reward_long_step": 0.7177531123161316, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8375486135482788, "step": 441 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 486.0, "completions/max_terminated_length": 486.0, "completions/mean_length": 224.7421875, "completions/mean_terminated_length": 225.62353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.7072, "grad_norm": 0.03879899904131889, "learning_rate": 3.256227758007117e-07, "loss": -0.0177, "num_tokens": 216745127.0, "reward": 1.3038554191589355, "reward_std": 0.1679355949163437, "rewards/accuracy_reward_long_step": 0.421875, "rewards/final_brier_reward_long_step": 0.7172562479972839, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8184775114059448, "step": 442 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 528.0, "completions/max_terminated_length": 528.0, "completions/mean_length": 225.91796875, "completions/mean_terminated_length": 225.91796875, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.7088, "grad_norm": 0.04107962176203728, "learning_rate": 3.238434163701068e-07, "loss": 0.0022, "num_tokens": 217241754.0, "reward": 1.3336093425750732, "reward_std": 0.11196212470531464, "rewards/accuracy_reward_long_step": 0.42578125, "rewards/final_brier_reward_long_step": 0.7749031186103821, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.85640949010849, "step": 443 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 585.0, "completions/max_terminated_length": 585.0, "completions/mean_length": 226.2890625, "completions/mean_terminated_length": 226.2890625, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.7104, "grad_norm": 0.04981570690870285, "learning_rate": 3.220640569395018e-07, "loss": -0.0113, "num_tokens": 217723420.0, "reward": 1.4336767196655273, "reward_std": 0.12888742983341217, "rewards/accuracy_reward_long_step": 0.56640625, "rewards/final_brier_reward_long_step": 0.7152671813964844, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7538148760795593, "step": 444 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 520.0, "completions/max_terminated_length": 520.0, "completions/mean_length": 220.1328125, "completions/mean_terminated_length": 220.1328125, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 0.712, "grad_norm": 0.043584324419498444, "learning_rate": 3.202846975088968e-07, "loss": 0.0119, "num_tokens": 218212494.0, "reward": 1.4643619060516357, "reward_std": 0.12725988030433655, "rewards/accuracy_reward_long_step": 0.56640625, "rewards/final_brier_reward_long_step": 0.7841925621032715, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8076297640800476, "step": 445 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 395.0, "completions/max_terminated_length": 395.0, "completions/mean_length": 230.3515625, "completions/mean_terminated_length": 230.3515625, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 0.7136, "grad_norm": 0.03610699251294136, "learning_rate": 3.1850533807829177e-07, "loss": -0.0169, "num_tokens": 218694184.0, "reward": 1.559215784072876, "reward_std": 0.07399096339941025, "rewards/accuracy_reward_long_step": 0.66015625, "rewards/final_brier_reward_long_step": 0.790971040725708, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8052672147750854, "step": 446 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 466.0, "completions/max_terminated_length": 466.0, "completions/mean_length": 217.97265625, "completions/mean_terminated_length": 217.97265625, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.7152, "grad_norm": 0.0351036936044693, "learning_rate": 3.167259786476868e-07, "loss": -0.0161, "num_tokens": 219155313.0, "reward": 1.5995757579803467, "reward_std": 0.08523780107498169, "rewards/accuracy_reward_long_step": 0.7109375, "rewards/final_brier_reward_long_step": 0.7231503129005432, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8314027786254883, "step": 447 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 589.0, "completions/max_terminated_length": 589.0, "completions/mean_length": 248.65234375, "completions/mean_terminated_length": 248.65234375, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.7168, "grad_norm": 0.04068749397993088, "learning_rate": 3.149466192170818e-07, "loss": 0.0197, "num_tokens": 219654384.0, "reward": 1.4963853359222412, "reward_std": 0.1300518661737442, "rewards/accuracy_reward_long_step": 0.578125, "rewards/final_brier_reward_long_step": 0.794231653213501, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8788096904754639, "step": 448 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 595.0, "completions/max_terminated_length": 595.0, "completions/mean_length": 225.171875, "completions/mean_terminated_length": 225.171875, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 0.7184, "grad_norm": 0.037970248609781265, "learning_rate": 3.1316725978647687e-07, "loss": 0.0184, "num_tokens": 220128044.0, "reward": 1.2982159852981567, "reward_std": 0.18391726911067963, "rewards/accuracy_reward_long_step": 0.421875, "rewards/final_brier_reward_long_step": 0.7201941013336182, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7851696014404297, "step": 449 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 462.0, "completions/max_terminated_length": 462.0, "completions/mean_length": 233.64453125, "completions/mean_terminated_length": 233.64453125, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 0.72, "grad_norm": 0.040078479796648026, "learning_rate": 3.113879003558719e-07, "loss": -0.0011, "num_tokens": 220588769.0, "reward": 1.5124320983886719, "reward_std": 0.10843676328659058, "rewards/accuracy_reward_long_step": 0.6015625, "rewards/final_brier_reward_long_step": 0.7919909954071045, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8593000173568726, "step": 450 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 494.0, "completions/max_terminated_length": 494.0, "completions/mean_length": 217.15234375, "completions/mean_terminated_length": 217.15234375, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.7216, "grad_norm": 0.03554774448275566, "learning_rate": 3.096085409252669e-07, "loss": 0.007, "num_tokens": 221075272.0, "reward": 1.4489461183547974, "reward_std": 0.10056018829345703, "rewards/accuracy_reward_long_step": 0.53125, "rewards/final_brier_reward_long_step": 0.811571478843689, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.85921311378479, "step": 451 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 508.0, "completions/max_terminated_length": 508.0, "completions/mean_length": 220.69140625, "completions/mean_terminated_length": 220.69140625, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.7232, "grad_norm": 0.03730124980211258, "learning_rate": 3.078291814946619e-07, "loss": -0.0023, "num_tokens": 221558689.0, "reward": 1.3989410400390625, "reward_std": 0.10487143695354462, "rewards/accuracy_reward_long_step": 0.55078125, "rewards/final_brier_reward_long_step": 0.6452429294586182, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7473963499069214, "step": 452 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 518.0, "completions/max_terminated_length": 518.0, "completions/mean_length": 217.04296875, "completions/mean_terminated_length": 217.04296875, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.7248, "grad_norm": 0.038142986595630646, "learning_rate": 3.0604982206405693e-07, "loss": -0.025, "num_tokens": 222026756.0, "reward": 1.5077660083770752, "reward_std": 0.146861732006073, "rewards/accuracy_reward_long_step": 0.625, "rewards/final_brier_reward_long_step": 0.6923167705535889, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8387469053268433, "step": 453 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 453.0, "completions/max_terminated_length": 453.0, "completions/mean_length": 219.828125, "completions/mean_terminated_length": 219.828125, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.7264, "grad_norm": 0.039899833500385284, "learning_rate": 3.0427046263345194e-07, "loss": -0.0029, "num_tokens": 222498648.0, "reward": 1.468416452407837, "reward_std": 0.08466193825006485, "rewards/accuracy_reward_long_step": 0.54296875, "rewards/final_brier_reward_long_step": 0.8156249523162842, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8861656188964844, "step": 454 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 492.0, "completions/max_terminated_length": 492.0, "completions/mean_length": 226.6796875, "completions/mean_terminated_length": 226.6796875, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 0.728, "grad_norm": 0.04437141865491867, "learning_rate": 3.02491103202847e-07, "loss": 0.009, "num_tokens": 222986366.0, "reward": 1.3412034511566162, "reward_std": 0.17921078205108643, "rewards/accuracy_reward_long_step": 0.44921875, "rewards/final_brier_reward_long_step": 0.7214792966842651, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8464598655700684, "step": 455 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 463.0, "completions/max_terminated_length": 463.0, "completions/mean_length": 231.34765625, "completions/mean_terminated_length": 231.34765625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.7296, "grad_norm": 0.03869875520467758, "learning_rate": 3.0071174377224197e-07, "loss": 0.0084, "num_tokens": 223455831.0, "reward": 1.5209224224090576, "reward_std": 0.1157989650964737, "rewards/accuracy_reward_long_step": 0.6328125, "rewards/final_brier_reward_long_step": 0.7708175778388977, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7816216945648193, "step": 456 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 430.0, "completions/max_terminated_length": 430.0, "completions/mean_length": 227.9765625, "completions/mean_terminated_length": 227.9765625, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.7312, "grad_norm": 0.04939800873398781, "learning_rate": 2.98932384341637e-07, "loss": -0.0007, "num_tokens": 223944633.0, "reward": 1.2755203247070312, "reward_std": 0.16737382113933563, "rewards/accuracy_reward_long_step": 0.4140625, "rewards/final_brier_reward_long_step": 0.694128155708313, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7517032623291016, "step": 457 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 434.0, "completions/max_terminated_length": 434.0, "completions/mean_length": 209.98046875, "completions/mean_terminated_length": 209.98046875, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.7328, "grad_norm": 0.0375138595700264, "learning_rate": 2.97153024911032e-07, "loss": 0.0098, "num_tokens": 224409132.0, "reward": 1.4265563488006592, "reward_std": 0.17411382496356964, "rewards/accuracy_reward_long_step": 0.53515625, "rewards/final_brier_reward_long_step": 0.7519593834877014, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8214536309242249, "step": 458 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 536.0, "completions/max_terminated_length": 536.0, "completions/mean_length": 220.8828125, "completions/mean_terminated_length": 220.8828125, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 0.7344, "grad_norm": 0.03728632628917694, "learning_rate": 2.95373665480427e-07, "loss": -0.0191, "num_tokens": 224884646.0, "reward": 1.4635272026062012, "reward_std": 0.08029723912477493, "rewards/accuracy_reward_long_step": 0.5625, "rewards/final_brier_reward_long_step": 0.7463421821594238, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8577666282653809, "step": 459 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 674.0, "completions/max_terminated_length": 674.0, "completions/mean_length": 234.73828125, "completions/mean_terminated_length": 234.73828125, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.736, "grad_norm": 0.032615575939416885, "learning_rate": 2.9359430604982203e-07, "loss": 0.0027, "num_tokens": 225385123.0, "reward": 1.5013632774353027, "reward_std": 0.09813569486141205, "rewards/accuracy_reward_long_step": 0.58984375, "rewards/final_brier_reward_long_step": 0.8148428201675415, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8312351703643799, "step": 460 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.0, "completions/max_terminated_length": 461.0, "completions/mean_length": 231.265625, "completions/mean_terminated_length": 231.265625, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.7376, "grad_norm": 0.038810133934020996, "learning_rate": 2.918149466192171e-07, "loss": 0.0063, "num_tokens": 225872791.0, "reward": 1.4602744579315186, "reward_std": 0.15246982872486115, "rewards/accuracy_reward_long_step": 0.5390625, "rewards/final_brier_reward_long_step": 0.8496265411376953, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8352211713790894, "step": 461 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.0, "completions/max_terminated_length": 461.0, "completions/mean_length": 226.99609375, "completions/mean_terminated_length": 226.99609375, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.7392, "grad_norm": 0.037601787596940994, "learning_rate": 2.900355871886121e-07, "loss": -0.0017, "num_tokens": 226377910.0, "reward": 1.4851224422454834, "reward_std": 0.12114303559064865, "rewards/accuracy_reward_long_step": 0.6015625, "rewards/final_brier_reward_long_step": 0.691071093082428, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8431686162948608, "step": 462 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 422.0, "completions/max_terminated_length": 422.0, "completions/mean_length": 222.1015625, "completions/mean_terminated_length": 222.1015625, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.7408, "grad_norm": 0.03438973426818848, "learning_rate": 2.882562277580071e-07, "loss": 0.0082, "num_tokens": 226869616.0, "reward": 1.5410441160202026, "reward_std": 0.10235248506069183, "rewards/accuracy_reward_long_step": 0.65625, "rewards/final_brier_reward_long_step": 0.7193183302879333, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8198581337928772, "step": 463 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 383.0, "completions/max_terminated_length": 383.0, "completions/mean_length": 220.09375, "completions/mean_terminated_length": 220.09375, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.7424, "grad_norm": 0.055105436593294144, "learning_rate": 2.8647686832740214e-07, "loss": -0.0036, "num_tokens": 227356184.0, "reward": 1.388896107673645, "reward_std": 0.10595919191837311, "rewards/accuracy_reward_long_step": 0.5546875, "rewards/final_brier_reward_long_step": 0.5826694965362549, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7541650533676147, "step": 464 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 575.0, "completions/max_terminated_length": 575.0, "completions/mean_length": 223.9765625, "completions/mean_terminated_length": 223.9765625, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.744, "grad_norm": 0.033698540180921555, "learning_rate": 2.8469750889679715e-07, "loss": 0.0016, "num_tokens": 227845482.0, "reward": 1.5462651252746582, "reward_std": 0.10527972877025604, "rewards/accuracy_reward_long_step": 0.62890625, "rewards/final_brier_reward_long_step": 0.8236533403396606, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8457825183868408, "step": 465 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 639.0, "completions/max_terminated_length": 639.0, "completions/mean_length": 229.96484375, "completions/mean_terminated_length": 229.96484375, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.7456, "grad_norm": 0.05342903360724449, "learning_rate": 2.829181494661921e-07, "loss": 0.0089, "num_tokens": 228346801.0, "reward": 1.3685379028320312, "reward_std": 0.1639987826347351, "rewards/accuracy_reward_long_step": 0.48046875, "rewards/final_brier_reward_long_step": 0.7799558639526367, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7723206281661987, "step": 466 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 508.0, "completions/max_terminated_length": 508.0, "completions/mean_length": 230.171875, "completions/mean_terminated_length": 230.171875, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.7472, "grad_norm": 0.037923168390989304, "learning_rate": 2.811387900355872e-07, "loss": -0.0086, "num_tokens": 228834093.0, "reward": 1.4359982013702393, "reward_std": 0.12845033407211304, "rewards/accuracy_reward_long_step": 0.53125, "rewards/final_brier_reward_long_step": 0.7942116856575012, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.824781060218811, "step": 467 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 505.0, "completions/max_terminated_length": 505.0, "completions/mean_length": 226.984375, "completions/mean_terminated_length": 226.984375, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.7488, "grad_norm": 0.040297914296388626, "learning_rate": 2.793594306049822e-07, "loss": -0.012, "num_tokens": 229311481.0, "reward": 1.357725977897644, "reward_std": 0.11814339458942413, "rewards/accuracy_reward_long_step": 0.46484375, "rewards/final_brier_reward_long_step": 0.7514722347259521, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.820056676864624, "step": 468 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 468.0, "completions/max_terminated_length": 468.0, "completions/mean_length": 220.39453125, "completions/mean_terminated_length": 220.39453125, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.7504, "grad_norm": 0.048747822642326355, "learning_rate": 2.775800711743772e-07, "loss": 0.0175, "num_tokens": 229784622.0, "reward": 1.3947033882141113, "reward_std": 0.13809074461460114, "rewards/accuracy_reward_long_step": 0.50390625, "rewards/final_brier_reward_long_step": 0.7879499793052673, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.775239109992981, "step": 469 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 457.0, "completions/max_terminated_length": 457.0, "completions/mean_length": 220.22265625, "completions/mean_terminated_length": 220.22265625, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.752, "grad_norm": 0.05068999528884888, "learning_rate": 2.758007117437722e-07, "loss": -0.0182, "num_tokens": 230259999.0, "reward": 1.2983078956604004, "reward_std": 0.10855422168970108, "rewards/accuracy_reward_long_step": 0.42578125, "rewards/final_brier_reward_long_step": 0.7253687381744385, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7647379040718079, "step": 470 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 498.0, "completions/max_terminated_length": 498.0, "completions/mean_length": 228.24609375, "completions/mean_terminated_length": 228.24609375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.7536, "grad_norm": 0.036536820232868195, "learning_rate": 2.7402135231316724e-07, "loss": -0.0054, "num_tokens": 230736478.0, "reward": 1.6196913719177246, "reward_std": 0.10245537757873535, "rewards/accuracy_reward_long_step": 0.7109375, "rewards/final_brier_reward_long_step": 0.8031273484230042, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8318880796432495, "step": 471 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/max_terminated_length": 459.0, "completions/mean_length": 230.26171875, "completions/mean_terminated_length": 230.26171875, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.7552, "grad_norm": 0.040038324892520905, "learning_rate": 2.7224199288256225e-07, "loss": 0.0134, "num_tokens": 231215745.0, "reward": 1.253148078918457, "reward_std": 0.10847177356481552, "rewards/accuracy_reward_long_step": 0.3359375, "rewards/final_brier_reward_long_step": 0.8157835602760315, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8530591130256653, "step": 472 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/max_terminated_length": 459.0, "completions/mean_length": 228.953125, "completions/mean_terminated_length": 228.953125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.7568, "grad_norm": 0.03876092657446861, "learning_rate": 2.704626334519573e-07, "loss": -0.002, "num_tokens": 231688509.0, "reward": 1.2908313274383545, "reward_std": 0.11586640775203705, "rewards/accuracy_reward_long_step": 0.38671875, "rewards/final_brier_reward_long_step": 0.7823525667190552, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8340977430343628, "step": 473 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 468.0, "completions/max_terminated_length": 468.0, "completions/mean_length": 224.69921875, "completions/mean_terminated_length": 224.69921875, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.7584, "grad_norm": 0.03896433115005493, "learning_rate": 2.6868327402135234e-07, "loss": 0.0022, "num_tokens": 232176304.0, "reward": 1.4751133918762207, "reward_std": 0.09986962378025055, "rewards/accuracy_reward_long_step": 0.56640625, "rewards/final_brier_reward_long_step": 0.8110538721084595, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8237748742103577, "step": 474 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 586.0, "completions/max_terminated_length": 586.0, "completions/mean_length": 241.28125, "completions/mean_terminated_length": 241.28125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.76, "grad_norm": 0.04798499867320061, "learning_rate": 2.669039145907473e-07, "loss": 0.0133, "num_tokens": 232659224.0, "reward": 1.4235737323760986, "reward_std": 0.14305740594863892, "rewards/accuracy_reward_long_step": 0.55859375, "rewards/final_brier_reward_long_step": 0.6940581798553467, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7736741304397583, "step": 475 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 580.0, "completions/max_terminated_length": 580.0, "completions/mean_length": 248.6875, "completions/mean_terminated_length": 248.6875, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.7616, "grad_norm": 0.0414130873978138, "learning_rate": 2.651245551601423e-07, "loss": 0.0079, "num_tokens": 233153904.0, "reward": 1.4499199390411377, "reward_std": 0.13342790305614471, "rewards/accuracy_reward_long_step": 0.53515625, "rewards/final_brier_reward_long_step": 0.8312417268753052, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8278130292892456, "step": 476 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 578.0, "completions/max_terminated_length": 578.0, "completions/mean_length": 244.85546875, "completions/mean_terminated_length": 244.85546875, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.7632, "grad_norm": 0.03957832232117653, "learning_rate": 2.6334519572953733e-07, "loss": -0.0045, "num_tokens": 233630819.0, "reward": 1.3480762243270874, "reward_std": 0.1540793478488922, "rewards/accuracy_reward_long_step": 0.47265625, "rewards/final_brier_reward_long_step": 0.7323105335235596, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7693694233894348, "step": 477 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 429.0, "completions/max_terminated_length": 429.0, "completions/mean_length": 217.80078125, "completions/mean_terminated_length": 217.80078125, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 0.7648, "grad_norm": 0.040704309940338135, "learning_rate": 2.6156583629893234e-07, "loss": 0.002, "num_tokens": 234112184.0, "reward": 1.4834253787994385, "reward_std": 0.176324263215065, "rewards/accuracy_reward_long_step": 0.58984375, "rewards/final_brier_reward_long_step": 0.7984238266944885, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7759029865264893, "step": 478 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 468.0, "completions/max_terminated_length": 468.0, "completions/mean_length": 225.7578125, "completions/mean_terminated_length": 225.7578125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.7664, "grad_norm": 0.039327558130025864, "learning_rate": 2.597864768683274e-07, "loss": 0.0018, "num_tokens": 234606322.0, "reward": 1.312846064567566, "reward_std": 0.09807312488555908, "rewards/accuracy_reward_long_step": 0.41796875, "rewards/final_brier_reward_long_step": 0.7961425185203552, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7833666205406189, "step": 479 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 603.0, "completions/max_terminated_length": 603.0, "completions/mean_length": 224.43359375, "completions/mean_terminated_length": 224.43359375, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.768, "grad_norm": 0.05755281448364258, "learning_rate": 2.580071174377224e-07, "loss": 0.0044, "num_tokens": 235077329.0, "reward": 1.3882198333740234, "reward_std": 0.12557154893875122, "rewards/accuracy_reward_long_step": 0.49609375, "rewards/final_brier_reward_long_step": 0.7875798344612122, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.780924916267395, "step": 480 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 630.0, "completions/max_terminated_length": 630.0, "completions/mean_length": 230.8125, "completions/mean_terminated_length": 231.71766662597656, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.7696, "grad_norm": 0.04049382731318474, "learning_rate": 2.5622775800711744e-07, "loss": -0.0087, "num_tokens": 235557009.0, "reward": 1.3864805698394775, "reward_std": 0.1668914556503296, "rewards/accuracy_reward_long_step": 0.49609375, "rewards/final_brier_reward_long_step": 0.7072670459747314, "rewards/format_reward_long_step": 0.98828125, "rewards/stepwise_brier_reward_long_step": 0.8777177929878235, "step": 481 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 487.0, "completions/max_terminated_length": 487.0, "completions/mean_length": 232.35546875, "completions/mean_terminated_length": 232.35546875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.7712, "grad_norm": 0.04888352006673813, "learning_rate": 2.5444839857651245e-07, "loss": 0.0026, "num_tokens": 236049124.0, "reward": 1.5799849033355713, "reward_std": 0.10926343500614166, "rewards/accuracy_reward_long_step": 0.68359375, "rewards/final_brier_reward_long_step": 0.763283908367157, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8222807049751282, "step": 482 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 469.0, "completions/max_terminated_length": 469.0, "completions/mean_length": 233.8671875, "completions/mean_terminated_length": 233.8671875, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.7728, "grad_norm": 0.035025861114263535, "learning_rate": 2.5266903914590747e-07, "loss": -0.0001, "num_tokens": 236535578.0, "reward": 1.404916524887085, "reward_std": 0.10989418625831604, "rewards/accuracy_reward_long_step": 0.48828125, "rewards/final_brier_reward_long_step": 0.8273136615753174, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.839227557182312, "step": 483 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 463.0, "completions/max_terminated_length": 463.0, "completions/mean_length": 229.078125, "completions/mean_terminated_length": 229.078125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.7744, "grad_norm": 0.03963632136583328, "learning_rate": 2.508896797153025e-07, "loss": -0.0033, "num_tokens": 237017478.0, "reward": 1.5163967609405518, "reward_std": 0.15413016080856323, "rewards/accuracy_reward_long_step": 0.64453125, "rewards/final_brier_reward_long_step": 0.6957645416259766, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7916977405548096, "step": 484 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 445.0, "completions/max_terminated_length": 445.0, "completions/mean_length": 233.1484375, "completions/mean_terminated_length": 233.1484375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.776, "grad_norm": 0.0386706106364727, "learning_rate": 2.491103202846975e-07, "loss": 0.0037, "num_tokens": 237493188.0, "reward": 1.385927677154541, "reward_std": 0.19464275240898132, "rewards/accuracy_reward_long_step": 0.4921875, "rewards/final_brier_reward_long_step": 0.7536214590072632, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8213395476341248, "step": 485 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 522.0, "completions/max_terminated_length": 522.0, "completions/mean_length": 238.83984375, "completions/mean_terminated_length": 238.83984375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.7776, "grad_norm": 0.040338389575481415, "learning_rate": 2.473309608540925e-07, "loss": 0.0038, "num_tokens": 237987779.0, "reward": 1.5493258237838745, "reward_std": 0.15063825249671936, "rewards/accuracy_reward_long_step": 0.66015625, "rewards/final_brier_reward_long_step": 0.7311683893203735, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8255099058151245, "step": 486 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 467.0, "completions/max_terminated_length": 467.0, "completions/mean_length": 219.1171875, "completions/mean_terminated_length": 219.1171875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.7792, "grad_norm": 0.059350065886974335, "learning_rate": 2.455516014234875e-07, "loss": 0.0032, "num_tokens": 238479841.0, "reward": 1.5190156698226929, "reward_std": 0.1180032342672348, "rewards/accuracy_reward_long_step": 0.625, "rewards/final_brier_reward_long_step": 0.7632279396057129, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8128350973129272, "step": 487 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 595.0, "completions/max_terminated_length": 595.0, "completions/mean_length": 239.76953125, "completions/mean_terminated_length": 239.76953125, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.7808, "grad_norm": 0.04275665804743767, "learning_rate": 2.4377224199288254e-07, "loss": 0.0087, "num_tokens": 238974614.0, "reward": 1.480884313583374, "reward_std": 0.11418268084526062, "rewards/accuracy_reward_long_step": 0.58984375, "rewards/final_brier_reward_long_step": 0.7228184938430786, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8413438200950623, "step": 488 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 570.0, "completions/max_terminated_length": 570.0, "completions/mean_length": 235.77734375, "completions/mean_terminated_length": 235.77734375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.7824, "grad_norm": 0.04414826259016991, "learning_rate": 2.4199288256227755e-07, "loss": -0.0019, "num_tokens": 239466805.0, "reward": 1.2463829517364502, "reward_std": 0.14157749712467194, "rewards/accuracy_reward_long_step": 0.3984375, "rewards/final_brier_reward_long_step": 0.6166784763336182, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7751035094261169, "step": 489 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 510.0, "completions/max_terminated_length": 510.0, "completions/mean_length": 246.86328125, "completions/mean_terminated_length": 246.86328125, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.784, "grad_norm": 0.035570476204156876, "learning_rate": 2.4021352313167257e-07, "loss": 0.0011, "num_tokens": 239961386.0, "reward": 1.3018330335617065, "reward_std": 0.10914282500743866, "rewards/accuracy_reward_long_step": 0.40625, "rewards/final_brier_reward_long_step": 0.7352542877197266, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8470777869224548, "step": 490 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 553.0, "completions/max_terminated_length": 553.0, "completions/mean_length": 238.81640625, "completions/mean_terminated_length": 238.81640625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.7856, "grad_norm": 0.04079211875796318, "learning_rate": 2.3843416370106764e-07, "loss": 0.0092, "num_tokens": 240441995.0, "reward": 1.3198940753936768, "reward_std": 0.1449519544839859, "rewards/accuracy_reward_long_step": 0.45703125, "rewards/final_brier_reward_long_step": 0.6919292211532593, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7595219612121582, "step": 491 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 551.0, "completions/max_terminated_length": 551.0, "completions/mean_length": 239.6015625, "completions/mean_terminated_length": 239.6015625, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.7872, "grad_norm": 0.051125992089509964, "learning_rate": 2.366548042704626e-07, "loss": 0.0046, "num_tokens": 240913021.0, "reward": 1.5111445188522339, "reward_std": 0.1414456069469452, "rewards/accuracy_reward_long_step": 0.609375, "rewards/final_brier_reward_long_step": 0.7685543298721313, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.838523805141449, "step": 492 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 408.0, "completions/max_terminated_length": 408.0, "completions/mean_length": 224.24609375, "completions/mean_terminated_length": 224.24609375, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.7888, "grad_norm": 0.0370771661400795, "learning_rate": 2.3487544483985764e-07, "loss": 0.011, "num_tokens": 241386988.0, "reward": 1.56308913230896, "reward_std": 0.10970332473516464, "rewards/accuracy_reward_long_step": 0.66015625, "rewards/final_brier_reward_long_step": 0.8099468946456909, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8017843961715698, "step": 493 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 472.0, "completions/max_terminated_length": 472.0, "completions/mean_length": 240.046875, "completions/mean_terminated_length": 240.046875, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.7904, "grad_norm": 0.0435328409075737, "learning_rate": 2.3309608540925265e-07, "loss": 0.0084, "num_tokens": 241883192.0, "reward": 1.4550728797912598, "reward_std": 0.056624144315719604, "rewards/accuracy_reward_long_step": 0.54296875, "rewards/final_brier_reward_long_step": 0.8555335998535156, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7928834557533264, "step": 494 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 718.0, "completions/max_terminated_length": 718.0, "completions/mean_length": 237.8515625, "completions/mean_terminated_length": 237.8515625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.792, "grad_norm": 0.03587677702307701, "learning_rate": 2.313167259786477e-07, "loss": 0.0089, "num_tokens": 242375578.0, "reward": 1.5103130340576172, "reward_std": 0.10776931047439575, "rewards/accuracy_reward_long_step": 0.59765625, "rewards/final_brier_reward_long_step": 0.8439902067184448, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8066369295120239, "step": 495 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 945.0, "completions/max_terminated_length": 945.0, "completions/mean_length": 239.59375, "completions/mean_terminated_length": 239.59375, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.7936, "grad_norm": 0.041137006133794785, "learning_rate": 2.295373665480427e-07, "loss": 0.0062, "num_tokens": 242859330.0, "reward": 1.4777976274490356, "reward_std": 0.14235195517539978, "rewards/accuracy_reward_long_step": 0.5703125, "rewards/final_brier_reward_long_step": 0.8539682030677795, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7759724259376526, "step": 496 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 544.0, "completions/max_terminated_length": 544.0, "completions/mean_length": 230.46484375, "completions/mean_terminated_length": 230.46484375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.7952, "grad_norm": 0.03613395616412163, "learning_rate": 2.277580071174377e-07, "loss": -0.0055, "num_tokens": 243345321.0, "reward": 1.5040514469146729, "reward_std": 0.1266600787639618, "rewards/accuracy_reward_long_step": 0.60546875, "rewards/final_brier_reward_long_step": 0.8192323446273804, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7750980257987976, "step": 497 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 378.0, "completions/max_terminated_length": 378.0, "completions/mean_length": 226.73828125, "completions/mean_terminated_length": 226.73828125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.7968, "grad_norm": 0.04258696362376213, "learning_rate": 2.2597864768683274e-07, "loss": -0.0045, "num_tokens": 243827942.0, "reward": 1.3869428634643555, "reward_std": 0.1397380828857422, "rewards/accuracy_reward_long_step": 0.4921875, "rewards/final_brier_reward_long_step": 0.7650054693222046, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8140161037445068, "step": 498 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 530.0, "completions/max_terminated_length": 530.0, "completions/mean_length": 240.43359375, "completions/mean_terminated_length": 240.43359375, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.7984, "grad_norm": 0.03320414200425148, "learning_rate": 2.2419928825622775e-07, "loss": -0.0118, "num_tokens": 244325933.0, "reward": 1.4908723831176758, "reward_std": 0.16621750593185425, "rewards/accuracy_reward_long_step": 0.60546875, "rewards/final_brier_reward_long_step": 0.7513167858123779, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7902982234954834, "step": 499 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 454.0, "completions/max_terminated_length": 454.0, "completions/mean_length": 226.42578125, "completions/mean_terminated_length": 226.42578125, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.8, "grad_norm": 0.03856438770890236, "learning_rate": 2.2241992882562277e-07, "loss": 0.0033, "num_tokens": 244805666.0, "reward": 1.5031077861785889, "reward_std": 0.12228081375360489, "rewards/accuracy_reward_long_step": 0.59375, "rewards/final_brier_reward_long_step": 0.8210663795471191, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8163642883300781, "step": 500 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 433.0, "completions/max_terminated_length": 433.0, "completions/mean_length": 220.3984375, "completions/mean_terminated_length": 220.3984375, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.8016, "grad_norm": 0.0355200357735157, "learning_rate": 2.206405693950178e-07, "loss": -0.0003, "num_tokens": 245298544.0, "reward": 1.5138522386550903, "reward_std": 0.15135133266448975, "rewards/accuracy_reward_long_step": 0.578125, "rewards/final_brier_reward_long_step": 0.8448459506034851, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.9058756828308105, "step": 501 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 425.0, "completions/max_terminated_length": 425.0, "completions/mean_length": 237.34765625, "completions/mean_terminated_length": 237.34765625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.8032, "grad_norm": 0.037190381437540054, "learning_rate": 2.188612099644128e-07, "loss": 0.0034, "num_tokens": 245784137.0, "reward": 1.5037150382995605, "reward_std": 0.15791866183280945, "rewards/accuracy_reward_long_step": 0.609375, "rewards/final_brier_reward_long_step": 0.7559190988540649, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8214409947395325, "step": 502 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 555.0, "completions/max_terminated_length": 555.0, "completions/mean_length": 224.734375, "completions/mean_terminated_length": 224.734375, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.8048, "grad_norm": 0.0395224392414093, "learning_rate": 2.170818505338078e-07, "loss": 0.0042, "num_tokens": 246249421.0, "reward": 1.5526416301727295, "reward_std": 0.11045798659324646, "rewards/accuracy_reward_long_step": 0.6484375, "rewards/final_brier_reward_long_step": 0.818356990814209, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7984594702720642, "step": 503 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 514.0, "completions/max_terminated_length": 514.0, "completions/mean_length": 236.96484375, "completions/mean_terminated_length": 236.96484375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.8064, "grad_norm": 0.03467653691768646, "learning_rate": 2.1530249110320285e-07, "loss": 0.0056, "num_tokens": 246741180.0, "reward": 1.4294129610061646, "reward_std": 0.15054050087928772, "rewards/accuracy_reward_long_step": 0.55859375, "rewards/final_brier_reward_long_step": 0.730989396572113, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7522871494293213, "step": 504 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 491.0, "completions/max_terminated_length": 491.0, "completions/mean_length": 244.40234375, "completions/mean_terminated_length": 244.40234375, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.808, "grad_norm": 0.03365206718444824, "learning_rate": 2.1352313167259786e-07, "loss": -0.0079, "num_tokens": 247235891.0, "reward": 1.3486175537109375, "reward_std": 0.10975901782512665, "rewards/accuracy_reward_long_step": 0.44140625, "rewards/final_brier_reward_long_step": 0.8065632581710815, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8222817182540894, "step": 505 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 522.0, "completions/max_terminated_length": 522.0, "completions/mean_length": 230.9921875, "completions/mean_terminated_length": 230.9921875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.8096, "grad_norm": 0.041645560413599014, "learning_rate": 2.1174377224199288e-07, "loss": 0.0051, "num_tokens": 247714529.0, "reward": 1.614863395690918, "reward_std": 0.1503724455833435, "rewards/accuracy_reward_long_step": 0.703125, "rewards/final_brier_reward_long_step": 0.7402952909469604, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.9066582918167114, "step": 506 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 606.0, "completions/max_terminated_length": 606.0, "completions/mean_length": 236.5625, "completions/mean_terminated_length": 236.5625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.8112, "grad_norm": 0.045049868524074554, "learning_rate": 2.099644128113879e-07, "loss": -0.0034, "num_tokens": 248209161.0, "reward": 1.312518835067749, "reward_std": 0.1855650395154953, "rewards/accuracy_reward_long_step": 0.44140625, "rewards/final_brier_reward_long_step": 0.7521023154258728, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7323479056358337, "step": 507 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 455.0, "completions/max_terminated_length": 455.0, "completions/mean_length": 229.140625, "completions/mean_terminated_length": 229.140625, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.8128, "grad_norm": 0.047168269753456116, "learning_rate": 2.081850533807829e-07, "loss": 0.0165, "num_tokens": 248688005.0, "reward": 1.4646296501159668, "reward_std": 0.09372396022081375, "rewards/accuracy_reward_long_step": 0.57421875, "rewards/final_brier_reward_long_step": 0.774226188659668, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7874171733856201, "step": 508 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 486.0, "completions/max_terminated_length": 486.0, "completions/mean_length": 234.80078125, "completions/mean_terminated_length": 234.80078125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.8144, "grad_norm": 0.04063938930630684, "learning_rate": 2.0640569395017792e-07, "loss": -0.015, "num_tokens": 249176586.0, "reward": 1.3976056575775146, "reward_std": 0.1380692720413208, "rewards/accuracy_reward_long_step": 0.546875, "rewards/final_brier_reward_long_step": 0.6868070363998413, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7161159515380859, "step": 509 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 457.0, "completions/max_terminated_length": 457.0, "completions/mean_length": 234.921875, "completions/mean_terminated_length": 234.921875, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.816, "grad_norm": 0.04220227152109146, "learning_rate": 2.0462633451957296e-07, "loss": -0.0111, "num_tokens": 249668526.0, "reward": 1.4461275339126587, "reward_std": 0.17553001642227173, "rewards/accuracy_reward_long_step": 0.5859375, "rewards/final_brier_reward_long_step": 0.7025785446166992, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7381817102432251, "step": 510 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 435.0, "completions/max_terminated_length": 435.0, "completions/mean_length": 223.1015625, "completions/mean_terminated_length": 223.1015625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.8176, "grad_norm": 0.04910369962453842, "learning_rate": 2.0284697508896798e-07, "loss": 0.013, "num_tokens": 250138040.0, "reward": 1.480068325996399, "reward_std": 0.17966505885124207, "rewards/accuracy_reward_long_step": 0.5390625, "rewards/final_brier_reward_long_step": 0.8528038263320923, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.9112191200256348, "step": 511 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 389.0, "completions/max_terminated_length": 389.0, "completions/mean_length": 235.9375, "completions/mean_terminated_length": 235.9375, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.8192, "grad_norm": 0.03783193230628967, "learning_rate": 2.0106761565836297e-07, "loss": 0.0023, "num_tokens": 250629424.0, "reward": 1.5700932741165161, "reward_std": 0.12437284737825394, "rewards/accuracy_reward_long_step": 0.66015625, "rewards/final_brier_reward_long_step": 0.8810421824455261, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7587060928344727, "step": 512 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 510.0, "completions/max_terminated_length": 510.0, "completions/mean_length": 255.25390625, "completions/mean_terminated_length": 255.25390625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.8208, "grad_norm": 0.052237872034311295, "learning_rate": 1.99288256227758e-07, "loss": -0.0014, "num_tokens": 251129969.0, "reward": 1.3335564136505127, "reward_std": 0.13581520318984985, "rewards/accuracy_reward_long_step": 0.453125, "rewards/final_brier_reward_long_step": 0.7354176044464111, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7863079905509949, "step": 513 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 520.0, "completions/max_terminated_length": 520.0, "completions/mean_length": 227.5546875, "completions/mean_terminated_length": 227.5546875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.8224, "grad_norm": 0.03678586706519127, "learning_rate": 1.9750889679715302e-07, "loss": -0.0089, "num_tokens": 251612919.0, "reward": 1.4214198589324951, "reward_std": 0.1188969761133194, "rewards/accuracy_reward_long_step": 0.52734375, "rewards/final_brier_reward_long_step": 0.7470394372940063, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8292652368545532, "step": 514 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 514.0, "completions/max_terminated_length": 514.0, "completions/mean_length": 245.015625, "completions/mean_terminated_length": 245.015625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.824, "grad_norm": 0.05070950463414192, "learning_rate": 1.9572953736654804e-07, "loss": 0.0063, "num_tokens": 252120547.0, "reward": 1.5003015995025635, "reward_std": 0.14131146669387817, "rewards/accuracy_reward_long_step": 0.609375, "rewards/final_brier_reward_long_step": 0.7343014478683472, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8294050693511963, "step": 515 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 586.0, "completions/max_terminated_length": 586.0, "completions/mean_length": 234.69921875, "completions/mean_terminated_length": 234.69921875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.8256, "grad_norm": 0.04203731194138527, "learning_rate": 1.9395017793594305e-07, "loss": 0.0013, "num_tokens": 252610774.0, "reward": 1.4129828214645386, "reward_std": 0.09008841961622238, "rewards/accuracy_reward_long_step": 0.51171875, "rewards/final_brier_reward_long_step": 0.7625414133071899, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8425147533416748, "step": 516 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 516.0, "completions/max_terminated_length": 516.0, "completions/mean_length": 244.05078125, "completions/mean_terminated_length": 244.05078125, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.8272, "grad_norm": 0.04504576325416565, "learning_rate": 1.9217081850533807e-07, "loss": 0.0092, "num_tokens": 253084835.0, "reward": 1.3626664876937866, "reward_std": 0.13317476212978363, "rewards/accuracy_reward_long_step": 0.49609375, "rewards/final_brier_reward_long_step": 0.7290824055671692, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7372086048126221, "step": 517 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 535.0, "completions/max_terminated_length": 535.0, "completions/mean_length": 235.015625, "completions/mean_terminated_length": 235.015625, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.8288, "grad_norm": 0.04393794387578964, "learning_rate": 1.9039145907473308e-07, "loss": 0.014, "num_tokens": 253571311.0, "reward": 1.5000429153442383, "reward_std": 0.13177230954170227, "rewards/accuracy_reward_long_step": 0.5859375, "rewards/final_brier_reward_long_step": 0.7909968495368958, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8654246926307678, "step": 518 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 596.0, "completions/max_terminated_length": 596.0, "completions/mean_length": 247.33203125, "completions/mean_terminated_length": 247.33203125, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.8304, "grad_norm": 0.03598223626613617, "learning_rate": 1.8861209964412812e-07, "loss": 0.0185, "num_tokens": 254043356.0, "reward": 1.3758127689361572, "reward_std": 0.17646163702011108, "rewards/accuracy_reward_long_step": 0.47265625, "rewards/final_brier_reward_long_step": 0.7502039074897766, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8624222278594971, "step": 519 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 550.0, "completions/max_terminated_length": 550.0, "completions/mean_length": 236.67578125, "completions/mean_terminated_length": 236.67578125, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.832, "grad_norm": 0.03368176519870758, "learning_rate": 1.8683274021352314e-07, "loss": 0.0061, "num_tokens": 254522601.0, "reward": 1.4897700548171997, "reward_std": 0.10578904300928116, "rewards/accuracy_reward_long_step": 0.58984375, "rewards/final_brier_reward_long_step": 0.7538363337516785, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8458687663078308, "step": 520 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 458.0, "completions/max_terminated_length": 458.0, "completions/mean_length": 230.203125, "completions/mean_terminated_length": 230.203125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.8336, "grad_norm": 0.05814650282263756, "learning_rate": 1.8505338078291812e-07, "loss": 0.0063, "num_tokens": 254993893.0, "reward": 1.419898271560669, "reward_std": 0.11159418523311615, "rewards/accuracy_reward_long_step": 0.5390625, "rewards/final_brier_reward_long_step": 0.7294105291366577, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7939323782920837, "step": 521 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 633.0, "completions/max_terminated_length": 633.0, "completions/mean_length": 240.46875, "completions/mean_terminated_length": 240.46875, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.8352, "grad_norm": 0.03419061005115509, "learning_rate": 1.8327402135231316e-07, "loss": -0.0096, "num_tokens": 255490549.0, "reward": 1.4004169702529907, "reward_std": 0.12270954251289368, "rewards/accuracy_reward_long_step": 0.4765625, "rewards/final_brier_reward_long_step": 0.8064777851104736, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8889400959014893, "step": 522 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 499.0, "completions/max_terminated_length": 499.0, "completions/mean_length": 242.21875, "completions/mean_terminated_length": 242.21875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.8368, "grad_norm": 0.036356884986162186, "learning_rate": 1.8149466192170818e-07, "loss": -0.0055, "num_tokens": 255991109.0, "reward": 1.4097753763198853, "reward_std": 0.10558044910430908, "rewards/accuracy_reward_long_step": 0.51953125, "rewards/final_brier_reward_long_step": 0.7266496419906616, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8343270421028137, "step": 523 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 494.0, "completions/max_terminated_length": 494.0, "completions/mean_length": 235.08203125, "completions/mean_terminated_length": 235.08203125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.8384, "grad_norm": 0.04891684278845787, "learning_rate": 1.797153024911032e-07, "loss": 0.0014, "num_tokens": 256465850.0, "reward": 1.4318642616271973, "reward_std": 0.12251758575439453, "rewards/accuracy_reward_long_step": 0.5390625, "rewards/final_brier_reward_long_step": 0.768867552280426, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8023396730422974, "step": 524 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 477.0, "completions/max_terminated_length": 477.0, "completions/mean_length": 231.28125, "completions/mean_terminated_length": 231.28125, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.84, "grad_norm": 0.04874948784708977, "learning_rate": 1.7793594306049823e-07, "loss": -0.0015, "num_tokens": 256950562.0, "reward": 1.454651117324829, "reward_std": 0.15635967254638672, "rewards/accuracy_reward_long_step": 0.578125, "rewards/final_brier_reward_long_step": 0.686775803565979, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8193286657333374, "step": 525 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 503.0, "completions/max_terminated_length": 503.0, "completions/mean_length": 247.87109375, "completions/mean_terminated_length": 247.87109375, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.8416, "grad_norm": 0.05547276511788368, "learning_rate": 1.7615658362989322e-07, "loss": 0.0132, "num_tokens": 257429993.0, "reward": 1.3650845289230347, "reward_std": 0.16451743245124817, "rewards/accuracy_reward_long_step": 0.46875, "rewards/final_brier_reward_long_step": 0.779395341873169, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8059428930282593, "step": 526 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 786.0, "completions/max_terminated_length": 786.0, "completions/mean_length": 250.98828125, "completions/mean_terminated_length": 250.98828125, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.8432, "grad_norm": 0.035361409187316895, "learning_rate": 1.7437722419928824e-07, "loss": 0.0091, "num_tokens": 257921134.0, "reward": 1.50462007522583, "reward_std": 0.12116993218660355, "rewards/accuracy_reward_long_step": 0.60546875, "rewards/final_brier_reward_long_step": 0.7998992204666138, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7967061400413513, "step": 527 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 548.0, "completions/max_terminated_length": 548.0, "completions/mean_length": 253.18359375, "completions/mean_terminated_length": 253.18359375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.8448, "grad_norm": 0.03830602765083313, "learning_rate": 1.7259786476868328e-07, "loss": -0.0055, "num_tokens": 258402709.0, "reward": 1.3311948776245117, "reward_std": 0.11117161065340042, "rewards/accuracy_reward_long_step": 0.46484375, "rewards/final_brier_reward_long_step": 0.7396460771560669, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7257586717605591, "step": 528 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/max_terminated_length": 427.0, "completions/mean_length": 249.9921875, "completions/mean_terminated_length": 249.9921875, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.8464, "grad_norm": 0.04274242743849754, "learning_rate": 1.708185053380783e-07, "loss": 0.0085, "num_tokens": 258892547.0, "reward": 1.5267962217330933, "reward_std": 0.10823096334934235, "rewards/accuracy_reward_long_step": 0.609375, "rewards/final_brier_reward_long_step": 0.8286827802658081, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8410018086433411, "step": 529 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 533.0, "completions/max_terminated_length": 533.0, "completions/mean_length": 247.6875, "completions/mean_terminated_length": 247.6875, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.848, "grad_norm": 0.046045657247304916, "learning_rate": 1.690391459074733e-07, "loss": -0.0093, "num_tokens": 259389531.0, "reward": 1.3362867832183838, "reward_std": 0.10417808592319489, "rewards/accuracy_reward_long_step": 0.4296875, "rewards/final_brier_reward_long_step": 0.8625573515892029, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7638399600982666, "step": 530 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 503.0, "completions/max_terminated_length": 503.0, "completions/mean_length": 237.73828125, "completions/mean_terminated_length": 237.73828125, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.8496, "grad_norm": 0.055190231651067734, "learning_rate": 1.6725978647686832e-07, "loss": -0.0043, "num_tokens": 259886432.0, "reward": 1.4131598472595215, "reward_std": 0.12312982231378555, "rewards/accuracy_reward_long_step": 0.54296875, "rewards/final_brier_reward_long_step": 0.6990100145339966, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7817543745040894, "step": 531 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 386.0, "completions/max_terminated_length": 386.0, "completions/mean_length": 232.515625, "completions/mean_terminated_length": 232.515625, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.8512, "grad_norm": 0.03550275042653084, "learning_rate": 1.6548042704626334e-07, "loss": -0.0068, "num_tokens": 260358252.0, "reward": 1.4527807235717773, "reward_std": 0.13987179100513458, "rewards/accuracy_reward_long_step": 0.578125, "rewards/final_brier_reward_long_step": 0.7386081218719482, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7678276300430298, "step": 532 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 606.0, "completions/max_terminated_length": 606.0, "completions/mean_length": 237.8671875, "completions/mean_terminated_length": 238.80001831054688, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.8528, "grad_norm": 0.040301430970430374, "learning_rate": 1.6370106761565835e-07, "loss": -0.024, "num_tokens": 260832714.0, "reward": 1.428901195526123, "reward_std": 0.09275516867637634, "rewards/accuracy_reward_long_step": 0.5234375, "rewards/final_brier_reward_long_step": 0.776361346244812, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8533056974411011, "step": 533 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 462.0, "completions/max_terminated_length": 462.0, "completions/mean_length": 238.7578125, "completions/mean_terminated_length": 238.7578125, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.8544, "grad_norm": 0.04402696341276169, "learning_rate": 1.619217081850534e-07, "loss": 0.0134, "num_tokens": 261320468.0, "reward": 1.4897812604904175, "reward_std": 0.09281490743160248, "rewards/accuracy_reward_long_step": 0.59765625, "rewards/final_brier_reward_long_step": 0.7256316542625427, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.842868447303772, "step": 534 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 517.0, "completions/max_terminated_length": 517.0, "completions/mean_length": 227.56640625, "completions/mean_terminated_length": 228.45883178710938, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.856, "grad_norm": 0.04461900517344475, "learning_rate": 1.601423487544484e-07, "loss": -0.0036, "num_tokens": 261802045.0, "reward": 1.5405223369598389, "reward_std": 0.1693045049905777, "rewards/accuracy_reward_long_step": 0.66015625, "rewards/final_brier_reward_long_step": 0.6963077783584595, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8329689502716064, "step": 535 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 630.0, "completions/max_terminated_length": 630.0, "completions/mean_length": 243.41015625, "completions/mean_terminated_length": 243.41015625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.8576, "grad_norm": 0.0438154935836792, "learning_rate": 1.583629893238434e-07, "loss": 0.0014, "num_tokens": 262282190.0, "reward": 1.3707561492919922, "reward_std": 0.16878756880760193, "rewards/accuracy_reward_long_step": 0.4609375, "rewards/final_brier_reward_long_step": 0.7976784706115723, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8415963649749756, "step": 536 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 528.0, "completions/max_terminated_length": 528.0, "completions/mean_length": 233.390625, "completions/mean_terminated_length": 233.390625, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.8592, "grad_norm": 0.04220299795269966, "learning_rate": 1.5658362989323843e-07, "loss": 0.0062, "num_tokens": 262769162.0, "reward": 1.5402344465255737, "reward_std": 0.12223983556032181, "rewards/accuracy_reward_long_step": 0.609375, "rewards/final_brier_reward_long_step": 0.8557167053222656, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8677208423614502, "step": 537 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 601.0, "completions/max_terminated_length": 601.0, "completions/mean_length": 235.33203125, "completions/mean_terminated_length": 235.33203125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.8608, "grad_norm": 0.03791709989309311, "learning_rate": 1.5480427046263345e-07, "loss": -0.0013, "num_tokens": 263247303.0, "reward": 1.5486342906951904, "reward_std": 0.11805526912212372, "rewards/accuracy_reward_long_step": 0.65625, "rewards/final_brier_reward_long_step": 0.7473232746124268, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8222134709358215, "step": 538 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 540.0, "completions/max_terminated_length": 540.0, "completions/mean_length": 238.59375, "completions/mean_terminated_length": 238.59375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.8624, "grad_norm": 0.04007653519511223, "learning_rate": 1.5302491103202846e-07, "loss": -0.0057, "num_tokens": 263739847.0, "reward": 1.5600248575210571, "reward_std": 0.14350242912769318, "rewards/accuracy_reward_long_step": 0.67578125, "rewards/final_brier_reward_long_step": 0.7258714437484741, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8111032247543335, "step": 539 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 541.0, "completions/max_terminated_length": 541.0, "completions/mean_length": 250.05859375, "completions/mean_terminated_length": 250.05859375, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.864, "grad_norm": 0.03347768262028694, "learning_rate": 1.512455516014235e-07, "loss": -0.0126, "num_tokens": 264235814.0, "reward": 1.495011568069458, "reward_std": 0.08270839601755142, "rewards/accuracy_reward_long_step": 0.5625, "rewards/final_brier_reward_long_step": 0.8477886915206909, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8822580575942993, "step": 540 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 613.0, "completions/max_terminated_length": 613.0, "completions/mean_length": 261.34375, "completions/mean_terminated_length": 261.34375, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.8656, "grad_norm": 0.03869554027915001, "learning_rate": 1.494661921708185e-07, "loss": -0.0073, "num_tokens": 264741902.0, "reward": 1.3477532863616943, "reward_std": 0.1298760026693344, "rewards/accuracy_reward_long_step": 0.46875, "rewards/final_brier_reward_long_step": 0.7019554376602173, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8140577077865601, "step": 541 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 724.0, "completions/max_terminated_length": 724.0, "completions/mean_length": 240.37890625, "completions/mean_terminated_length": 240.37890625, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.8672, "grad_norm": 0.036735206842422485, "learning_rate": 1.476868327402135e-07, "loss": 0.0053, "num_tokens": 265240223.0, "reward": 1.2603323459625244, "reward_std": 0.05455077812075615, "rewards/accuracy_reward_long_step": 0.375, "rewards/final_brier_reward_long_step": 0.7330679893493652, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8082613945007324, "step": 542 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 865.0, "completions/max_terminated_length": 865.0, "completions/mean_length": 226.53515625, "completions/mean_terminated_length": 226.53515625, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.8688, "grad_norm": 0.04960142448544502, "learning_rate": 1.4590747330960855e-07, "loss": 0.0033, "num_tokens": 265742248.0, "reward": 1.5600826740264893, "reward_std": 0.10487354546785355, "rewards/accuracy_reward_long_step": 0.65625, "rewards/final_brier_reward_long_step": 0.7979686260223389, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8173620700836182, "step": 543 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 665.0, "completions/max_terminated_length": 665.0, "completions/mean_length": 251.0546875, "completions/mean_terminated_length": 251.0546875, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.8704, "grad_norm": 0.03681975230574608, "learning_rate": 1.4412811387900356e-07, "loss": 0.0118, "num_tokens": 266235870.0, "reward": 1.3697454929351807, "reward_std": 0.14522971212863922, "rewards/accuracy_reward_long_step": 0.4765625, "rewards/final_brier_reward_long_step": 0.7746487855911255, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.798083484172821, "step": 544 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/max_terminated_length": 459.0, "completions/mean_length": 229.875, "completions/mean_terminated_length": 229.875, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.872, "grad_norm": 0.03889426216483116, "learning_rate": 1.4234875444839858e-07, "loss": -0.0055, "num_tokens": 266730838.0, "reward": 1.3409353494644165, "reward_std": 0.18033601343631744, "rewards/accuracy_reward_long_step": 0.46484375, "rewards/final_brier_reward_long_step": 0.7062370777130127, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7981289625167847, "step": 545 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 483.0, "completions/max_terminated_length": 483.0, "completions/mean_length": 254.52734375, "completions/mean_terminated_length": 254.52734375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.8736, "grad_norm": 0.04387963190674782, "learning_rate": 1.405693950177936e-07, "loss": 0.0065, "num_tokens": 267218405.0, "reward": 1.3384535312652588, "reward_std": 0.112638920545578, "rewards/accuracy_reward_long_step": 0.44921875, "rewards/final_brier_reward_long_step": 0.725222647190094, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8317165374755859, "step": 546 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 579.0, "completions/max_terminated_length": 579.0, "completions/mean_length": 239.703125, "completions/mean_terminated_length": 239.703125, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.8752, "grad_norm": 0.03998541459441185, "learning_rate": 1.387900355871886e-07, "loss": -0.0061, "num_tokens": 267706177.0, "reward": 1.5569477081298828, "reward_std": 0.1203605979681015, "rewards/accuracy_reward_long_step": 0.6484375, "rewards/final_brier_reward_long_step": 0.8150613307952881, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8189792633056641, "step": 547 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 505.0, "completions/max_terminated_length": 505.0, "completions/mean_length": 247.90625, "completions/mean_terminated_length": 247.90625, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.8768, "grad_norm": 0.040950994938611984, "learning_rate": 1.3701067615658362e-07, "loss": -0.0104, "num_tokens": 268198305.0, "reward": 1.3657258749008179, "reward_std": 0.1606340855360031, "rewards/accuracy_reward_long_step": 0.44140625, "rewards/final_brier_reward_long_step": 0.8146769404411316, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8826013803482056, "step": 548 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 416.0, "completions/max_terminated_length": 416.0, "completions/mean_length": 231.60546875, "completions/mean_terminated_length": 231.60546875, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.8784, "grad_norm": 0.050138987600803375, "learning_rate": 1.3523131672597866e-07, "loss": -0.0009, "num_tokens": 268659020.0, "reward": 1.3868610858917236, "reward_std": 0.1097867488861084, "rewards/accuracy_reward_long_step": 0.49609375, "rewards/final_brier_reward_long_step": 0.715602695941925, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8474666476249695, "step": 549 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 551.0, "completions/max_terminated_length": 551.0, "completions/mean_length": 224.76171875, "completions/mean_terminated_length": 224.76171875, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.88, "grad_norm": 0.058884453028440475, "learning_rate": 1.3345195729537365e-07, "loss": 0.0061, "num_tokens": 269130471.0, "reward": 1.539604902267456, "reward_std": 0.15575310587882996, "rewards/accuracy_reward_long_step": 0.61328125, "rewards/final_brier_reward_long_step": 0.8004753589630127, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.904819130897522, "step": 550 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 574.0, "completions/max_terminated_length": 574.0, "completions/mean_length": 232.984375, "completions/mean_terminated_length": 232.984375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.8816, "grad_norm": 0.04245281219482422, "learning_rate": 1.3167259786476866e-07, "loss": -0.0042, "num_tokens": 269601147.0, "reward": 1.1472269296646118, "reward_std": 0.12594836950302124, "rewards/accuracy_reward_long_step": 0.28125, "rewards/final_brier_reward_long_step": 0.6631394624710083, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.800768256187439, "step": 551 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 400.0, "completions/max_terminated_length": 400.0, "completions/mean_length": 230.94140625, "completions/mean_terminated_length": 230.94140625, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.8832, "grad_norm": 0.0368904173374176, "learning_rate": 1.298932384341637e-07, "loss": 0.0065, "num_tokens": 270091988.0, "reward": 1.429011344909668, "reward_std": 0.12329679727554321, "rewards/accuracy_reward_long_step": 0.546875, "rewards/final_brier_reward_long_step": 0.7112011313438416, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8173440098762512, "step": 552 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 435.0, "completions/max_terminated_length": 435.0, "completions/mean_length": 228.328125, "completions/mean_terminated_length": 228.328125, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.8848, "grad_norm": 0.03731711953878403, "learning_rate": 1.2811387900355872e-07, "loss": 0.0103, "num_tokens": 270583416.0, "reward": 1.5979522466659546, "reward_std": 0.10820707678794861, "rewards/accuracy_reward_long_step": 0.68359375, "rewards/final_brier_reward_long_step": 0.8128556609153748, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8445781469345093, "step": 553 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 521.0, "completions/max_terminated_length": 521.0, "completions/mean_length": 237.7421875, "completions/mean_terminated_length": 237.7421875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.8864, "grad_norm": 0.03969436511397362, "learning_rate": 1.2633451957295373e-07, "loss": -0.0071, "num_tokens": 271076430.0, "reward": 1.3755825757980347, "reward_std": 0.11075370013713837, "rewards/accuracy_reward_long_step": 0.48046875, "rewards/final_brier_reward_long_step": 0.7792448997497559, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8012101054191589, "step": 554 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 494.0, "completions/max_terminated_length": 494.0, "completions/mean_length": 246.546875, "completions/mean_terminated_length": 246.546875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.888, "grad_norm": 0.03638119623064995, "learning_rate": 1.2455516014234875e-07, "loss": 0.0094, "num_tokens": 271573154.0, "reward": 1.4438494443893433, "reward_std": 0.14020705223083496, "rewards/accuracy_reward_long_step": 0.5390625, "rewards/final_brier_reward_long_step": 0.7870507836341858, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8320969343185425, "step": 555 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 503.0, "completions/max_terminated_length": 503.0, "completions/mean_length": 234.90234375, "completions/mean_terminated_length": 234.90234375, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.8896, "grad_norm": 0.045414846390485764, "learning_rate": 1.2277580071174376e-07, "loss": 0.0053, "num_tokens": 272051201.0, "reward": 1.4294031858444214, "reward_std": 0.07926599681377411, "rewards/accuracy_reward_long_step": 0.52734375, "rewards/final_brier_reward_long_step": 0.8273754119873047, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7808624505996704, "step": 556 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 735.0, "completions/max_terminated_length": 735.0, "completions/mean_length": 248.7890625, "completions/mean_terminated_length": 248.7890625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.8912, "grad_norm": 0.042798083275556564, "learning_rate": 1.2099644128113878e-07, "loss": -0.0078, "num_tokens": 272544787.0, "reward": 1.5465130805969238, "reward_std": 0.09747041761875153, "rewards/accuracy_reward_long_step": 0.65234375, "rewards/final_brier_reward_long_step": 0.7800741791725159, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7966029644012451, "step": 557 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 536.0, "completions/max_terminated_length": 536.0, "completions/mean_length": 231.265625, "completions/mean_terminated_length": 231.265625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.8928, "grad_norm": 0.03589711710810661, "learning_rate": 1.1921708185053382e-07, "loss": -0.0023, "num_tokens": 273043191.0, "reward": 1.419055461883545, "reward_std": 0.12412445992231369, "rewards/accuracy_reward_long_step": 0.5, "rewards/final_brier_reward_long_step": 0.8063081502914429, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8699132204055786, "step": 558 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 465.0, "completions/max_terminated_length": 465.0, "completions/mean_length": 234.390625, "completions/mean_terminated_length": 234.390625, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.8944, "grad_norm": 0.0373535081744194, "learning_rate": 1.1743772241992882e-07, "loss": -0.0117, "num_tokens": 273519939.0, "reward": 1.3120028972625732, "reward_std": 0.15166430175304413, "rewards/accuracy_reward_long_step": 0.44140625, "rewards/final_brier_reward_long_step": 0.6639257669448853, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8184609413146973, "step": 559 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 421.0, "completions/max_terminated_length": 421.0, "completions/mean_length": 229.53515625, "completions/mean_terminated_length": 229.53515625, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.896, "grad_norm": 0.043007586151361465, "learning_rate": 1.1565836298932385e-07, "loss": 0.0086, "num_tokens": 274002692.0, "reward": 1.4279296398162842, "reward_std": 0.13204392790794373, "rewards/accuracy_reward_long_step": 0.546875, "rewards/final_brier_reward_long_step": 0.721155047416687, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8030637502670288, "step": 560 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 500.0, "completions/max_terminated_length": 500.0, "completions/mean_length": 241.80078125, "completions/mean_terminated_length": 241.80078125, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.8976, "grad_norm": 0.04521722346544266, "learning_rate": 1.1387900355871885e-07, "loss": 0.0018, "num_tokens": 274482809.0, "reward": 1.4112411737442017, "reward_std": 0.08869924396276474, "rewards/accuracy_reward_long_step": 0.50390625, "rewards/final_brier_reward_long_step": 0.8020182847976685, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8273211717605591, "step": 561 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 436.0, "completions/max_terminated_length": 436.0, "completions/mean_length": 237.63671875, "completions/mean_terminated_length": 237.63671875, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.8992, "grad_norm": 0.04603974521160126, "learning_rate": 1.1209964412811388e-07, "loss": 0.011, "num_tokens": 274960236.0, "reward": 1.3271350860595703, "reward_std": 0.0897248238325119, "rewards/accuracy_reward_long_step": 0.43359375, "rewards/final_brier_reward_long_step": 0.7432428598403931, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8309223055839539, "step": 562 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 481.0, "completions/max_terminated_length": 481.0, "completions/mean_length": 244.44140625, "completions/mean_terminated_length": 244.44140625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.9008, "grad_norm": 0.04642568156123161, "learning_rate": 1.103202846975089e-07, "loss": -0.0195, "num_tokens": 275447389.0, "reward": 1.245979905128479, "reward_std": 0.11592195183038712, "rewards/accuracy_reward_long_step": 0.3828125, "rewards/final_brier_reward_long_step": 0.720180869102478, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7324886322021484, "step": 563 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 421.0, "completions/max_terminated_length": 421.0, "completions/mean_length": 218.67578125, "completions/mean_terminated_length": 218.67578125, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.9024, "grad_norm": 0.043522898107767105, "learning_rate": 1.085409252669039e-07, "loss": 0.0223, "num_tokens": 275928954.0, "reward": 1.5771305561065674, "reward_std": 0.11938679218292236, "rewards/accuracy_reward_long_step": 0.671875, "rewards/final_brier_reward_long_step": 0.7716304063796997, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8493919372558594, "step": 564 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 623.0, "completions/max_terminated_length": 623.0, "completions/mean_length": 227.44921875, "completions/mean_terminated_length": 227.44921875, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.904, "grad_norm": 0.037754617631435394, "learning_rate": 1.0676156583629893e-07, "loss": 0.0003, "num_tokens": 276409509.0, "reward": 1.6633574962615967, "reward_std": 0.13154950737953186, "rewards/accuracy_reward_long_step": 0.75390625, "rewards/final_brier_reward_long_step": 0.8070245981216431, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8385924696922302, "step": 565 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 627.0, "completions/max_terminated_length": 627.0, "completions/mean_length": 245.3046875, "completions/mean_terminated_length": 245.3046875, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.9056, "grad_norm": 0.04103442654013634, "learning_rate": 1.0498220640569395e-07, "loss": 0.0011, "num_tokens": 276899091.0, "reward": 1.378248691558838, "reward_std": 0.1689617931842804, "rewards/accuracy_reward_long_step": 0.48828125, "rewards/final_brier_reward_long_step": 0.7711043357849121, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.788765549659729, "step": 566 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 593.0, "completions/max_terminated_length": 593.0, "completions/mean_length": 254.94140625, "completions/mean_terminated_length": 254.94140625, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.9072, "grad_norm": 0.03693830594420433, "learning_rate": 1.0320284697508896e-07, "loss": -0.0089, "num_tokens": 277401420.0, "reward": 1.2199474573135376, "reward_std": 0.18783383071422577, "rewards/accuracy_reward_long_step": 0.3515625, "rewards/final_brier_reward_long_step": 0.6998242139816284, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7737154960632324, "step": 567 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 618.0, "completions/max_terminated_length": 618.0, "completions/mean_length": 240.95703125, "completions/mean_terminated_length": 240.95703125, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.9088, "grad_norm": 0.04158253222703934, "learning_rate": 1.0142348754448399e-07, "loss": 0.0082, "num_tokens": 277887441.0, "reward": 1.4272714853286743, "reward_std": 0.17625702917575836, "rewards/accuracy_reward_long_step": 0.5390625, "rewards/final_brier_reward_long_step": 0.776642918586731, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7761929035186768, "step": 568 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 536.0, "completions/max_terminated_length": 536.0, "completions/mean_length": 231.8125, "completions/mean_terminated_length": 231.8125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.9104, "grad_norm": 0.0454368069767952, "learning_rate": 9.9644128113879e-08, "loss": 0.0031, "num_tokens": 278370153.0, "reward": 1.3535585403442383, "reward_std": 0.09445783495903015, "rewards/accuracy_reward_long_step": 0.43359375, "rewards/final_brier_reward_long_step": 0.843848466873169, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8360108137130737, "step": 569 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 452.0, "completions/max_terminated_length": 452.0, "completions/mean_length": 228.94921875, "completions/mean_terminated_length": 228.94921875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.912, "grad_norm": 0.03921419754624367, "learning_rate": 9.786476868327402e-08, "loss": 0.0004, "num_tokens": 278864260.0, "reward": 1.426851749420166, "reward_std": 0.18421000242233276, "rewards/accuracy_reward_long_step": 0.54296875, "rewards/final_brier_reward_long_step": 0.7497539520263672, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7857784032821655, "step": 570 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 418.0, "completions/max_terminated_length": 418.0, "completions/mean_length": 236.28125, "completions/mean_terminated_length": 236.28125, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.9136, "grad_norm": 0.03796105086803436, "learning_rate": 9.608540925266903e-08, "loss": -0.0075, "num_tokens": 279345732.0, "reward": 1.2733724117279053, "reward_std": 0.17839360237121582, "rewards/accuracy_reward_long_step": 0.41015625, "rewards/final_brier_reward_long_step": 0.6511929631233215, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8016713857650757, "step": 571 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 525.0, "completions/max_terminated_length": 525.0, "completions/mean_length": 243.11328125, "completions/mean_terminated_length": 243.11328125, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.9152, "grad_norm": 0.03715438395738602, "learning_rate": 9.430604982206406e-08, "loss": 0.0104, "num_tokens": 279841417.0, "reward": 1.3816213607788086, "reward_std": 0.12980535626411438, "rewards/accuracy_reward_long_step": 0.48828125, "rewards/final_brier_reward_long_step": 0.7918597459793091, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7815006971359253, "step": 572 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 532.0, "completions/max_terminated_length": 532.0, "completions/mean_length": 242.07421875, "completions/mean_terminated_length": 242.07421875, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.9168, "grad_norm": 0.04261266440153122, "learning_rate": 9.252669039145906e-08, "loss": -0.0052, "num_tokens": 280341788.0, "reward": 1.4158134460449219, "reward_std": 0.09869112074375153, "rewards/accuracy_reward_long_step": 0.50390625, "rewards/final_brier_reward_long_step": 0.8082069754600525, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8394216299057007, "step": 573 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/max_terminated_length": 439.0, "completions/mean_length": 226.296875, "completions/mean_terminated_length": 226.296875, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.9184, "grad_norm": 0.07661443948745728, "learning_rate": 9.074733096085409e-08, "loss": -0.0077, "num_tokens": 280823224.0, "reward": 1.532405138015747, "reward_std": 0.08156967163085938, "rewards/accuracy_reward_long_step": 0.64453125, "rewards/final_brier_reward_long_step": 0.7845557928085327, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.766939640045166, "step": 574 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 708.0, "completions/max_terminated_length": 708.0, "completions/mean_length": 240.23046875, "completions/mean_terminated_length": 240.23046875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.92, "grad_norm": 0.05118957906961441, "learning_rate": 8.896797153024912e-08, "loss": -0.01, "num_tokens": 281322563.0, "reward": 1.353409767150879, "reward_std": 0.12275659292936325, "rewards/accuracy_reward_long_step": 0.44921875, "rewards/final_brier_reward_long_step": 0.7173187732696533, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8994452357292175, "step": 575 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 577.0, "completions/max_terminated_length": 577.0, "completions/mean_length": 236.58984375, "completions/mean_terminated_length": 236.58984375, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.9216, "grad_norm": 0.044467244297266006, "learning_rate": 8.718861209964412e-08, "loss": 0.0093, "num_tokens": 281803666.0, "reward": 1.394946575164795, "reward_std": 0.09921001642942429, "rewards/accuracy_reward_long_step": 0.515625, "rewards/final_brier_reward_long_step": 0.771274209022522, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7460117340087891, "step": 576 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 374.0, "completions/max_terminated_length": 374.0, "completions/mean_length": 219.89453125, "completions/mean_terminated_length": 219.89453125, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.9232, "grad_norm": 0.04329831525683403, "learning_rate": 8.540925266903915e-08, "loss": -0.0061, "num_tokens": 282247735.0, "reward": 1.3368597030639648, "reward_std": 0.12454381585121155, "rewards/accuracy_reward_long_step": 0.46484375, "rewards/final_brier_reward_long_step": 0.7124074101448059, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7756561636924744, "step": 577 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 447.0, "completions/max_terminated_length": 447.0, "completions/mean_length": 232.28515625, "completions/mean_terminated_length": 232.28515625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.9248, "grad_norm": 0.043502867221832275, "learning_rate": 8.362989323843416e-08, "loss": -0.0013, "num_tokens": 282739784.0, "reward": 1.2974281311035156, "reward_std": 0.17717978358268738, "rewards/accuracy_reward_long_step": 0.4453125, "rewards/final_brier_reward_long_step": 0.6866112947463989, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7218515872955322, "step": 578 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 520.0, "completions/max_terminated_length": 520.0, "completions/mean_length": 229.37890625, "completions/mean_terminated_length": 229.37890625, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.9264, "grad_norm": 0.038043197244405746, "learning_rate": 8.185053380782917e-08, "loss": 0.0001, "num_tokens": 283228401.0, "reward": 1.4870903491973877, "reward_std": 0.11480262130498886, "rewards/accuracy_reward_long_step": 0.58203125, "rewards/final_brier_reward_long_step": 0.7964640259742737, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.823772668838501, "step": 579 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 448.0, "completions/max_terminated_length": 448.0, "completions/mean_length": 231.578125, "completions/mean_terminated_length": 231.578125, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.928, "grad_norm": 0.04364067688584328, "learning_rate": 8.00711743772242e-08, "loss": 0.0038, "num_tokens": 283710941.0, "reward": 1.3739776611328125, "reward_std": 0.0760713666677475, "rewards/accuracy_reward_long_step": 0.453125, "rewards/final_brier_reward_long_step": 0.8134101629257202, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8700001239776611, "step": 580 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/max_terminated_length": 444.0, "completions/mean_length": 241.80078125, "completions/mean_terminated_length": 241.80078125, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.9296, "grad_norm": 0.04623554274439812, "learning_rate": 7.829181494661922e-08, "loss": -0.0163, "num_tokens": 284203962.0, "reward": 1.3043211698532104, "reward_std": 0.10367835313081741, "rewards/accuracy_reward_long_step": 0.42578125, "rewards/final_brier_reward_long_step": 0.7652456760406494, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7489140033721924, "step": 581 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 665.0, "completions/max_terminated_length": 665.0, "completions/mean_length": 248.40234375, "completions/mean_terminated_length": 248.40234375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.9312, "grad_norm": 0.046936266124248505, "learning_rate": 7.651245551601423e-08, "loss": 0.0062, "num_tokens": 284689161.0, "reward": 1.2649556398391724, "reward_std": 0.13359886407852173, "rewards/accuracy_reward_long_step": 0.41015625, "rewards/final_brier_reward_long_step": 0.6901007294654846, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7290970087051392, "step": 582 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 409.0, "completions/max_terminated_length": 409.0, "completions/mean_length": 224.82421875, "completions/mean_terminated_length": 224.82421875, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.9328, "grad_norm": 0.048148345202207565, "learning_rate": 7.473309608540925e-08, "loss": 0.0091, "num_tokens": 285176220.0, "reward": 1.545983076095581, "reward_std": 0.10164255648851395, "rewards/accuracy_reward_long_step": 0.64453125, "rewards/final_brier_reward_long_step": 0.7637656331062317, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8420416116714478, "step": 583 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/max_terminated_length": 444.0, "completions/mean_length": 234.75390625, "completions/mean_terminated_length": 234.75390625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.9344, "grad_norm": 0.0379473976790905, "learning_rate": 7.295373665480427e-08, "loss": -0.0063, "num_tokens": 285682757.0, "reward": 1.407658338546753, "reward_std": 0.1226111352443695, "rewards/accuracy_reward_long_step": 0.51953125, "rewards/final_brier_reward_long_step": 0.7624057531356812, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7901023626327515, "step": 584 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 509.0, "completions/max_terminated_length": 509.0, "completions/mean_length": 235.9140625, "completions/mean_terminated_length": 235.9140625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.936, "grad_norm": 0.04213576763868332, "learning_rate": 7.117437722419929e-08, "loss": 0.0006, "num_tokens": 286148327.0, "reward": 1.3678431510925293, "reward_std": 0.13030381500720978, "rewards/accuracy_reward_long_step": 0.484375, "rewards/final_brier_reward_long_step": 0.7432855367660522, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7905872464179993, "step": 585 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 571.0, "completions/max_terminated_length": 571.0, "completions/mean_length": 239.28125, "completions/mean_terminated_length": 239.28125, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.9376, "grad_norm": 0.039584312587976456, "learning_rate": 6.93950177935943e-08, "loss": -0.0023, "num_tokens": 286619447.0, "reward": 1.4525396823883057, "reward_std": 0.1256856620311737, "rewards/accuracy_reward_long_step": 0.5625, "rewards/final_brier_reward_long_step": 0.7838070392608643, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.776351809501648, "step": 586 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 472.0, "completions/max_terminated_length": 472.0, "completions/mean_length": 245.26953125, "completions/mean_terminated_length": 245.26953125, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.9392, "grad_norm": 0.039651911705732346, "learning_rate": 6.761565836298933e-08, "loss": 0.0029, "num_tokens": 287116388.0, "reward": 1.378925085067749, "reward_std": 0.12979546189308167, "rewards/accuracy_reward_long_step": 0.49609375, "rewards/final_brier_reward_long_step": 0.7602671980857849, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7710578441619873, "step": 587 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 519.0, "completions/max_terminated_length": 519.0, "completions/mean_length": 232.0, "completions/mean_terminated_length": 232.0, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.9408, "grad_norm": 0.05170515552163124, "learning_rate": 6.583629893238433e-08, "loss": -0.0055, "num_tokens": 287604604.0, "reward": 1.4521524906158447, "reward_std": 0.19186797738075256, "rewards/accuracy_reward_long_step": 0.55859375, "rewards/final_brier_reward_long_step": 0.796981692314148, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.777253270149231, "step": 588 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 446.0, "completions/max_terminated_length": 446.0, "completions/mean_length": 230.9609375, "completions/mean_terminated_length": 230.9609375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.9424, "grad_norm": 0.043422844260931015, "learning_rate": 6.405693950177936e-08, "loss": -0.0021, "num_tokens": 288080906.0, "reward": 1.5288910865783691, "reward_std": 0.10271154344081879, "rewards/accuracy_reward_long_step": 0.61328125, "rewards/final_brier_reward_long_step": 0.816925048828125, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8455142974853516, "step": 589 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 621.0, "completions/max_terminated_length": 621.0, "completions/mean_length": 236.06640625, "completions/mean_terminated_length": 236.06640625, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.944, "grad_norm": 0.036451030522584915, "learning_rate": 6.227758007117437e-08, "loss": 0.0043, "num_tokens": 288552851.0, "reward": 1.5121817588806152, "reward_std": 0.1267908215522766, "rewards/accuracy_reward_long_step": 0.62109375, "rewards/final_brier_reward_long_step": 0.7251984477043152, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8391537070274353, "step": 590 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/max_terminated_length": 439.0, "completions/mean_length": 232.703125, "completions/mean_terminated_length": 232.703125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.9456, "grad_norm": 0.04694944620132446, "learning_rate": 6.049822064056939e-08, "loss": -0.0057, "num_tokens": 289027695.0, "reward": 1.4363982677459717, "reward_std": 0.15798181295394897, "rewards/accuracy_reward_long_step": 0.55078125, "rewards/final_brier_reward_long_step": 0.7334683537483215, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.80899977684021, "step": 591 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 516.0, "completions/max_terminated_length": 516.0, "completions/mean_length": 244.984375, "completions/mean_terminated_length": 244.984375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.9472, "grad_norm": 0.0597044937312603, "learning_rate": 5.871886120996441e-08, "loss": -0.016, "num_tokens": 289507067.0, "reward": 1.3889837265014648, "reward_std": 0.08240145444869995, "rewards/accuracy_reward_long_step": 0.45703125, "rewards/final_brier_reward_long_step": 0.8605644702911377, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8672455549240112, "step": 592 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 554.0, "completions/max_terminated_length": 554.0, "completions/mean_length": 233.28515625, "completions/mean_terminated_length": 233.28515625, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.9488, "grad_norm": 0.036467116326093674, "learning_rate": 5.6939501779359424e-08, "loss": 0.0026, "num_tokens": 289991372.0, "reward": 1.28902006149292, "reward_std": 0.11750981956720352, "rewards/accuracy_reward_long_step": 0.4375, "rewards/final_brier_reward_long_step": 0.6445460915565491, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7615340352058411, "step": 593 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 563.0, "completions/max_terminated_length": 563.0, "completions/mean_length": 227.6171875, "completions/mean_terminated_length": 227.6171875, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.9504, "grad_norm": 0.09449837356805801, "learning_rate": 5.516014234875445e-08, "loss": 0.0093, "num_tokens": 290484434.0, "reward": 1.574216604232788, "reward_std": 0.11178240180015564, "rewards/accuracy_reward_long_step": 0.69140625, "rewards/final_brier_reward_long_step": 0.7768968343734741, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7543442845344543, "step": 594 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 527.0, "completions/max_terminated_length": 527.0, "completions/mean_length": 244.6015625, "completions/mean_terminated_length": 244.6015625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.952, "grad_norm": 0.03990272060036659, "learning_rate": 5.3380782918149466e-08, "loss": 0.0085, "num_tokens": 290989700.0, "reward": 1.508046269416809, "reward_std": 0.10411694645881653, "rewards/accuracy_reward_long_step": 0.6015625, "rewards/final_brier_reward_long_step": 0.8094656467437744, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.824282169342041, "step": 595 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 498.0, "completions/max_terminated_length": 498.0, "completions/mean_length": 258.6171875, "completions/mean_terminated_length": 258.6171875, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.9536, "grad_norm": 0.035313017666339874, "learning_rate": 5.160142348754448e-08, "loss": 0.0136, "num_tokens": 291475994.0, "reward": 1.4012870788574219, "reward_std": 0.130996972322464, "rewards/accuracy_reward_long_step": 0.5, "rewards/final_brier_reward_long_step": 0.8204870820045471, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7846609950065613, "step": 596 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 506.0, "completions/max_terminated_length": 506.0, "completions/mean_length": 244.3515625, "completions/mean_terminated_length": 244.3515625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.9552, "grad_norm": 0.04008813574910164, "learning_rate": 4.98220640569395e-08, "loss": 0.002, "num_tokens": 291959996.0, "reward": 1.3849501609802246, "reward_std": 0.12151844799518585, "rewards/accuracy_reward_long_step": 0.5234375, "rewards/final_brier_reward_long_step": 0.7529085874557495, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.6931424140930176, "step": 597 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 492.0, "completions/max_terminated_length": 492.0, "completions/mean_length": 226.16015625, "completions/mean_terminated_length": 226.16015625, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.9568, "grad_norm": 0.04041390120983124, "learning_rate": 4.8042704626334516e-08, "loss": -0.0106, "num_tokens": 292435517.0, "reward": 1.3907995223999023, "reward_std": 0.08410888910293579, "rewards/accuracy_reward_long_step": 0.5, "rewards/final_brier_reward_long_step": 0.7949777245521545, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7760331034660339, "step": 598 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 580.0, "completions/max_terminated_length": 580.0, "completions/mean_length": 244.42578125, "completions/mean_terminated_length": 244.42578125, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.9584, "grad_norm": 0.042032286524772644, "learning_rate": 4.626334519572953e-08, "loss": 0.0048, "num_tokens": 292911202.0, "reward": 1.2448697090148926, "reward_std": 0.10349094122648239, "rewards/accuracy_reward_long_step": 0.375, "rewards/final_brier_reward_long_step": 0.7020269632339478, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7774521112442017, "step": 599 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 530.0, "completions/max_terminated_length": 530.0, "completions/mean_length": 230.69921875, "completions/mean_terminated_length": 230.69921875, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.96, "grad_norm": 0.04338167607784271, "learning_rate": 4.448398576512456e-08, "loss": 0.0117, "num_tokens": 293390637.0, "reward": 1.550196886062622, "reward_std": 0.1076919287443161, "rewards/accuracy_reward_long_step": 0.6484375, "rewards/final_brier_reward_long_step": 0.8495925664901733, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7574446201324463, "step": 600 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 555.0, "completions/max_terminated_length": 555.0, "completions/mean_length": 242.64453125, "completions/mean_terminated_length": 242.64453125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.9616, "grad_norm": 0.044204358011484146, "learning_rate": 4.270462633451957e-08, "loss": 0.0193, "num_tokens": 293870786.0, "reward": 1.3222875595092773, "reward_std": 0.09255368262529373, "rewards/accuracy_reward_long_step": 0.3984375, "rewards/final_brier_reward_long_step": 0.8258058428764343, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8695942163467407, "step": 601 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 463.0, "completions/max_terminated_length": 463.0, "completions/mean_length": 238.5390625, "completions/mean_terminated_length": 238.5390625, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.9632, "grad_norm": 0.04069282487034798, "learning_rate": 4.092526690391459e-08, "loss": -0.0003, "num_tokens": 294354396.0, "reward": 1.4837543964385986, "reward_std": 0.1288076937198639, "rewards/accuracy_reward_long_step": 0.5703125, "rewards/final_brier_reward_long_step": 0.8007269501686096, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8530406355857849, "step": 602 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 557.0, "completions/max_terminated_length": 557.0, "completions/mean_length": 241.36328125, "completions/mean_terminated_length": 241.36328125, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.9648, "grad_norm": 0.0698588415980339, "learning_rate": 3.914590747330961e-08, "loss": -0.0062, "num_tokens": 294831593.0, "reward": 1.3811883926391602, "reward_std": 0.12175668030977249, "rewards/accuracy_reward_long_step": 0.46875, "rewards/final_brier_reward_long_step": 0.9002180099487305, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7495359182357788, "step": 603 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 455.0, "completions/max_terminated_length": 455.0, "completions/mean_length": 228.00390625, "completions/mean_terminated_length": 228.00390625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.9664, "grad_norm": 0.04091455414891243, "learning_rate": 3.736654804270462e-08, "loss": 0.0007, "num_tokens": 295312322.0, "reward": 1.3806979656219482, "reward_std": 0.1109173595905304, "rewards/accuracy_reward_long_step": 0.5, "rewards/final_brier_reward_long_step": 0.746747612953186, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7760441303253174, "step": 604 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 456.0, "completions/max_terminated_length": 456.0, "completions/mean_length": 241.90234375, "completions/mean_terminated_length": 241.90234375, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.968, "grad_norm": 0.054420799016952515, "learning_rate": 3.5587188612099644e-08, "loss": 0.0025, "num_tokens": 295800753.0, "reward": 1.385452389717102, "reward_std": 0.18200629949569702, "rewards/accuracy_reward_long_step": 0.5234375, "rewards/final_brier_reward_long_step": 0.6535894870758057, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7944704294204712, "step": 605 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 474.0, "completions/max_terminated_length": 474.0, "completions/mean_length": 237.26171875, "completions/mean_terminated_length": 237.26171875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.9696, "grad_norm": 0.04651153087615967, "learning_rate": 3.3807829181494665e-08, "loss": 0.0115, "num_tokens": 296282412.0, "reward": 1.462805151939392, "reward_std": 0.12919017672538757, "rewards/accuracy_reward_long_step": 0.6015625, "rewards/final_brier_reward_long_step": 0.685794472694397, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7591761350631714, "step": 606 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 517.0, "completions/max_terminated_length": 517.0, "completions/mean_length": 231.671875, "completions/mean_terminated_length": 231.671875, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.9712, "grad_norm": 0.046628162264823914, "learning_rate": 3.202846975088968e-08, "loss": 0.0057, "num_tokens": 296766752.0, "reward": 1.5526325702667236, "reward_std": 0.08376991003751755, "rewards/accuracy_reward_long_step": 0.62890625, "rewards/final_brier_reward_long_step": 0.8181566596031189, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8767487406730652, "step": 607 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 499.0, "completions/max_terminated_length": 499.0, "completions/mean_length": 244.3359375, "completions/mean_terminated_length": 244.3359375, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.9728, "grad_norm": 0.045422233641147614, "learning_rate": 3.0249110320284694e-08, "loss": 0.0073, "num_tokens": 297252086.0, "reward": 1.3045486211776733, "reward_std": 0.10245119035243988, "rewards/accuracy_reward_long_step": 0.4140625, "rewards/final_brier_reward_long_step": 0.7651957273483276, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7967486381530762, "step": 608 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 624.0, "completions/max_terminated_length": 624.0, "completions/mean_length": 243.4375, "completions/mean_terminated_length": 243.4375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.9744, "grad_norm": 0.040375716984272, "learning_rate": 2.8469750889679712e-08, "loss": 0.013, "num_tokens": 297745582.0, "reward": 1.4966931343078613, "reward_std": 0.092777319252491, "rewards/accuracy_reward_long_step": 0.57421875, "rewards/final_brier_reward_long_step": 0.837005078792572, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8528923988342285, "step": 609 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 625.0, "completions/max_terminated_length": 625.0, "completions/mean_length": 242.9765625, "completions/mean_terminated_length": 242.9765625, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.976, "grad_norm": 0.043781962245702744, "learning_rate": 2.6690391459074733e-08, "loss": 0.006, "num_tokens": 298209824.0, "reward": 1.462713360786438, "reward_std": 0.14046858251094818, "rewards/accuracy_reward_long_step": 0.56640625, "rewards/final_brier_reward_long_step": 0.7839847803115845, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8012436628341675, "step": 610 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/max_terminated_length": 439.0, "completions/mean_length": 224.9609375, "completions/mean_terminated_length": 224.9609375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.9776, "grad_norm": 0.03858632594347, "learning_rate": 2.491103202846975e-08, "loss": 0.0032, "num_tokens": 298700686.0, "reward": 1.3872606754302979, "reward_std": 0.07525929063558578, "rewards/accuracy_reward_long_step": 0.5, "rewards/final_brier_reward_long_step": 0.740240216255188, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8088023662567139, "step": 611 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 446.0, "completions/max_terminated_length": 446.0, "completions/mean_length": 233.19140625, "completions/mean_terminated_length": 233.19140625, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.9792, "grad_norm": 0.04854563623666763, "learning_rate": 2.3131672597864765e-08, "loss": -0.007, "num_tokens": 299182631.0, "reward": 1.2626285552978516, "reward_std": 0.09427875280380249, "rewards/accuracy_reward_long_step": 0.37890625, "rewards/final_brier_reward_long_step": 0.7738581895828247, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7610312700271606, "step": 612 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 438.0, "completions/max_terminated_length": 438.0, "completions/mean_length": 232.41796875, "completions/mean_terminated_length": 232.41796875, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.9808, "grad_norm": 0.04861883446574211, "learning_rate": 2.1352313167259786e-08, "loss": -0.0034, "num_tokens": 299679130.0, "reward": 1.4753010272979736, "reward_std": 0.07958254218101501, "rewards/accuracy_reward_long_step": 0.5546875, "rewards/final_brier_reward_long_step": 0.8655683994293213, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8168855905532837, "step": 613 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 385.0, "completions/max_terminated_length": 385.0, "completions/mean_length": 222.76953125, "completions/mean_terminated_length": 222.76953125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.9824, "grad_norm": 0.04186774417757988, "learning_rate": 1.9572953736654804e-08, "loss": -0.0007, "num_tokens": 300167079.0, "reward": 1.3874082565307617, "reward_std": 0.0872558057308197, "rewards/accuracy_reward_long_step": 0.5234375, "rewards/final_brier_reward_long_step": 0.6611804366111755, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7947026491165161, "step": 614 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 704.0, "completions/max_terminated_length": 704.0, "completions/mean_length": 254.33984375, "completions/mean_terminated_length": 254.33984375, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.984, "grad_norm": 0.0385391041636467, "learning_rate": 1.7793594306049822e-08, "loss": 0.0161, "num_tokens": 300662414.0, "reward": 1.3372551202774048, "reward_std": 0.15426021814346313, "rewards/accuracy_reward_long_step": 0.4296875, "rewards/final_brier_reward_long_step": 0.7895093560218811, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8407611846923828, "step": 615 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 507.0, "completions/max_terminated_length": 507.0, "completions/mean_length": 250.22265625, "completions/mean_terminated_length": 250.22265625, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.9856, "grad_norm": 0.03464104235172272, "learning_rate": 1.601423487544484e-08, "loss": 0.0092, "num_tokens": 301154455.0, "reward": 1.3746635913848877, "reward_std": 0.11654820293188095, "rewards/accuracy_reward_long_step": 0.46875, "rewards/final_brier_reward_long_step": 0.7886804342269897, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8349737524986267, "step": 616 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 416.0, "completions/max_terminated_length": 416.0, "completions/mean_length": 237.05078125, "completions/mean_terminated_length": 237.05078125, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.9872, "grad_norm": 0.05452824756503105, "learning_rate": 1.4234875444839856e-08, "loss": 0.0012, "num_tokens": 301632876.0, "reward": 1.377845287322998, "reward_std": 0.12956568598747253, "rewards/accuracy_reward_long_step": 0.49609375, "rewards/final_brier_reward_long_step": 0.7508812546730042, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7761249542236328, "step": 617 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 467.0, "completions/max_terminated_length": 467.0, "completions/mean_length": 234.171875, "completions/mean_terminated_length": 234.171875, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.9888, "grad_norm": 0.04497021064162254, "learning_rate": 1.2455516014234875e-08, "loss": -0.003, "num_tokens": 302116440.0, "reward": 1.360137701034546, "reward_std": 0.2063150405883789, "rewards/accuracy_reward_long_step": 0.5, "rewards/final_brier_reward_long_step": 0.7225565910339355, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7179945111274719, "step": 618 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 425.0, "completions/max_terminated_length": 425.0, "completions/mean_length": 239.23828125, "completions/mean_terminated_length": 239.23828125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.9904, "grad_norm": 0.038744006305933, "learning_rate": 1.0676156583629893e-08, "loss": -0.0001, "num_tokens": 302594637.0, "reward": 1.3021314144134521, "reward_std": 0.11754617094993591, "rewards/accuracy_reward_long_step": 0.41796875, "rewards/final_brier_reward_long_step": 0.7479242086410522, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7887262105941772, "step": 619 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 417.0, "completions/max_terminated_length": 417.0, "completions/mean_length": 228.94140625, "completions/mean_terminated_length": 228.94140625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.992, "grad_norm": 0.035442836582660675, "learning_rate": 8.896797153024911e-09, "loss": 0.0013, "num_tokens": 303086934.0, "reward": 1.61018705368042, "reward_std": 0.09774182736873627, "rewards/accuracy_reward_long_step": 0.71484375, "rewards/final_brier_reward_long_step": 0.7831144332885742, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7982592582702637, "step": 620 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 371.0, "completions/max_terminated_length": 371.0, "completions/mean_length": 224.3203125, "completions/mean_terminated_length": 224.3203125, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.9936, "grad_norm": 0.05612906068563461, "learning_rate": 7.117437722419928e-09, "loss": 0.0006, "num_tokens": 303565288.0, "reward": 1.5199366807937622, "reward_std": 0.0858568549156189, "rewards/accuracy_reward_long_step": 0.62109375, "rewards/final_brier_reward_long_step": 0.7536445260047913, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8417270183563232, "step": 621 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 556.0, "completions/max_terminated_length": 556.0, "completions/mean_length": 244.44921875, "completions/mean_terminated_length": 244.44921875, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.9952, "grad_norm": 0.05516738444566727, "learning_rate": 5.338078291814947e-09, "loss": 0.0138, "num_tokens": 304051859.0, "reward": 1.3782299757003784, "reward_std": 0.11134977638721466, "rewards/accuracy_reward_long_step": 0.4765625, "rewards/final_brier_reward_long_step": 0.770743727684021, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8359257578849792, "step": 622 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 415.0, "completions/max_terminated_length": 415.0, "completions/mean_length": 227.07421875, "completions/mean_terminated_length": 227.07421875, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.9968, "grad_norm": 0.04374608024954796, "learning_rate": 3.558718861209964e-09, "loss": -0.0019, "num_tokens": 304545614.0, "reward": 1.507850170135498, "reward_std": 0.16186021268367767, "rewards/accuracy_reward_long_step": 0.6171875, "rewards/final_brier_reward_long_step": 0.7282167673110962, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8344339728355408, "step": 623 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 519.0, "completions/max_terminated_length": 519.0, "completions/mean_length": 238.9609375, "completions/mean_terminated_length": 238.9609375, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.9984, "grad_norm": 0.042679328471422195, "learning_rate": 1.779359430604982e-09, "loss": 0.0036, "num_tokens": 305024668.0, "reward": 1.3625683784484863, "reward_std": 0.12260974198579788, "rewards/accuracy_reward_long_step": 0.48828125, "rewards/final_brier_reward_long_step": 0.7129184007644653, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7842304706573486, "step": 624 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 465.0, "completions/max_terminated_length": 465.0, "completions/mean_length": 234.5, "completions/mean_terminated_length": 234.5, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 1.0, "grad_norm": 0.04114016145467758, "learning_rate": 0.0, "loss": 0.017, "num_tokens": 305506462.0, "reward": 1.5832818746566772, "reward_std": 0.10286815464496613, "rewards/accuracy_reward_long_step": 0.6875, "rewards/final_brier_reward_long_step": 0.7612390518188477, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8218887448310852, "step": 625 }, { "epoch": 1.0, "step": 625, "total_flos": 0.0, "train_loss": -0.007067593541555107, "train_runtime": 30966.4861, "train_samples_per_second": 0.646, "train_steps_per_second": 0.02 } ], "logging_steps": 1, "max_steps": 625, "num_input_tokens_seen": 305506462, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }