{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.32, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1640625, "completions/max_length": 1000.0, "completions/max_terminated_length": 1000.0, "completions/mean_length": 341.4609375, "completions/mean_terminated_length": 408.47662353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0016, "grad_norm": 0.17645138502120972, "learning_rate": 5e-08, "loss": -0.116, "num_tokens": 486582.0, "reward": 0.41310209035873413, "reward_std": 0.4805126190185547, "rewards/accuracy_reward_long_step": 0.2265625, "rewards/final_brier_reward_long_step": 0.11814829707145691, "rewards/format_reward_long_step": 0.23046875, "rewards/stepwise_brier_reward_long_step": 0.1670725792646408, "step": 1 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.19140625, "completions/max_length": 1019.0, "completions/max_terminated_length": 1019.0, "completions/mean_length": 303.75, "completions/mean_terminated_length": 375.65216064453125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.6896445155143738, "learning_rate": 1e-07, "loss": -0.1486, "num_tokens": 985630.0, "reward": 0.4098304212093353, "reward_std": 0.5015645623207092, "rewards/accuracy_reward_long_step": 0.1875, "rewards/final_brier_reward_long_step": 0.1355031430721283, "rewards/format_reward_long_step": 0.27734375, "rewards/stepwise_brier_reward_long_step": 0.1991310715675354, "step": 2 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1001.0, "completions/max_terminated_length": 1001.0, "completions/mean_length": 327.15234375, "completions/mean_terminated_length": 402.6490478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0048, "grad_norm": 0.40888160467147827, "learning_rate": 1.5e-07, "loss": -0.1498, "num_tokens": 1484085.0, "reward": 0.45745182037353516, "reward_std": 0.5877432227134705, "rewards/accuracy_reward_long_step": 0.21484375, "rewards/final_brier_reward_long_step": 0.15137070417404175, "rewards/format_reward_long_step": 0.2890625, "rewards/stepwise_brier_reward_long_step": 0.24093663692474365, "step": 3 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0078125, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.75, "calib/nonempty_final_conf_rate": 0.0078125, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.15000000000000002, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.20703125, "completions/max_length": 1003.0, "completions/max_terminated_length": 1003.0, "completions/mean_length": 292.12890625, "completions/mean_terminated_length": 368.3990173339844, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 5.972024917602539, "learning_rate": 2e-07, "loss": -0.1685, "num_tokens": 1985870.0, "reward": 0.3280823230743408, "reward_std": 0.4515482783317566, "rewards/accuracy_reward_long_step": 0.15625, "rewards/final_brier_reward_long_step": 0.10437265783548355, "rewards/format_reward_long_step": 0.21484375, "rewards/stepwise_brier_reward_long_step": 0.15326906740665436, "step": 4 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.75, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15234375, "completions/max_length": 1017.0, "completions/max_terminated_length": 1017.0, "completions/mean_length": 338.7734375, "completions/mean_terminated_length": 399.65899658203125, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.008, "grad_norm": 0.17540286481380463, "learning_rate": 2.5e-07, "loss": -0.0985, "num_tokens": 2508156.0, "reward": 0.3251497149467468, "reward_std": 0.4245316982269287, "rewards/accuracy_reward_long_step": 0.14453125, "rewards/final_brier_reward_long_step": 0.12149253487586975, "rewards/format_reward_long_step": 0.22265625, "rewards/stepwise_brier_reward_long_step": 0.15566879510879517, "step": 5 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.13671875, "completions/max_length": 994.0, "completions/max_terminated_length": 994.0, "completions/mean_length": 347.57421875, "completions/mean_terminated_length": 402.61993408203125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.40559351444244385, "learning_rate": 3e-07, "loss": -0.1487, "num_tokens": 3024799.0, "reward": 0.39223921298980713, "reward_std": 0.5162125825881958, "rewards/accuracy_reward_long_step": 0.1640625, "rewards/final_brier_reward_long_step": 0.13095274567604065, "rewards/format_reward_long_step": 0.28515625, "rewards/stepwise_brier_reward_long_step": 0.21144148707389832, "step": 6 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 973.0, "completions/max_terminated_length": 973.0, "completions/mean_length": 341.73828125, "completions/mean_terminated_length": 390.55804443359375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0112, "grad_norm": 0.18455225229263306, "learning_rate": 3.5e-07, "loss": -0.0943, "num_tokens": 3531764.0, "reward": 0.3882223963737488, "reward_std": 0.5209769606590271, "rewards/accuracy_reward_long_step": 0.171875, "rewards/final_brier_reward_long_step": 0.1431557536125183, "rewards/format_reward_long_step": 0.265625, "rewards/stepwise_brier_reward_long_step": 0.1909838318824768, "step": 7 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0078125, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.5, "calib/mean_conf": 0.925, "calib/nonempty_final_conf_rate": 0.0078125, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.024999999999999967, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.13671875, "completions/max_length": 997.0, "completions/max_terminated_length": 997.0, "completions/mean_length": 329.87890625, "completions/mean_terminated_length": 382.1221923828125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.214069664478302, "learning_rate": 4e-07, "loss": -0.114, "num_tokens": 4020813.0, "reward": 0.4169233441352844, "reward_std": 0.530878484249115, "rewards/accuracy_reward_long_step": 0.203125, "rewards/final_brier_reward_long_step": 0.12548723816871643, "rewards/format_reward_long_step": 0.265625, "rewards/stepwise_brier_reward_long_step": 0.19845610857009888, "step": 8 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.2, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.18359375, "completions/max_length": 1013.0, "completions/max_terminated_length": 1013.0, "completions/mean_length": 327.0546875, "completions/mean_terminated_length": 400.60284423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0144, "grad_norm": 0.5599529147148132, "learning_rate": 4.5e-07, "loss": -0.1094, "num_tokens": 4537731.0, "reward": 0.26959529519081116, "reward_std": 0.40018031001091003, "rewards/accuracy_reward_long_step": 0.07421875, "rewards/final_brier_reward_long_step": 0.102345310151577, "rewards/format_reward_long_step": 0.25, "rewards/stepwise_brier_reward_long_step": 0.17916086316108704, "step": 9 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.9, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12890625, "completions/max_length": 1003.0, "completions/max_terminated_length": 1003.0, "completions/mean_length": 344.32421875, "completions/mean_terminated_length": 395.2780456542969, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.016, "grad_norm": 0.06352151185274124, "learning_rate": 5e-07, "loss": -0.081, "num_tokens": 5045750.0, "reward": 0.34384429454803467, "reward_std": 0.42729049921035767, "rewards/accuracy_reward_long_step": 0.14453125, "rewards/final_brier_reward_long_step": 0.11546708643436432, "rewards/format_reward_long_step": 0.2578125, "rewards/stepwise_brier_reward_long_step": 0.166159987449646, "step": 10 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.0, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1953125, "completions/max_length": 1004.0, "completions/max_terminated_length": 1004.0, "completions/mean_length": 309.78125, "completions/mean_terminated_length": 384.97088623046875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0176, "grad_norm": 0.07709571719169617, "learning_rate": 5.5e-07, "loss": -0.1551, "num_tokens": 5556670.0, "reward": 0.314059853553772, "reward_std": 0.4106077551841736, "rewards/accuracy_reward_long_step": 0.125, "rewards/final_brier_reward_long_step": 0.0948641449213028, "rewards/format_reward_long_step": 0.24609375, "rewards/stepwise_brier_reward_long_step": 0.1691877841949463, "step": 11 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1953125, "completions/max_length": 1024.0, "completions/max_terminated_length": 1024.0, "completions/mean_length": 336.19921875, "completions/mean_terminated_length": 417.80096435546875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0192, "grad_norm": 0.7987160086631775, "learning_rate": 6e-07, "loss": -0.1555, "num_tokens": 6065281.0, "reward": 0.388057678937912, "reward_std": 0.4589148163795471, "rewards/accuracy_reward_long_step": 0.2265625, "rewards/final_brier_reward_long_step": 0.08411991596221924, "rewards/format_reward_long_step": 0.203125, "rewards/stepwise_brier_reward_long_step": 0.15561071038246155, "step": 12 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.19140625, "completions/max_length": 979.0, "completions/max_terminated_length": 979.0, "completions/mean_length": 305.63671875, "completions/mean_terminated_length": 377.9855041503906, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0208, "grad_norm": 0.14753887057304382, "learning_rate": 6.5e-07, "loss": -0.1082, "num_tokens": 6576452.0, "reward": 0.4127691388130188, "reward_std": 0.4886016845703125, "rewards/accuracy_reward_long_step": 0.18359375, "rewards/final_brier_reward_long_step": 0.1466279774904251, "rewards/format_reward_long_step": 0.28515625, "rewards/stepwise_brier_reward_long_step": 0.1997610330581665, "step": 13 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0078125, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.5650000000000001, "calib/nonempty_final_conf_rate": 0.0078125, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.23500000000000001, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.16796875, "completions/max_length": 1021.0, "completions/max_terminated_length": 1021.0, "completions/mean_length": 324.9140625, "completions/mean_terminated_length": 390.5070495605469, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0224, "grad_norm": 0.14357596635818481, "learning_rate": 7e-07, "loss": -0.1369, "num_tokens": 7091094.0, "reward": 0.5405763387680054, "reward_std": 0.5295156836509705, "rewards/accuracy_reward_long_step": 0.26171875, "rewards/final_brier_reward_long_step": 0.17839357256889343, "rewards/format_reward_long_step": 0.34765625, "rewards/stepwise_brier_reward_long_step": 0.24172407388687134, "step": 14 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12109375, "completions/max_length": 1002.0, "completions/max_terminated_length": 1002.0, "completions/mean_length": 351.58984375, "completions/mean_terminated_length": 400.0311279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.024, "grad_norm": 0.15006816387176514, "learning_rate": 7.5e-07, "loss": -0.0696, "num_tokens": 7608909.0, "reward": 0.4260837733745575, "reward_std": 0.49461331963539124, "rewards/accuracy_reward_long_step": 0.1328125, "rewards/final_brier_reward_long_step": 0.1504545956850052, "rewards/format_reward_long_step": 0.3828125, "rewards/stepwise_brier_reward_long_step": 0.2570054233074188, "step": 15 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 966.0, "completions/max_terminated_length": 966.0, "completions/mean_length": 354.89453125, "completions/mean_terminated_length": 412.9681701660156, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0256, "grad_norm": 0.11908330768346786, "learning_rate": 8e-07, "loss": -0.0647, "num_tokens": 8142738.0, "reward": 0.45549800992012024, "reward_std": 0.5145381093025208, "rewards/accuracy_reward_long_step": 0.1953125, "rewards/final_brier_reward_long_step": 0.14264921844005585, "rewards/format_reward_long_step": 0.33203125, "rewards/stepwise_brier_reward_long_step": 0.23403030633926392, "step": 16 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.5, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 1012.0, "completions/max_terminated_length": 1012.0, "completions/mean_length": 327.94140625, "completions/mean_terminated_length": 381.6045227050781, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0272, "grad_norm": 0.10344522446393967, "learning_rate": 8.499999999999999e-07, "loss": -0.1339, "num_tokens": 8624011.0, "reward": 0.4539112448692322, "reward_std": 0.535567045211792, "rewards/accuracy_reward_long_step": 0.15625, "rewards/final_brier_reward_long_step": 0.16395819187164307, "rewards/format_reward_long_step": 0.37890625, "rewards/stepwise_brier_reward_long_step": 0.26887428760528564, "step": 17 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 1008.0, "completions/max_terminated_length": 1008.0, "completions/mean_length": 356.265625, "completions/mean_terminated_length": 400.0175476074219, "completions/min_length": 0.0, "completions/min_terminated_length": 58.0, "epoch": 0.0288, "grad_norm": 0.06071805581450462, "learning_rate": 9e-07, "loss": -0.0799, "num_tokens": 9139559.0, "reward": 0.47327494621276855, "reward_std": 0.4894096851348877, "rewards/accuracy_reward_long_step": 0.15625, "rewards/final_brier_reward_long_step": 0.1746734380722046, "rewards/format_reward_long_step": 0.3984375, "rewards/stepwise_brier_reward_long_step": 0.2965514063835144, "step": 18 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.5, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 962.0, "completions/max_terminated_length": 962.0, "completions/mean_length": 345.92578125, "completions/mean_terminated_length": 373.658203125, "completions/min_length": 0.0, "completions/min_terminated_length": 12.0, "epoch": 0.0304, "grad_norm": 0.04385017603635788, "learning_rate": 9.499999999999999e-07, "loss": -0.0804, "num_tokens": 9656020.0, "reward": 0.5117301940917969, "reward_std": 0.5140496492385864, "rewards/accuracy_reward_long_step": 0.1875, "rewards/final_brier_reward_long_step": 0.1626010686159134, "rewards/format_reward_long_step": 0.4140625, "rewards/stepwise_brier_reward_long_step": 0.3061947822570801, "step": 19 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.35, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 1015.0, "completions/max_terminated_length": 1015.0, "completions/mean_length": 345.9765625, "completions/mean_terminated_length": 388.46490478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.032, "grad_norm": 0.12682317197322845, "learning_rate": 1e-06, "loss": -0.1204, "num_tokens": 10172294.0, "reward": 0.5370358228683472, "reward_std": 0.5269090533256531, "rewards/accuracy_reward_long_step": 0.1953125, "rewards/final_brier_reward_long_step": 0.17095977067947388, "rewards/format_reward_long_step": 0.44921875, "rewards/stepwise_brier_reward_long_step": 0.29749590158462524, "step": 20 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 1007.0, "completions/max_terminated_length": 1007.0, "completions/mean_length": 326.0625, "completions/mean_terminated_length": 369.3451232910156, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.0336, "grad_norm": 0.1039290726184845, "learning_rate": 9.944444444444444e-07, "loss": -0.0887, "num_tokens": 10682942.0, "reward": 0.6263545155525208, "reward_std": 0.5448290109634399, "rewards/accuracy_reward_long_step": 0.1953125, "rewards/final_brier_reward_long_step": 0.23252148926258087, "rewards/format_reward_long_step": 0.55078125, "rewards/stepwise_brier_reward_long_step": 0.39008423686027527, "step": 21 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 967.0, "completions/max_terminated_length": 967.0, "completions/mean_length": 318.80859375, "completions/mean_terminated_length": 354.8478088378906, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0352, "grad_norm": 0.07774824649095535, "learning_rate": 9.88888888888889e-07, "loss": -0.0844, "num_tokens": 11188405.0, "reward": 0.5633851885795593, "reward_std": 0.480153888463974, "rewards/accuracy_reward_long_step": 0.16015625, "rewards/final_brier_reward_long_step": 0.20337249338626862, "rewards/format_reward_long_step": 0.515625, "rewards/stepwise_brier_reward_long_step": 0.37829315662384033, "step": 22 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 1003.0, "completions/max_terminated_length": 1003.0, "completions/mean_length": 321.98828125, "completions/mean_terminated_length": 355.29742431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.0368, "grad_norm": 0.06589806079864502, "learning_rate": 9.833333333333332e-07, "loss": -0.0568, "num_tokens": 11700754.0, "reward": 0.7173271179199219, "reward_std": 0.5148348808288574, "rewards/accuracy_reward_long_step": 0.25, "rewards/final_brier_reward_long_step": 0.2776620388031006, "rewards/format_reward_long_step": 0.60546875, "rewards/stepwise_brier_reward_long_step": 0.3807087540626526, "step": 23 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 910.0, "completions/max_terminated_length": 910.0, "completions/mean_length": 338.0546875, "completions/mean_terminated_length": 351.7967224121094, "completions/min_length": 0.0, "completions/min_terminated_length": 37.0, "epoch": 0.0384, "grad_norm": 0.2670823037624359, "learning_rate": 9.777777777777778e-07, "loss": -0.066, "num_tokens": 12206096.0, "reward": 0.799688994884491, "reward_std": 0.49856090545654297, "rewards/accuracy_reward_long_step": 0.2578125, "rewards/final_brier_reward_long_step": 0.3083125054836273, "rewards/format_reward_long_step": 0.66796875, "rewards/stepwise_brier_reward_long_step": 0.5232560634613037, "step": 24 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.5, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 985.0, "completions/max_terminated_length": 985.0, "completions/mean_length": 337.97265625, "completions/mean_terminated_length": 353.14691162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.04, "grad_norm": 0.12033725529909134, "learning_rate": 9.722222222222222e-07, "loss": -0.0464, "num_tokens": 12725273.0, "reward": 0.7678510546684265, "reward_std": 0.5629936456680298, "rewards/accuracy_reward_long_step": 0.234375, "rewards/final_brier_reward_long_step": 0.31047940254211426, "rewards/format_reward_long_step": 0.671875, "rewards/stepwise_brier_reward_long_step": 0.479674756526947, "step": 25 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 990.0, "completions/max_terminated_length": 990.0, "completions/mean_length": 329.58984375, "completions/mean_terminated_length": 340.2217712402344, "completions/min_length": 0.0, "completions/min_terminated_length": 31.0, "epoch": 0.0416, "grad_norm": 0.05781063064932823, "learning_rate": 9.666666666666666e-07, "loss": -0.0628, "num_tokens": 13247448.0, "reward": 0.8087503910064697, "reward_std": 0.5137581825256348, "rewards/accuracy_reward_long_step": 0.25390625, "rewards/final_brier_reward_long_step": 0.32855507731437683, "rewards/format_reward_long_step": 0.703125, "rewards/stepwise_brier_reward_long_step": 0.484571635723114, "step": 26 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 0.95, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 848.0, "completions/max_terminated_length": 848.0, "completions/mean_length": 294.39453125, "completions/mean_terminated_length": 308.8729248046875, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.0432, "grad_norm": 0.1369597166776657, "learning_rate": 9.61111111111111e-07, "loss": -0.0484, "num_tokens": 13739733.0, "reward": 0.7955328226089478, "reward_std": 0.45239853858947754, "rewards/accuracy_reward_long_step": 0.18359375, "rewards/final_brier_reward_long_step": 0.35069864988327026, "rewards/format_reward_long_step": 0.78515625, "rewards/stepwise_brier_reward_long_step": 0.526745080947876, "step": 27 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 847.0, "completions/max_terminated_length": 847.0, "completions/mean_length": 299.19140625, "completions/mean_terminated_length": 307.6023864746094, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.0448, "grad_norm": 0.042039766907691956, "learning_rate": 9.555555555555556e-07, "loss": -0.0462, "num_tokens": 14235238.0, "reward": 0.8760055303573608, "reward_std": 0.4597265124320984, "rewards/accuracy_reward_long_step": 0.26171875, "rewards/final_brier_reward_long_step": 0.3385574221611023, "rewards/format_reward_long_step": 0.78515625, "rewards/stepwise_brier_reward_long_step": 0.5482772588729858, "step": 28 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 0.95, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 968.0, "completions/max_terminated_length": 968.0, "completions/mean_length": 307.88671875, "completions/mean_terminated_length": 316.5421447753906, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.0464, "grad_norm": 0.17529161274433136, "learning_rate": 9.499999999999999e-07, "loss": -0.06, "num_tokens": 14747433.0, "reward": 0.8551455736160278, "reward_std": 0.4609827995300293, "rewards/accuracy_reward_long_step": 0.22265625, "rewards/final_brier_reward_long_step": 0.3258519172668457, "rewards/format_reward_long_step": 0.8125, "rewards/stepwise_brier_reward_long_step": 0.5791054964065552, "step": 29 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 910.0, "completions/max_terminated_length": 910.0, "completions/mean_length": 292.421875, "completions/mean_terminated_length": 299.44000244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.048, "grad_norm": 0.05863456800580025, "learning_rate": 9.444444444444444e-07, "loss": -0.0448, "num_tokens": 15236589.0, "reward": 1.0201013088226318, "reward_std": 0.43484771251678467, "rewards/accuracy_reward_long_step": 0.32421875, "rewards/final_brier_reward_long_step": 0.4454140067100525, "rewards/format_reward_long_step": 0.8671875, "rewards/stepwise_brier_reward_long_step": 0.6037412881851196, "step": 30 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 639.0, "completions/max_terminated_length": 639.0, "completions/mean_length": 284.203125, "completions/mean_terminated_length": 286.4409484863281, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.0496, "grad_norm": 0.06036984175443649, "learning_rate": 9.388888888888888e-07, "loss": -0.0118, "num_tokens": 15728881.0, "reward": 1.0137848854064941, "reward_std": 0.45152291655540466, "rewards/accuracy_reward_long_step": 0.296875, "rewards/final_brier_reward_long_step": 0.4248659014701843, "rewards/format_reward_long_step": 0.88671875, "rewards/stepwise_brier_reward_long_step": 0.6693359017372131, "step": 31 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 895.0, "completions/max_terminated_length": 895.0, "completions/mean_length": 287.9296875, "completions/mean_terminated_length": 296.0240783691406, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.0512, "grad_norm": 0.18194638192653656, "learning_rate": 9.333333333333333e-07, "loss": -0.0144, "num_tokens": 16213351.0, "reward": 0.8365803360939026, "reward_std": 0.42848724126815796, "rewards/accuracy_reward_long_step": 0.19921875, "rewards/final_brier_reward_long_step": 0.34980231523513794, "rewards/format_reward_long_step": 0.83203125, "rewards/stepwise_brier_reward_long_step": 0.5355815887451172, "step": 32 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 893.0, "completions/max_terminated_length": 893.0, "completions/mean_length": 298.71875, "completions/mean_terminated_length": 305.88800048828125, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.0528, "grad_norm": 0.11843208968639374, "learning_rate": 9.277777777777777e-07, "loss": -0.0299, "num_tokens": 16715199.0, "reward": 0.9657071232795715, "reward_std": 0.40177035331726074, "rewards/accuracy_reward_long_step": 0.265625, "rewards/final_brier_reward_long_step": 0.4098663926124573, "rewards/format_reward_long_step": 0.8984375, "rewards/stepwise_brier_reward_long_step": 0.5935869812965393, "step": 33 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 744.0, "completions/max_terminated_length": 744.0, "completions/mean_length": 286.67578125, "completions/mean_terminated_length": 290.0751037597656, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.0544, "grad_norm": 0.052849605679512024, "learning_rate": 9.222222222222222e-07, "loss": -0.0391, "num_tokens": 17214404.0, "reward": 0.9088114500045776, "reward_std": 0.4241497814655304, "rewards/accuracy_reward_long_step": 0.2265625, "rewards/final_brier_reward_long_step": 0.37564724683761597, "rewards/format_reward_long_step": 0.875, "rewards/stepwise_brier_reward_long_step": 0.6033484935760498, "step": 34 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 0.95, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 835.0, "completions/max_terminated_length": 835.0, "completions/mean_length": 270.71484375, "completions/mean_terminated_length": 273.9249267578125, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.056, "grad_norm": 0.049951206892728806, "learning_rate": 9.166666666666665e-07, "loss": -0.0334, "num_tokens": 17712787.0, "reward": 1.1728079319000244, "reward_std": 0.4307233691215515, "rewards/accuracy_reward_long_step": 0.3984375, "rewards/final_brier_reward_long_step": 0.5377765893936157, "rewards/format_reward_long_step": 0.93359375, "rewards/stepwise_brier_reward_long_step": 0.6925181150436401, "step": 35 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 747.0, "completions/max_terminated_length": 747.0, "completions/mean_length": 268.5234375, "completions/mean_terminated_length": 270.6377868652344, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.0576, "grad_norm": 0.07510220259428024, "learning_rate": 9.11111111111111e-07, "loss": -0.0289, "num_tokens": 18199657.0, "reward": 1.1637983322143555, "reward_std": 0.46460413932800293, "rewards/accuracy_reward_long_step": 0.390625, "rewards/final_brier_reward_long_step": 0.5209541916847229, "rewards/format_reward_long_step": 0.9375, "rewards/stepwise_brier_reward_long_step": 0.6967387199401855, "step": 36 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 746.0, "completions/max_terminated_length": 746.0, "completions/mean_length": 282.359375, "completions/mean_terminated_length": 282.359375, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.0592, "grad_norm": 0.15866929292678833, "learning_rate": 9.055555555555556e-07, "loss": 0.0026, "num_tokens": 18698421.0, "reward": 1.1005845069885254, "reward_std": 0.40720871090888977, "rewards/accuracy_reward_long_step": 0.328125, "rewards/final_brier_reward_long_step": 0.5032482743263245, "rewards/format_reward_long_step": 0.9375, "rewards/stepwise_brier_reward_long_step": 0.7115898132324219, "step": 37 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 839.0, "completions/max_terminated_length": 839.0, "completions/mean_length": 274.265625, "completions/mean_terminated_length": 275.3411865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.0608, "grad_norm": 0.17633329331874847, "learning_rate": 9e-07, "loss": -0.0303, "num_tokens": 19198241.0, "reward": 1.034214973449707, "reward_std": 0.37503814697265625, "rewards/accuracy_reward_long_step": 0.26171875, "rewards/final_brier_reward_long_step": 0.45626088976860046, "rewards/format_reward_long_step": 0.95703125, "rewards/stepwise_brier_reward_long_step": 0.7196618318557739, "step": 38 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 909.0, "completions/max_terminated_length": 909.0, "completions/mean_length": 266.796875, "completions/mean_terminated_length": 269.9604797363281, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.0624, "grad_norm": 0.1747618317604065, "learning_rate": 8.944444444444445e-07, "loss": -0.0408, "num_tokens": 19687013.0, "reward": 0.9186801910400391, "reward_std": 0.33332592248916626, "rewards/accuracy_reward_long_step": 0.203125, "rewards/final_brier_reward_long_step": 0.42509374022483826, "rewards/format_reward_long_step": 0.94140625, "rewards/stepwise_brier_reward_long_step": 0.5543146133422852, "step": 39 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 692.0, "completions/max_terminated_length": 692.0, "completions/mean_length": 269.41796875, "completions/mean_terminated_length": 272.6126708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.064, "grad_norm": 0.4975915253162384, "learning_rate": 8.888888888888888e-07, "loss": -0.0294, "num_tokens": 20171848.0, "reward": 1.0980305671691895, "reward_std": 0.3671276569366455, "rewards/accuracy_reward_long_step": 0.32421875, "rewards/final_brier_reward_long_step": 0.5042629241943359, "rewards/format_reward_long_step": 0.9296875, "rewards/stepwise_brier_reward_long_step": 0.7316096425056458, "step": 40 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 790.0, "completions/max_terminated_length": 790.0, "completions/mean_length": 276.7265625, "completions/mean_terminated_length": 276.7265625, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.0656, "grad_norm": 0.19570253789424896, "learning_rate": 8.833333333333333e-07, "loss": -0.0075, "num_tokens": 20676338.0, "reward": 0.9547429084777832, "reward_std": 0.35480546951293945, "rewards/accuracy_reward_long_step": 0.2265625, "rewards/final_brier_reward_long_step": 0.4319256842136383, "rewards/format_reward_long_step": 0.91796875, "rewards/stepwise_brier_reward_long_step": 0.6448584794998169, "step": 41 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 708.0, "completions/max_terminated_length": 708.0, "completions/mean_length": 271.44140625, "completions/mean_terminated_length": 272.5058898925781, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.0672, "grad_norm": 0.09999877214431763, "learning_rate": 8.777777777777777e-07, "loss": 0.0075, "num_tokens": 21162843.0, "reward": 1.261709213256836, "reward_std": 0.47088778018951416, "rewards/accuracy_reward_long_step": 0.47265625, "rewards/final_brier_reward_long_step": 0.5992207527160645, "rewards/format_reward_long_step": 0.9296875, "rewards/stepwise_brier_reward_long_step": 0.697616457939148, "step": 42 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 634.0, "completions/max_terminated_length": 634.0, "completions/mean_length": 268.66015625, "completions/mean_terminated_length": 268.66015625, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.0688, "grad_norm": 0.0876559317111969, "learning_rate": 8.722222222222222e-07, "loss": -0.0159, "num_tokens": 21664932.0, "reward": 1.074408769607544, "reward_std": 0.3138850927352905, "rewards/accuracy_reward_long_step": 0.28515625, "rewards/final_brier_reward_long_step": 0.49867674708366394, "rewards/format_reward_long_step": 0.95703125, "rewards/stepwise_brier_reward_long_step": 0.744270920753479, "step": 43 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 802.0, "completions/max_terminated_length": 802.0, "completions/mean_length": 288.703125, "completions/mean_terminated_length": 288.703125, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.0704, "grad_norm": 0.27019646763801575, "learning_rate": 8.666666666666667e-07, "loss": 0.0153, "num_tokens": 22174696.0, "reward": 1.186435580253601, "reward_std": 0.3493732213973999, "rewards/accuracy_reward_long_step": 0.40234375, "rewards/final_brier_reward_long_step": 0.5960390567779541, "rewards/format_reward_long_step": 0.94921875, "rewards/stepwise_brier_reward_long_step": 0.6418907642364502, "step": 44 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 829.0, "completions/max_terminated_length": 829.0, "completions/mean_length": 276.6796875, "completions/mean_terminated_length": 278.8582763671875, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.072, "grad_norm": 0.03910618647933006, "learning_rate": 8.611111111111111e-07, "loss": -0.0197, "num_tokens": 22667598.0, "reward": 1.0908488035202026, "reward_std": 0.3936161398887634, "rewards/accuracy_reward_long_step": 0.30078125, "rewards/final_brier_reward_long_step": 0.571899950504303, "rewards/format_reward_long_step": 0.9375, "rewards/stepwise_brier_reward_long_step": 0.7133700847625732, "step": 45 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 946.0, "completions/max_terminated_length": 946.0, "completions/mean_length": 258.7109375, "completions/mean_terminated_length": 260.7480163574219, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.0736, "grad_norm": 0.05340059474110603, "learning_rate": 8.555555555555555e-07, "loss": -0.0053, "num_tokens": 23136156.0, "reward": 1.0639352798461914, "reward_std": 0.35911956429481506, "rewards/accuracy_reward_long_step": 0.27734375, "rewards/final_brier_reward_long_step": 0.5164073705673218, "rewards/format_reward_long_step": 0.96484375, "rewards/stepwise_brier_reward_long_step": 0.7002708315849304, "step": 46 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 750.0, "completions/max_terminated_length": 750.0, "completions/mean_length": 258.69140625, "completions/mean_terminated_length": 258.69140625, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.0752, "grad_norm": 0.15533719956874847, "learning_rate": 8.499999999999999e-07, "loss": 0.0048, "num_tokens": 23612765.0, "reward": 1.1923271417617798, "reward_std": 0.36889374256134033, "rewards/accuracy_reward_long_step": 0.375, "rewards/final_brier_reward_long_step": 0.591122031211853, "rewards/format_reward_long_step": 0.9765625, "rewards/stepwise_brier_reward_long_step": 0.7250616550445557, "step": 47 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 983.0, "completions/max_terminated_length": 983.0, "completions/mean_length": 264.12890625, "completions/mean_terminated_length": 266.2086486816406, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.0768, "grad_norm": 0.0998203307390213, "learning_rate": 8.444444444444444e-07, "loss": 0.0031, "num_tokens": 24090910.0, "reward": 1.1535530090332031, "reward_std": 0.3134958744049072, "rewards/accuracy_reward_long_step": 0.328125, "rewards/final_brier_reward_long_step": 0.6210886240005493, "rewards/format_reward_long_step": 0.96875, "rewards/stepwise_brier_reward_long_step": 0.7431235313415527, "step": 48 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 778.0, "completions/max_terminated_length": 778.0, "completions/mean_length": 275.03515625, "completions/mean_terminated_length": 275.03515625, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.0784, "grad_norm": 0.09424059092998505, "learning_rate": 8.388888888888888e-07, "loss": 0.0141, "num_tokens": 24596631.0, "reward": 1.1789720058441162, "reward_std": 0.3338298797607422, "rewards/accuracy_reward_long_step": 0.34765625, "rewards/final_brier_reward_long_step": 0.615998387336731, "rewards/format_reward_long_step": 0.97265625, "rewards/stepwise_brier_reward_long_step": 0.7639520764350891, "step": 49 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 657.0, "completions/max_terminated_length": 657.0, "completions/mean_length": 252.19140625, "completions/mean_terminated_length": 252.19140625, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.08, "grad_norm": 0.21770058572292328, "learning_rate": 8.333333333333333e-07, "loss": -0.0245, "num_tokens": 25085544.0, "reward": 1.1415338516235352, "reward_std": 0.3256559669971466, "rewards/accuracy_reward_long_step": 0.3203125, "rewards/final_brier_reward_long_step": 0.6072898507118225, "rewards/format_reward_long_step": 0.96484375, "rewards/stepwise_brier_reward_long_step": 0.7479082345962524, "step": 50 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 948.0, "completions/max_terminated_length": 948.0, "completions/mean_length": 254.66796875, "completions/mean_terminated_length": 256.6732177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 32.0, "epoch": 0.0816, "grad_norm": 0.08060960471630096, "learning_rate": 8.277777777777777e-07, "loss": -0.0449, "num_tokens": 25574811.0, "reward": 1.2235015630722046, "reward_std": 0.3895331621170044, "rewards/accuracy_reward_long_step": 0.39453125, "rewards/final_brier_reward_long_step": 0.6493412256240845, "rewards/format_reward_long_step": 0.95703125, "rewards/stepwise_brier_reward_long_step": 0.7524775266647339, "step": 51 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 582.0, "completions/max_terminated_length": 582.0, "completions/mean_length": 255.12890625, "completions/mean_terminated_length": 255.12890625, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.0832, "grad_norm": 0.1500002145767212, "learning_rate": 8.222222222222221e-07, "loss": -0.0241, "num_tokens": 26071732.0, "reward": 1.1828399896621704, "reward_std": 0.2660859525203705, "rewards/accuracy_reward_long_step": 0.34375, "rewards/final_brier_reward_long_step": 0.6490460634231567, "rewards/format_reward_long_step": 0.984375, "rewards/stepwise_brier_reward_long_step": 0.738564133644104, "step": 52 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 956.0, "completions/max_terminated_length": 956.0, "completions/mean_length": 259.86328125, "completions/mean_terminated_length": 260.8823547363281, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.0848, "grad_norm": 0.093411885201931, "learning_rate": 8.166666666666666e-07, "loss": -0.0201, "num_tokens": 26561105.0, "reward": 1.0413353443145752, "reward_std": 0.2801922559738159, "rewards/accuracy_reward_long_step": 0.22265625, "rewards/final_brier_reward_long_step": 0.6196457147598267, "rewards/format_reward_long_step": 0.9609375, "rewards/stepwise_brier_reward_long_step": 0.7331956624984741, "step": 53 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 599.0, "completions/max_terminated_length": 599.0, "completions/mean_length": 242.04296875, "completions/mean_terminated_length": 242.04296875, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.0864, "grad_norm": 0.061436332762241364, "learning_rate": 8.11111111111111e-07, "loss": 0.0006, "num_tokens": 27042404.0, "reward": 1.1769518852233887, "reward_std": 0.3248249292373657, "rewards/accuracy_reward_long_step": 0.33203125, "rewards/final_brier_reward_long_step": 0.6683531999588013, "rewards/format_reward_long_step": 0.96875, "rewards/stepwise_brier_reward_long_step": 0.7738291025161743, "step": 54 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 530.0, "completions/max_terminated_length": 530.0, "completions/mean_length": 239.16796875, "completions/mean_terminated_length": 240.10589599609375, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.088, "grad_norm": 0.1144290566444397, "learning_rate": 8.055555555555556e-07, "loss": -0.0147, "num_tokens": 27540463.0, "reward": 1.2522761821746826, "reward_std": 0.29164189100265503, "rewards/accuracy_reward_long_step": 0.390625, "rewards/final_brier_reward_long_step": 0.6936222314834595, "rewards/format_reward_long_step": 0.98828125, "rewards/stepwise_brier_reward_long_step": 0.7764202356338501, "step": 55 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 827.0, "completions/max_terminated_length": 827.0, "completions/mean_length": 254.3984375, "completions/mean_terminated_length": 255.39608764648438, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.0896, "grad_norm": 0.056342847645282745, "learning_rate": 8e-07, "loss": -0.0099, "num_tokens": 28036997.0, "reward": 1.2792823314666748, "reward_std": 0.26004308462142944, "rewards/accuracy_reward_long_step": 0.421875, "rewards/final_brier_reward_long_step": 0.7217453122138977, "rewards/format_reward_long_step": 0.98828125, "rewards/stepwise_brier_reward_long_step": 0.7313213348388672, "step": 56 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 549.0, "completions/max_terminated_length": 549.0, "completions/mean_length": 247.80859375, "completions/mean_terminated_length": 247.80859375, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.0912, "grad_norm": 0.03911300748586655, "learning_rate": 7.944444444444444e-07, "loss": 0.0139, "num_tokens": 28513988.0, "reward": 1.195192575454712, "reward_std": 0.32286006212234497, "rewards/accuracy_reward_long_step": 0.33984375, "rewards/final_brier_reward_long_step": 0.7113619446754456, "rewards/format_reward_long_step": 0.98046875, "rewards/stepwise_brier_reward_long_step": 0.7490957975387573, "step": 57 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 999.0, "completions/max_terminated_length": 999.0, "completions/mean_length": 242.87109375, "completions/mean_terminated_length": 243.82354736328125, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.0928, "grad_norm": 0.10559872537851334, "learning_rate": 7.888888888888889e-07, "loss": -0.0377, "num_tokens": 29001755.0, "reward": 1.17831289768219, "reward_std": 0.2993444800376892, "rewards/accuracy_reward_long_step": 0.3203125, "rewards/final_brier_reward_long_step": 0.7109405994415283, "rewards/format_reward_long_step": 0.98828125, "rewards/stepwise_brier_reward_long_step": 0.7444983720779419, "step": 58 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 650.0, "completions/max_terminated_length": 650.0, "completions/mean_length": 253.3828125, "completions/mean_terminated_length": 254.37648010253906, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.0944, "grad_norm": 0.03542870655655861, "learning_rate": 7.833333333333333e-07, "loss": -0.0018, "num_tokens": 29495773.0, "reward": 1.2244206666946411, "reward_std": 0.2692364752292633, "rewards/accuracy_reward_long_step": 0.3515625, "rewards/final_brier_reward_long_step": 0.7496439814567566, "rewards/format_reward_long_step": 0.98828125, "rewards/stepwise_brier_reward_long_step": 0.7652260065078735, "step": 59 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 611.0, "completions/max_terminated_length": 611.0, "completions/mean_length": 245.73046875, "completions/mean_terminated_length": 245.73046875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.096, "grad_norm": 0.04337484389543533, "learning_rate": 7.777777777777778e-07, "loss": 0.028, "num_tokens": 29976952.0, "reward": 1.199751853942871, "reward_std": 0.2547294497489929, "rewards/accuracy_reward_long_step": 0.33203125, "rewards/final_brier_reward_long_step": 0.7303680181503296, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7561392784118652, "step": 60 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 749.0, "completions/max_terminated_length": 749.0, "completions/mean_length": 247.1015625, "completions/mean_terminated_length": 248.0706024169922, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.0976, "grad_norm": 0.06807160377502441, "learning_rate": 7.722222222222222e-07, "loss": -0.0207, "num_tokens": 30471338.0, "reward": 1.2661097049713135, "reward_std": 0.2852725088596344, "rewards/accuracy_reward_long_step": 0.41015625, "rewards/final_brier_reward_long_step": 0.716038703918457, "rewards/format_reward_long_step": 0.96875, "rewards/stepwise_brier_reward_long_step": 0.770275354385376, "step": 61 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 664.0, "completions/max_terminated_length": 664.0, "completions/mean_length": 246.7734375, "completions/mean_terminated_length": 246.7734375, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.0992, "grad_norm": 0.03863685578107834, "learning_rate": 7.666666666666667e-07, "loss": 0.0143, "num_tokens": 30953648.0, "reward": 1.2412984371185303, "reward_std": 0.21361978352069855, "rewards/accuracy_reward_long_step": 0.37890625, "rewards/final_brier_reward_long_step": 0.7338235974311829, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7313702702522278, "step": 62 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 518.0, "completions/max_terminated_length": 518.0, "completions/mean_length": 243.95703125, "completions/mean_terminated_length": 244.9137420654297, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.1008, "grad_norm": 0.04952770844101906, "learning_rate": 7.61111111111111e-07, "loss": -0.0062, "num_tokens": 31442845.0, "reward": 1.2129815816879272, "reward_std": 0.28704124689102173, "rewards/accuracy_reward_long_step": 0.33203125, "rewards/final_brier_reward_long_step": 0.7710623741149902, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7683641314506531, "step": 63 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 878.0, "completions/max_terminated_length": 878.0, "completions/mean_length": 241.94140625, "completions/mean_terminated_length": 243.84645080566406, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.1024, "grad_norm": 0.08823659271001816, "learning_rate": 7.555555555555555e-07, "loss": -0.0045, "num_tokens": 31942366.0, "reward": 1.1511372327804565, "reward_std": 0.27987030148506165, "rewards/accuracy_reward_long_step": 0.2734375, "rewards/final_brier_reward_long_step": 0.7818734645843506, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7445504665374756, "step": 64 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 980.0, "completions/max_terminated_length": 980.0, "completions/mean_length": 246.95703125, "completions/mean_terminated_length": 247.92550659179688, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.104, "grad_norm": 0.03668355569243431, "learning_rate": 7.5e-07, "loss": -0.0188, "num_tokens": 32434235.0, "reward": 1.180516004562378, "reward_std": 0.21570606529712677, "rewards/accuracy_reward_long_step": 0.30859375, "rewards/final_brier_reward_long_step": 0.7829951047897339, "rewards/format_reward_long_step": 0.984375, "rewards/stepwise_brier_reward_long_step": 0.7359441518783569, "step": 65 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 600.0, "completions/max_terminated_length": 600.0, "completions/mean_length": 241.75, "completions/mean_terminated_length": 241.75, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.1056, "grad_norm": 0.20867598056793213, "learning_rate": 7.444444444444444e-07, "loss": 0.011, "num_tokens": 32930075.0, "reward": 1.2010526657104492, "reward_std": 0.2591190040111542, "rewards/accuracy_reward_long_step": 0.33203125, "rewards/final_brier_reward_long_step": 0.7352352142333984, "rewards/format_reward_long_step": 0.98046875, "rewards/stepwise_brier_reward_long_step": 0.7799128890037537, "step": 66 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 817.0, "completions/max_terminated_length": 817.0, "completions/mean_length": 237.52734375, "completions/mean_terminated_length": 237.52734375, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.1072, "grad_norm": 0.055712711066007614, "learning_rate": 7.388888888888889e-07, "loss": 0.0143, "num_tokens": 33426218.0, "reward": 1.307477593421936, "reward_std": 0.2710718512535095, "rewards/accuracy_reward_long_step": 0.44921875, "rewards/final_brier_reward_long_step": 0.7135553956031799, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7351051568984985, "step": 67 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 983.0, "completions/max_terminated_length": 983.0, "completions/mean_length": 251.796875, "completions/mean_terminated_length": 251.796875, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.1088, "grad_norm": 0.060389939695596695, "learning_rate": 7.333333333333332e-07, "loss": 0.0096, "num_tokens": 33915718.0, "reward": 1.2436058521270752, "reward_std": 0.32295602560043335, "rewards/accuracy_reward_long_step": 0.3984375, "rewards/final_brier_reward_long_step": 0.717818021774292, "rewards/format_reward_long_step": 0.98828125, "rewards/stepwise_brier_reward_long_step": 0.6862928867340088, "step": 68 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 652.0, "completions/max_terminated_length": 652.0, "completions/mean_length": 239.89453125, "completions/mean_terminated_length": 239.89453125, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.1104, "grad_norm": 0.033029954880476, "learning_rate": 7.277777777777777e-07, "loss": 0.0144, "num_tokens": 34381011.0, "reward": 1.3730568885803223, "reward_std": 0.26877105236053467, "rewards/accuracy_reward_long_step": 0.52734375, "rewards/final_brier_reward_long_step": 0.6850621700286865, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7056032419204712, "step": 69 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 778.0, "completions/max_terminated_length": 778.0, "completions/mean_length": 237.265625, "completions/mean_terminated_length": 237.265625, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.112, "grad_norm": 0.09013690799474716, "learning_rate": 7.222222222222221e-07, "loss": 0.0035, "num_tokens": 34869191.0, "reward": 1.251227855682373, "reward_std": 0.19920992851257324, "rewards/accuracy_reward_long_step": 0.37890625, "rewards/final_brier_reward_long_step": 0.7329218983650208, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7641769051551819, "step": 70 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 717.0, "completions/max_terminated_length": 717.0, "completions/mean_length": 229.0859375, "completions/mean_terminated_length": 229.0859375, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.1136, "grad_norm": 0.03403422236442566, "learning_rate": 7.166666666666667e-07, "loss": -0.0026, "num_tokens": 35346397.0, "reward": 1.293338656425476, "reward_std": 0.3115568161010742, "rewards/accuracy_reward_long_step": 0.44921875, "rewards/final_brier_reward_long_step": 0.710399866104126, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.6817047595977783, "step": 71 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 537.0, "completions/max_terminated_length": 537.0, "completions/mean_length": 225.59765625, "completions/mean_terminated_length": 226.48236083984375, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.1152, "grad_norm": 0.04565083235502243, "learning_rate": 7.111111111111111e-07, "loss": -0.0086, "num_tokens": 35817766.0, "reward": 1.199450969696045, "reward_std": 0.1990368813276291, "rewards/accuracy_reward_long_step": 0.32421875, "rewards/final_brier_reward_long_step": 0.762933611869812, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7458075284957886, "step": 72 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 773.0, "completions/max_terminated_length": 773.0, "completions/mean_length": 237.078125, "completions/mean_terminated_length": 238.0078582763672, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.1168, "grad_norm": 0.16507937014102936, "learning_rate": 7.055555555555556e-07, "loss": 0.0011, "num_tokens": 36306426.0, "reward": 1.1114095449447632, "reward_std": 0.23765724897384644, "rewards/accuracy_reward_long_step": 0.2421875, "rewards/final_brier_reward_long_step": 0.7882089614868164, "rewards/format_reward_long_step": 0.96875, "rewards/stepwise_brier_reward_long_step": 0.7511793971061707, "step": 73 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 572.0, "completions/max_terminated_length": 572.0, "completions/mean_length": 222.734375, "completions/mean_terminated_length": 222.734375, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.1184, "grad_norm": 0.10383451730012894, "learning_rate": 7e-07, "loss": -0.0018, "num_tokens": 36773214.0, "reward": 1.2845209836959839, "reward_std": 0.2531934976577759, "rewards/accuracy_reward_long_step": 0.43359375, "rewards/final_brier_reward_long_step": 0.700976550579071, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.710544764995575, "step": 74 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 518.0, "completions/max_terminated_length": 518.0, "completions/mean_length": 227.99609375, "completions/mean_terminated_length": 227.99609375, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.12, "grad_norm": 0.04366622865200043, "learning_rate": 6.944444444444444e-07, "loss": -0.0197, "num_tokens": 37265613.0, "reward": 1.2163734436035156, "reward_std": 0.20943868160247803, "rewards/accuracy_reward_long_step": 0.359375, "rewards/final_brier_reward_long_step": 0.7348078489303589, "rewards/format_reward_long_step": 0.98828125, "rewards/stepwise_brier_reward_long_step": 0.7166235446929932, "step": 75 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 611.0, "completions/max_terminated_length": 611.0, "completions/mean_length": 225.1484375, "completions/mean_terminated_length": 226.03138732910156, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.1216, "grad_norm": 0.06976811587810516, "learning_rate": 6.888888888888889e-07, "loss": -0.0152, "num_tokens": 37743499.0, "reward": 1.2767009735107422, "reward_std": 0.16413617134094238, "rewards/accuracy_reward_long_step": 0.4140625, "rewards/final_brier_reward_long_step": 0.7078866958618164, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7504795789718628, "step": 76 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 527.0, "completions/max_terminated_length": 527.0, "completions/mean_length": 231.44921875, "completions/mean_terminated_length": 231.44921875, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.1232, "grad_norm": 0.04065747186541557, "learning_rate": 6.833333333333333e-07, "loss": 0.0018, "num_tokens": 38227782.0, "reward": 1.2329548597335815, "reward_std": 0.1812242567539215, "rewards/accuracy_reward_long_step": 0.37109375, "rewards/final_brier_reward_long_step": 0.7489936351776123, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.6984509229660034, "step": 77 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 562.0, "completions/max_terminated_length": 562.0, "completions/mean_length": 227.01171875, "completions/mean_terminated_length": 227.01171875, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.1248, "grad_norm": 0.03462184593081474, "learning_rate": 6.777777777777778e-07, "loss": 0.0084, "num_tokens": 38706801.0, "reward": 1.2074387073516846, "reward_std": 0.23012541234493256, "rewards/accuracy_reward_long_step": 0.35546875, "rewards/final_brier_reward_long_step": 0.7311980724334717, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.6923068761825562, "step": 78 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 601.0, "completions/max_terminated_length": 601.0, "completions/mean_length": 214.515625, "completions/mean_terminated_length": 214.515625, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.1264, "grad_norm": 0.06125793233513832, "learning_rate": 6.722222222222222e-07, "loss": 0.0054, "num_tokens": 39185933.0, "reward": 1.202413558959961, "reward_std": 0.1790447235107422, "rewards/accuracy_reward_long_step": 0.33203125, "rewards/final_brier_reward_long_step": 0.7572082281112671, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7243211269378662, "step": 79 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.0, "completions/max_terminated_length": 461.0, "completions/mean_length": 209.36328125, "completions/mean_terminated_length": 209.36328125, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.128, "grad_norm": 0.0341668576002121, "learning_rate": 6.666666666666666e-07, "loss": 0.005, "num_tokens": 39665874.0, "reward": 1.1575262546539307, "reward_std": 0.19264450669288635, "rewards/accuracy_reward_long_step": 0.2734375, "rewards/final_brier_reward_long_step": 0.7840448617935181, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7679354548454285, "step": 80 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 578.0, "completions/max_terminated_length": 578.0, "completions/mean_length": 218.9296875, "completions/mean_terminated_length": 218.9296875, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.1296, "grad_norm": 0.056944340467453, "learning_rate": 6.611111111111111e-07, "loss": -0.0004, "num_tokens": 40145992.0, "reward": 1.2321239709854126, "reward_std": 0.288753867149353, "rewards/accuracy_reward_long_step": 0.36328125, "rewards/final_brier_reward_long_step": 0.7278487682342529, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.755334734916687, "step": 81 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 469.0, "completions/max_terminated_length": 469.0, "completions/mean_length": 216.609375, "completions/mean_terminated_length": 217.45883178710938, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.1312, "grad_norm": 0.040777526795864105, "learning_rate": 6.555555555555555e-07, "loss": -0.0166, "num_tokens": 40628988.0, "reward": 1.2330138683319092, "reward_std": 0.2308121919631958, "rewards/accuracy_reward_long_step": 0.37109375, "rewards/final_brier_reward_long_step": 0.7352034449577332, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7202898263931274, "step": 82 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 859.0, "completions/max_terminated_length": 859.0, "completions/mean_length": 220.13671875, "completions/mean_terminated_length": 220.13671875, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.1328, "grad_norm": 0.2267780750989914, "learning_rate": 6.5e-07, "loss": -0.0127, "num_tokens": 41107423.0, "reward": 1.2562687397003174, "reward_std": 0.2288559228181839, "rewards/accuracy_reward_long_step": 0.3984375, "rewards/final_brier_reward_long_step": 0.6950433254241943, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7362817525863647, "step": 83 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 631.0, "completions/max_terminated_length": 631.0, "completions/mean_length": 225.0703125, "completions/mean_terminated_length": 225.0703125, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.1344, "grad_norm": 0.04118198901414871, "learning_rate": 6.444444444444444e-07, "loss": 0.0058, "num_tokens": 41599321.0, "reward": 1.3327488899230957, "reward_std": 0.1981910765171051, "rewards/accuracy_reward_long_step": 0.48828125, "rewards/final_brier_reward_long_step": 0.6561777591705322, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7295053005218506, "step": 84 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 499.0, "completions/max_terminated_length": 499.0, "completions/mean_length": 219.65625, "completions/mean_terminated_length": 219.65625, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.136, "grad_norm": 0.04954572767019272, "learning_rate": 6.388888888888888e-07, "loss": 0.0006, "num_tokens": 42074473.0, "reward": 1.3892377614974976, "reward_std": 0.1905529648065567, "rewards/accuracy_reward_long_step": 0.5546875, "rewards/final_brier_reward_long_step": 0.6077094078063965, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7383042573928833, "step": 85 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 492.0, "completions/max_terminated_length": 492.0, "completions/mean_length": 211.30078125, "completions/mean_terminated_length": 211.30078125, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.1376, "grad_norm": 0.04808489605784416, "learning_rate": 6.333333333333332e-07, "loss": -0.0179, "num_tokens": 42543374.0, "reward": 1.3056892156600952, "reward_std": 0.18410624563694, "rewards/accuracy_reward_long_step": 0.4453125, "rewards/final_brier_reward_long_step": 0.670364260673523, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7711423635482788, "step": 86 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 545.0, "completions/max_terminated_length": 545.0, "completions/mean_length": 216.82421875, "completions/mean_terminated_length": 217.67453002929688, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.1392, "grad_norm": 0.07117484509944916, "learning_rate": 6.277777777777777e-07, "loss": -0.0033, "num_tokens": 43021761.0, "reward": 1.3590378761291504, "reward_std": 0.20214568078517914, "rewards/accuracy_reward_long_step": 0.5390625, "rewards/final_brier_reward_long_step": 0.604397177696228, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.6833170652389526, "step": 87 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 584.0, "completions/max_terminated_length": 584.0, "completions/mean_length": 232.453125, "completions/mean_terminated_length": 232.453125, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.1408, "grad_norm": 0.05832771211862564, "learning_rate": 6.222222222222223e-07, "loss": 0.0138, "num_tokens": 43497541.0, "reward": 1.3088263273239136, "reward_std": 0.17070481181144714, "rewards/accuracy_reward_long_step": 0.4375, "rewards/final_brier_reward_long_step": 0.6950075626373291, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.79029780626297, "step": 88 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 770.0, "completions/max_terminated_length": 770.0, "completions/mean_length": 224.8828125, "completions/mean_terminated_length": 224.8828125, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.1424, "grad_norm": 0.031236618757247925, "learning_rate": 6.166666666666667e-07, "loss": -0.0017, "num_tokens": 43984239.0, "reward": 1.2366325855255127, "reward_std": 0.22545480728149414, "rewards/accuracy_reward_long_step": 0.36328125, "rewards/final_brier_reward_long_step": 0.7334580421447754, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7599474191665649, "step": 89 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 531.0, "completions/max_terminated_length": 531.0, "completions/mean_length": 220.33203125, "completions/mean_terminated_length": 220.33203125, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.144, "grad_norm": 0.032982852309942245, "learning_rate": 6.111111111111112e-07, "loss": 0.0033, "num_tokens": 44459788.0, "reward": 1.2820594310760498, "reward_std": 0.24406251311302185, "rewards/accuracy_reward_long_step": 0.43359375, "rewards/final_brier_reward_long_step": 0.6791086792945862, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7303793430328369, "step": 90 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 521.0, "completions/max_terminated_length": 521.0, "completions/mean_length": 224.7734375, "completions/mean_terminated_length": 224.7734375, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.1456, "grad_norm": 0.04948606342077255, "learning_rate": 6.055555555555555e-07, "loss": -0.0038, "num_tokens": 44927786.0, "reward": 1.312872290611267, "reward_std": 0.21360260248184204, "rewards/accuracy_reward_long_step": 0.44921875, "rewards/final_brier_reward_long_step": 0.681350588798523, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7732632756233215, "step": 91 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 567.0, "completions/max_terminated_length": 567.0, "completions/mean_length": 221.44140625, "completions/mean_terminated_length": 221.44140625, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.1472, "grad_norm": 0.06898247450590134, "learning_rate": 6e-07, "loss": -0.0107, "num_tokens": 45407147.0, "reward": 1.3344571590423584, "reward_std": 0.21049799025058746, "rewards/accuracy_reward_long_step": 0.484375, "rewards/final_brier_reward_long_step": 0.6809437870979309, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.719385027885437, "step": 92 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 508.0, "completions/max_terminated_length": 508.0, "completions/mean_length": 233.40234375, "completions/mean_terminated_length": 233.40234375, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.1488, "grad_norm": 0.034343279898166656, "learning_rate": 5.944444444444444e-07, "loss": -0.0167, "num_tokens": 45889994.0, "reward": 1.250670075416565, "reward_std": 0.20306074619293213, "rewards/accuracy_reward_long_step": 0.3828125, "rewards/final_brier_reward_long_step": 0.723064661026001, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7561779022216797, "step": 93 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 593.0, "completions/max_terminated_length": 593.0, "completions/mean_length": 239.5859375, "completions/mean_terminated_length": 239.5859375, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.1504, "grad_norm": 0.03516862913966179, "learning_rate": 5.888888888888889e-07, "loss": -0.0025, "num_tokens": 46369656.0, "reward": 1.3316434621810913, "reward_std": 0.2240450382232666, "rewards/accuracy_reward_long_step": 0.46484375, "rewards/final_brier_reward_long_step": 0.6970722675323486, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7701265811920166, "step": 94 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 536.0, "completions/max_terminated_length": 536.0, "completions/mean_length": 233.03125, "completions/mean_terminated_length": 233.03125, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.152, "grad_norm": 0.04016004502773285, "learning_rate": 5.833333333333334e-07, "loss": 0.0016, "num_tokens": 46861144.0, "reward": 1.2625809907913208, "reward_std": 0.21456453204154968, "rewards/accuracy_reward_long_step": 0.39453125, "rewards/final_brier_reward_long_step": 0.7184839248657227, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7693402767181396, "step": 95 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 721.0, "completions/max_terminated_length": 721.0, "completions/mean_length": 241.79296875, "completions/mean_terminated_length": 241.79296875, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.1536, "grad_norm": 0.03148433566093445, "learning_rate": 5.777777777777777e-07, "loss": 0.0126, "num_tokens": 47360659.0, "reward": 1.303478479385376, "reward_std": 0.18032774329185486, "rewards/accuracy_reward_long_step": 0.44140625, "rewards/final_brier_reward_long_step": 0.7018147706985474, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7464738488197327, "step": 96 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 761.0, "completions/max_terminated_length": 761.0, "completions/mean_length": 241.2734375, "completions/mean_terminated_length": 241.2734375, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.1552, "grad_norm": 0.03487463667988777, "learning_rate": 5.722222222222222e-07, "loss": 0.0105, "num_tokens": 47839465.0, "reward": 1.3190314769744873, "reward_std": 0.18113639950752258, "rewards/accuracy_reward_long_step": 0.4453125, "rewards/final_brier_reward_long_step": 0.718437910079956, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7764377593994141, "step": 97 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 614.0, "completions/max_terminated_length": 614.0, "completions/mean_length": 244.4296875, "completions/mean_terminated_length": 244.4296875, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.1568, "grad_norm": 0.06506786495447159, "learning_rate": 5.666666666666666e-07, "loss": 0.0106, "num_tokens": 48313367.0, "reward": 1.214963436126709, "reward_std": 0.18929462134838104, "rewards/accuracy_reward_long_step": 0.3203125, "rewards/final_brier_reward_long_step": 0.7821929454803467, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7964109182357788, "step": 98 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 641.0, "completions/max_terminated_length": 641.0, "completions/mean_length": 244.140625, "completions/mean_terminated_length": 244.140625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.1584, "grad_norm": 0.032129108905792236, "learning_rate": 5.611111111111111e-07, "loss": 0.0066, "num_tokens": 48807115.0, "reward": 1.2324862480163574, "reward_std": 0.1833975911140442, "rewards/accuracy_reward_long_step": 0.33984375, "rewards/final_brier_reward_long_step": 0.7741453051567078, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8042370080947876, "step": 99 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 605.0, "completions/max_terminated_length": 605.0, "completions/mean_length": 248.4140625, "completions/mean_terminated_length": 248.4140625, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.16, "grad_norm": 0.041153669357299805, "learning_rate": 5.555555555555555e-07, "loss": 0.0039, "num_tokens": 49313845.0, "reward": 1.3223180770874023, "reward_std": 0.2212940752506256, "rewards/accuracy_reward_long_step": 0.46484375, "rewards/final_brier_reward_long_step": 0.6976035237312317, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7479186058044434, "step": 100 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 477.0, "completions/max_terminated_length": 477.0, "completions/mean_length": 235.953125, "completions/mean_terminated_length": 236.87844848632812, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.1616, "grad_norm": 0.05703693628311157, "learning_rate": 5.5e-07, "loss": -0.0125, "num_tokens": 49808721.0, "reward": 1.289008378982544, "reward_std": 0.11740753054618835, "rewards/accuracy_reward_long_step": 0.40625, "rewards/final_brier_reward_long_step": 0.7392846345901489, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7995613813400269, "step": 101 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 544.0, "completions/max_terminated_length": 544.0, "completions/mean_length": 242.234375, "completions/mean_terminated_length": 242.234375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.1632, "grad_norm": 0.04496127367019653, "learning_rate": 5.444444444444443e-07, "loss": 0.0125, "num_tokens": 50293829.0, "reward": 1.2432105541229248, "reward_std": 0.18485994637012482, "rewards/accuracy_reward_long_step": 0.35546875, "rewards/final_brier_reward_long_step": 0.7693418264389038, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7816250920295715, "step": 102 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 557.0, "completions/max_terminated_length": 557.0, "completions/mean_length": 245.44921875, "completions/mean_terminated_length": 245.44921875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.1648, "grad_norm": 0.035261496901512146, "learning_rate": 5.388888888888888e-07, "loss": -0.003, "num_tokens": 50795808.0, "reward": 1.312826156616211, "reward_std": 0.2417442500591278, "rewards/accuracy_reward_long_step": 0.44140625, "rewards/final_brier_reward_long_step": 0.709702730178833, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.775976836681366, "step": 103 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 631.0, "completions/max_terminated_length": 631.0, "completions/mean_length": 245.59765625, "completions/mean_terminated_length": 245.59765625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.1664, "grad_norm": 0.032495662569999695, "learning_rate": 5.333333333333333e-07, "loss": -0.0091, "num_tokens": 51279649.0, "reward": 1.315579891204834, "reward_std": 0.267301082611084, "rewards/accuracy_reward_long_step": 0.4453125, "rewards/final_brier_reward_long_step": 0.7196257710456848, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7614439725875854, "step": 104 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 680.0, "completions/max_terminated_length": 680.0, "completions/mean_length": 252.44140625, "completions/mean_terminated_length": 252.44140625, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.168, "grad_norm": 0.03773434832692146, "learning_rate": 5.277777777777777e-07, "loss": 0.0192, "num_tokens": 51778938.0, "reward": 1.3275742530822754, "reward_std": 0.2403724491596222, "rewards/accuracy_reward_long_step": 0.46484375, "rewards/final_brier_reward_long_step": 0.707565188407898, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7589816451072693, "step": 105 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 700.0, "completions/max_terminated_length": 700.0, "completions/mean_length": 267.77734375, "completions/mean_terminated_length": 267.77734375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.1696, "grad_norm": 0.032062213867902756, "learning_rate": 5.222222222222223e-07, "loss": -0.001, "num_tokens": 52287993.0, "reward": 1.2254037857055664, "reward_std": 0.22276735305786133, "rewards/accuracy_reward_long_step": 0.3515625, "rewards/final_brier_reward_long_step": 0.7529284954071045, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7424367070198059, "step": 106 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 666.0, "completions/max_terminated_length": 666.0, "completions/mean_length": 252.94140625, "completions/mean_terminated_length": 252.94140625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.1712, "grad_norm": 0.06173327565193176, "learning_rate": 5.166666666666667e-07, "loss": -0.002, "num_tokens": 52788314.0, "reward": 1.2225637435913086, "reward_std": 0.19366461038589478, "rewards/accuracy_reward_long_step": 0.33203125, "rewards/final_brier_reward_long_step": 0.7824580669403076, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7796720266342163, "step": 107 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 606.0, "completions/max_terminated_length": 606.0, "completions/mean_length": 264.60546875, "completions/mean_terminated_length": 264.60546875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.1728, "grad_norm": 0.034810252487659454, "learning_rate": 5.111111111111111e-07, "loss": -0.004, "num_tokens": 53282845.0, "reward": 1.2544004917144775, "reward_std": 0.1617870330810547, "rewards/accuracy_reward_long_step": 0.37890625, "rewards/final_brier_reward_long_step": 0.7480487823486328, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7539278864860535, "step": 108 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 662.0, "completions/max_terminated_length": 662.0, "completions/mean_length": 261.3046875, "completions/mean_terminated_length": 261.3046875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.1744, "grad_norm": 0.15722031891345978, "learning_rate": 5.055555555555555e-07, "loss": -0.0081, "num_tokens": 53767051.0, "reward": 1.322205901145935, "reward_std": 0.19752384722232819, "rewards/accuracy_reward_long_step": 0.453125, "rewards/final_brier_reward_long_step": 0.7100498676300049, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7662734389305115, "step": 109 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 572.0, "completions/max_terminated_length": 572.0, "completions/mean_length": 266.1015625, "completions/mean_terminated_length": 266.1015625, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.176, "grad_norm": 0.03229089081287384, "learning_rate": 5e-07, "loss": -0.0056, "num_tokens": 54263453.0, "reward": 1.3309049606323242, "reward_std": 0.25080251693725586, "rewards/accuracy_reward_long_step": 0.45703125, "rewards/final_brier_reward_long_step": 0.713943362236023, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7815513610839844, "step": 110 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 566.0, "completions/max_terminated_length": 566.0, "completions/mean_length": 265.24609375, "completions/mean_terminated_length": 265.24609375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.1776, "grad_norm": 0.04050762951374054, "learning_rate": 4.944444444444445e-07, "loss": -0.0176, "num_tokens": 54756988.0, "reward": 1.2833229303359985, "reward_std": 0.1913967728614807, "rewards/accuracy_reward_long_step": 0.3984375, "rewards/final_brier_reward_long_step": 0.7617863416671753, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7777553796768188, "step": 111 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 576.0, "completions/max_terminated_length": 576.0, "completions/mean_length": 252.6796875, "completions/mean_terminated_length": 252.6796875, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.1792, "grad_norm": 0.05397975817322731, "learning_rate": 4.888888888888889e-07, "loss": -0.0078, "num_tokens": 55254026.0, "reward": 1.2269587516784668, "reward_std": 0.13054987788200378, "rewards/accuracy_reward_long_step": 0.3359375, "rewards/final_brier_reward_long_step": 0.7798945903778076, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7841900587081909, "step": 112 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 533.0, "completions/max_terminated_length": 533.0, "completions/mean_length": 256.3671875, "completions/mean_terminated_length": 256.3671875, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.1808, "grad_norm": 0.12474508583545685, "learning_rate": 4.833333333333333e-07, "loss": -0.009, "num_tokens": 55726928.0, "reward": 1.36446213722229, "reward_std": 0.2588464617729187, "rewards/accuracy_reward_long_step": 0.49609375, "rewards/final_brier_reward_long_step": 0.6960980892181396, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7773758172988892, "step": 113 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 663.0, "completions/max_terminated_length": 663.0, "completions/mean_length": 267.078125, "completions/mean_terminated_length": 267.078125, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.1824, "grad_norm": 0.05340947210788727, "learning_rate": 4.777777777777778e-07, "loss": 0.0001, "num_tokens": 56212564.0, "reward": 1.3508033752441406, "reward_std": 0.174399271607399, "rewards/accuracy_reward_long_step": 0.47265625, "rewards/final_brier_reward_long_step": 0.7279148101806641, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.784673810005188, "step": 114 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 707.0, "completions/max_terminated_length": 707.0, "completions/mean_length": 258.515625, "completions/mean_terminated_length": 258.515625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.184, "grad_norm": 0.051825981587171555, "learning_rate": 4.722222222222222e-07, "loss": 0.0029, "num_tokens": 56705280.0, "reward": 1.37041175365448, "reward_std": 0.1563851535320282, "rewards/accuracy_reward_long_step": 0.5, "rewards/final_brier_reward_long_step": 0.7170413732528687, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7646055221557617, "step": 115 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 502.0, "completions/max_terminated_length": 502.0, "completions/mean_length": 257.93359375, "completions/mean_terminated_length": 258.9450988769531, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.1856, "grad_norm": 0.056365448981523514, "learning_rate": 4.6666666666666666e-07, "loss": -0.0229, "num_tokens": 57197543.0, "reward": 1.3457661867141724, "reward_std": 0.25224822759628296, "rewards/accuracy_reward_long_step": 0.4765625, "rewards/final_brier_reward_long_step": 0.7099324464797974, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7746949195861816, "step": 116 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 612.0, "completions/max_terminated_length": 612.0, "completions/mean_length": 272.43359375, "completions/mean_terminated_length": 272.43359375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.1872, "grad_norm": 0.042497143149375916, "learning_rate": 4.611111111111111e-07, "loss": -0.0156, "num_tokens": 57681430.0, "reward": 1.3612632751464844, "reward_std": 0.25810113549232483, "rewards/accuracy_reward_long_step": 0.49609375, "rewards/final_brier_reward_long_step": 0.6923613548278809, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7761292457580566, "step": 117 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 576.0, "completions/max_terminated_length": 576.0, "completions/mean_length": 268.60546875, "completions/mean_terminated_length": 268.60546875, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.1888, "grad_norm": 0.04082271829247475, "learning_rate": 4.555555555555555e-07, "loss": 0.0062, "num_tokens": 58166401.0, "reward": 1.3085888624191284, "reward_std": 0.18546397984027863, "rewards/accuracy_reward_long_step": 0.44140625, "rewards/final_brier_reward_long_step": 0.7159848213195801, "rewards/format_reward_long_step": 0.98828125, "rewards/stepwise_brier_reward_long_step": 0.7761832475662231, "step": 118 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 515.0, "completions/max_terminated_length": 515.0, "completions/mean_length": 276.85546875, "completions/mean_terminated_length": 276.85546875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.1904, "grad_norm": 0.031454987823963165, "learning_rate": 4.5e-07, "loss": 0.0065, "num_tokens": 58658684.0, "reward": 1.2189127206802368, "reward_std": 0.14538250863552094, "rewards/accuracy_reward_long_step": 0.32421875, "rewards/final_brier_reward_long_step": 0.7811195254325867, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7976564168930054, "step": 119 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 522.0, "completions/max_terminated_length": 522.0, "completions/mean_length": 278.578125, "completions/mean_terminated_length": 278.578125, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.192, "grad_norm": 0.031778957694768906, "learning_rate": 4.444444444444444e-07, "loss": -0.0123, "num_tokens": 59138176.0, "reward": 1.3575904369354248, "reward_std": 0.16297504305839539, "rewards/accuracy_reward_long_step": 0.48046875, "rewards/final_brier_reward_long_step": 0.7171749472618103, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7913117408752441, "step": 120 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 647.0, "completions/max_terminated_length": 647.0, "completions/mean_length": 284.1015625, "completions/mean_terminated_length": 284.1015625, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.1936, "grad_norm": 0.046684540808200836, "learning_rate": 4.3888888888888884e-07, "loss": -0.0039, "num_tokens": 59613906.0, "reward": 1.4500417709350586, "reward_std": 0.21346309781074524, "rewards/accuracy_reward_long_step": 0.609375, "rewards/final_brier_reward_long_step": 0.6556953191757202, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7225968837738037, "step": 121 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 666.0, "completions/max_terminated_length": 666.0, "completions/mean_length": 290.79296875, "completions/mean_terminated_length": 290.79296875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.1952, "grad_norm": 0.03638645261526108, "learning_rate": 4.3333333333333335e-07, "loss": 0.0073, "num_tokens": 60115509.0, "reward": 1.2977830171585083, "reward_std": 0.22426971793174744, "rewards/accuracy_reward_long_step": 0.4140625, "rewards/final_brier_reward_long_step": 0.7605781555175781, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7821164131164551, "step": 122 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 612.0, "completions/max_terminated_length": 612.0, "completions/mean_length": 276.171875, "completions/mean_terminated_length": 276.171875, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.1968, "grad_norm": 0.0302373468875885, "learning_rate": 4.2777777777777775e-07, "loss": -0.0006, "num_tokens": 60614385.0, "reward": 1.415604829788208, "reward_std": 0.17988049983978271, "rewards/accuracy_reward_long_step": 0.55078125, "rewards/final_brier_reward_long_step": 0.6876656413078308, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7794409990310669, "step": 123 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 559.0, "completions/max_terminated_length": 559.0, "completions/mean_length": 287.3828125, "completions/mean_terminated_length": 287.3828125, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.1984, "grad_norm": 0.05541790649294853, "learning_rate": 4.222222222222222e-07, "loss": 0.003, "num_tokens": 61111347.0, "reward": 1.2961609363555908, "reward_std": 0.21454349160194397, "rewards/accuracy_reward_long_step": 0.41015625, "rewards/final_brier_reward_long_step": 0.7439448833465576, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8078866004943848, "step": 124 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 710.0, "completions/max_terminated_length": 710.0, "completions/mean_length": 294.57421875, "completions/mean_terminated_length": 294.57421875, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.2, "grad_norm": 0.028944578021764755, "learning_rate": 4.1666666666666667e-07, "loss": 0.017, "num_tokens": 61617174.0, "reward": 1.35416841506958, "reward_std": 0.1785564422607422, "rewards/accuracy_reward_long_step": 0.4765625, "rewards/final_brier_reward_long_step": 0.7367419004440308, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7736819982528687, "step": 125 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 641.0, "completions/max_terminated_length": 641.0, "completions/mean_length": 284.69921875, "completions/mean_terminated_length": 284.69921875, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.2016, "grad_norm": 0.06281944364309311, "learning_rate": 4.1111111111111107e-07, "loss": 0.0122, "num_tokens": 62122729.0, "reward": 1.3892717361450195, "reward_std": 0.2381824553012848, "rewards/accuracy_reward_long_step": 0.51171875, "rewards/final_brier_reward_long_step": 0.7210512161254883, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7969735860824585, "step": 126 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 565.0, "completions/max_terminated_length": 565.0, "completions/mean_length": 285.65625, "completions/mean_terminated_length": 285.65625, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.2032, "grad_norm": 0.04049834981560707, "learning_rate": 4.055555555555555e-07, "loss": 0.0137, "num_tokens": 62628897.0, "reward": 1.3196742534637451, "reward_std": 0.1803930401802063, "rewards/accuracy_reward_long_step": 0.43359375, "rewards/final_brier_reward_long_step": 0.7413291931152344, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8029927015304565, "step": 127 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 725.0, "completions/max_terminated_length": 725.0, "completions/mean_length": 289.6796875, "completions/mean_terminated_length": 289.6796875, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.2048, "grad_norm": 0.045730676501989365, "learning_rate": 4e-07, "loss": 0.0021, "num_tokens": 63135039.0, "reward": 1.3337769508361816, "reward_std": 0.22858937084674835, "rewards/accuracy_reward_long_step": 0.45703125, "rewards/final_brier_reward_long_step": 0.7338937520980835, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7730889320373535, "step": 128 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 728.0, "completions/max_terminated_length": 728.0, "completions/mean_length": 287.39453125, "completions/mean_terminated_length": 287.39453125, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.2064, "grad_norm": 0.027239752933382988, "learning_rate": 3.9444444444444444e-07, "loss": -0.0029, "num_tokens": 63639908.0, "reward": 1.3897987604141235, "reward_std": 0.2298661321401596, "rewards/accuracy_reward_long_step": 0.515625, "rewards/final_brier_reward_long_step": 0.6942844390869141, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8102232217788696, "step": 129 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 643.0, "completions/max_terminated_length": 643.0, "completions/mean_length": 295.29296875, "completions/mean_terminated_length": 295.29296875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.208, "grad_norm": 0.030676817521452904, "learning_rate": 3.888888888888889e-07, "loss": -0.0058, "num_tokens": 64123943.0, "reward": 1.3153623342514038, "reward_std": 0.15915831923484802, "rewards/accuracy_reward_long_step": 0.42578125, "rewards/final_brier_reward_long_step": 0.7673367261886597, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7909875512123108, "step": 130 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 651.0, "completions/max_terminated_length": 651.0, "completions/mean_length": 297.234375, "completions/mean_terminated_length": 297.234375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.2096, "grad_norm": 0.06891848146915436, "learning_rate": 3.8333333333333335e-07, "loss": 0.0102, "num_tokens": 64630875.0, "reward": 1.3944809436798096, "reward_std": 0.14757326245307922, "rewards/accuracy_reward_long_step": 0.51953125, "rewards/final_brier_reward_long_step": 0.7101758122444153, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7896230220794678, "step": 131 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 667.0, "completions/max_terminated_length": 667.0, "completions/mean_length": 302.23046875, "completions/mean_terminated_length": 302.23046875, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.2112, "grad_norm": 0.09698417037725449, "learning_rate": 3.7777777777777775e-07, "loss": -0.0078, "num_tokens": 65137038.0, "reward": 1.2600277662277222, "reward_std": 0.17520007491111755, "rewards/accuracy_reward_long_step": 0.36328125, "rewards/final_brier_reward_long_step": 0.7893859148025513, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7976003885269165, "step": 132 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 664.0, "completions/max_terminated_length": 664.0, "completions/mean_length": 296.4375, "completions/mean_terminated_length": 296.4375, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.2128, "grad_norm": 0.03775089234113693, "learning_rate": 3.722222222222222e-07, "loss": 0.0009, "num_tokens": 65633878.0, "reward": 1.2865660190582275, "reward_std": 0.23296663165092468, "rewards/accuracy_reward_long_step": 0.40234375, "rewards/final_brier_reward_long_step": 0.7601765394210815, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7767128944396973, "step": 133 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 610.0, "completions/max_terminated_length": 610.0, "completions/mean_length": 290.83203125, "completions/mean_terminated_length": 290.83203125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.2144, "grad_norm": 0.030530598014593124, "learning_rate": 3.666666666666666e-07, "loss": 0.01, "num_tokens": 66127099.0, "reward": 1.2935059070587158, "reward_std": 0.20468412339687347, "rewards/accuracy_reward_long_step": 0.41015625, "rewards/final_brier_reward_long_step": 0.7623168230056763, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7788943648338318, "step": 134 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 701.0, "completions/max_terminated_length": 701.0, "completions/mean_length": 310.86328125, "completions/mean_terminated_length": 310.86328125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.216, "grad_norm": 0.02592829428613186, "learning_rate": 3.6111111111111107e-07, "loss": -0.0094, "num_tokens": 66623992.0, "reward": 1.4253921508789062, "reward_std": 0.12001308053731918, "rewards/accuracy_reward_long_step": 0.55078125, "rewards/final_brier_reward_long_step": 0.7179234027862549, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7805203795433044, "step": 135 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 768.0, "completions/max_terminated_length": 768.0, "completions/mean_length": 316.38671875, "completions/mean_terminated_length": 317.6274719238281, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.2176, "grad_norm": 0.031040871515870094, "learning_rate": 3.5555555555555553e-07, "loss": -0.0145, "num_tokens": 67146987.0, "reward": 1.2932628393173218, "reward_std": 0.1893351525068283, "rewards/accuracy_reward_long_step": 0.40625, "rewards/final_brier_reward_long_step": 0.7708644866943359, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7928118109703064, "step": 136 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 623.0, "completions/max_terminated_length": 623.0, "completions/mean_length": 298.63671875, "completions/mean_terminated_length": 298.63671875, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.2192, "grad_norm": 0.1331356018781662, "learning_rate": 3.5e-07, "loss": 0.0031, "num_tokens": 67653382.0, "reward": 1.4031734466552734, "reward_std": 0.19260135293006897, "rewards/accuracy_reward_long_step": 0.51953125, "rewards/final_brier_reward_long_step": 0.7188730239868164, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8156960010528564, "step": 137 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 573.0, "completions/max_terminated_length": 573.0, "completions/mean_length": 304.4140625, "completions/mean_terminated_length": 304.4140625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.2208, "grad_norm": 0.030995018780231476, "learning_rate": 3.4444444444444444e-07, "loss": -0.0093, "num_tokens": 68164376.0, "reward": 1.3275476694107056, "reward_std": 0.18350914120674133, "rewards/accuracy_reward_long_step": 0.4296875, "rewards/final_brier_reward_long_step": 0.7638988494873047, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8275418281555176, "step": 138 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 642.0, "completions/max_terminated_length": 642.0, "completions/mean_length": 308.921875, "completions/mean_terminated_length": 308.921875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.2224, "grad_norm": 0.03901790454983711, "learning_rate": 3.388888888888889e-07, "loss": 0.0065, "num_tokens": 68669452.0, "reward": 1.2002668380737305, "reward_std": 0.15861909091472626, "rewards/accuracy_reward_long_step": 0.296875, "rewards/final_brier_reward_long_step": 0.8035297393798828, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8100374341011047, "step": 139 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 648.0, "completions/max_terminated_length": 648.0, "completions/mean_length": 304.37109375, "completions/mean_terminated_length": 304.37109375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.224, "grad_norm": 0.028289683163166046, "learning_rate": 3.333333333333333e-07, "loss": -0.0064, "num_tokens": 69179523.0, "reward": 1.3940101861953735, "reward_std": 0.18012776970863342, "rewards/accuracy_reward_long_step": 0.51953125, "rewards/final_brier_reward_long_step": 0.7299957275390625, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7679198980331421, "step": 140 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 715.0, "completions/max_terminated_length": 715.0, "completions/mean_length": 301.6328125, "completions/mean_terminated_length": 301.6328125, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 0.2256, "grad_norm": 0.032248951494693756, "learning_rate": 3.2777777777777776e-07, "loss": -0.0042, "num_tokens": 69689229.0, "reward": 1.339975357055664, "reward_std": 0.1697162687778473, "rewards/accuracy_reward_long_step": 0.45703125, "rewards/final_brier_reward_long_step": 0.754450798034668, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7773258686065674, "step": 141 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 555.0, "completions/max_terminated_length": 555.0, "completions/mean_length": 308.71875, "completions/mean_terminated_length": 308.71875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.2272, "grad_norm": 0.0788806900382042, "learning_rate": 3.222222222222222e-07, "loss": 0.0148, "num_tokens": 70210885.0, "reward": 1.3627147674560547, "reward_std": 0.17580462992191315, "rewards/accuracy_reward_long_step": 0.484375, "rewards/final_brier_reward_long_step": 0.7406051158905029, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7727540731430054, "step": 142 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 619.0, "completions/max_terminated_length": 619.0, "completions/mean_length": 307.53125, "completions/mean_terminated_length": 307.53125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.2288, "grad_norm": 0.02919752337038517, "learning_rate": 3.166666666666666e-07, "loss": 0.0041, "num_tokens": 70731837.0, "reward": 1.425824761390686, "reward_std": 0.2417382448911667, "rewards/accuracy_reward_long_step": 0.55859375, "rewards/final_brier_reward_long_step": 0.6927086114883423, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.776215672492981, "step": 143 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 748.0, "completions/max_terminated_length": 748.0, "completions/mean_length": 303.9921875, "completions/mean_terminated_length": 303.9921875, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.2304, "grad_norm": 0.02776823192834854, "learning_rate": 3.111111111111111e-07, "loss": 0.0044, "num_tokens": 71232115.0, "reward": 1.3090643882751465, "reward_std": 0.18515193462371826, "rewards/accuracy_reward_long_step": 0.41796875, "rewards/final_brier_reward_long_step": 0.7715655565261841, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7928170561790466, "step": 144 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 728.0, "completions/max_terminated_length": 728.0, "completions/mean_length": 299.80859375, "completions/mean_terminated_length": 299.80859375, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.232, "grad_norm": 0.030704284086823463, "learning_rate": 3.055555555555556e-07, "loss": 0.0199, "num_tokens": 71750330.0, "reward": 1.3886297941207886, "reward_std": 0.21446546912193298, "rewards/accuracy_reward_long_step": 0.5, "rewards/final_brier_reward_long_step": 0.7539929747581482, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.80052649974823, "step": 145 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 530.0, "completions/max_terminated_length": 530.0, "completions/mean_length": 300.62109375, "completions/mean_terminated_length": 300.62109375, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.2336, "grad_norm": 0.031654492020606995, "learning_rate": 3e-07, "loss": -0.0019, "num_tokens": 72260633.0, "reward": 1.4397811889648438, "reward_std": 0.20938704907894135, "rewards/accuracy_reward_long_step": 0.5625, "rewards/final_brier_reward_long_step": 0.7215574383735657, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7875672578811646, "step": 146 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 858.0, "completions/max_terminated_length": 858.0, "completions/mean_length": 306.19921875, "completions/mean_terminated_length": 306.19921875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.2352, "grad_norm": 0.19715657830238342, "learning_rate": 2.9444444444444444e-07, "loss": 0.0069, "num_tokens": 72758188.0, "reward": 1.4754751920700073, "reward_std": 0.26457875967025757, "rewards/accuracy_reward_long_step": 0.60546875, "rewards/final_brier_reward_long_step": 0.6992788910865784, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7807470560073853, "step": 147 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 594.0, "completions/max_terminated_length": 594.0, "completions/mean_length": 307.36328125, "completions/mean_terminated_length": 307.36328125, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 0.2368, "grad_norm": 0.03840313479304314, "learning_rate": 2.8888888888888885e-07, "loss": -0.0044, "num_tokens": 73264385.0, "reward": 1.513301134109497, "reward_std": 0.1907767504453659, "rewards/accuracy_reward_long_step": 0.6484375, "rewards/final_brier_reward_long_step": 0.7094078063964844, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7500470876693726, "step": 148 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 612.0, "completions/max_terminated_length": 612.0, "completions/mean_length": 303.8828125, "completions/mean_terminated_length": 303.8828125, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.2384, "grad_norm": 0.027224667370319366, "learning_rate": 2.833333333333333e-07, "loss": -0.0015, "num_tokens": 73775123.0, "reward": 1.2874629497528076, "reward_std": 0.16957233846187592, "rewards/accuracy_reward_long_step": 0.39453125, "rewards/final_brier_reward_long_step": 0.7941582202911377, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7853813767433167, "step": 149 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 597.0, "completions/max_terminated_length": 597.0, "completions/mean_length": 312.90625, "completions/mean_terminated_length": 312.90625, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.24, "grad_norm": 0.04304026812314987, "learning_rate": 2.7777777777777776e-07, "loss": 0.0097, "num_tokens": 74265443.0, "reward": 1.3638887405395508, "reward_std": 0.27244532108306885, "rewards/accuracy_reward_long_step": 0.4765625, "rewards/final_brier_reward_long_step": 0.7445312738418579, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8047733306884766, "step": 150 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 667.0, "completions/max_terminated_length": 667.0, "completions/mean_length": 297.63671875, "completions/mean_terminated_length": 297.63671875, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.2416, "grad_norm": 0.03442637622356415, "learning_rate": 2.7222222222222216e-07, "loss": -0.0125, "num_tokens": 74766262.0, "reward": 1.455894947052002, "reward_std": 0.17580869793891907, "rewards/accuracy_reward_long_step": 0.5703125, "rewards/final_brier_reward_long_step": 0.7380698919296265, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8042596578598022, "step": 151 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 687.0, "completions/max_terminated_length": 687.0, "completions/mean_length": 299.35546875, "completions/mean_terminated_length": 299.35546875, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.2432, "grad_norm": 0.02799561619758606, "learning_rate": 2.6666666666666667e-07, "loss": -0.0007, "num_tokens": 75254505.0, "reward": 1.2745752334594727, "reward_std": 0.1415172815322876, "rewards/accuracy_reward_long_step": 0.3828125, "rewards/final_brier_reward_long_step": 0.7780320644378662, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.789018988609314, "step": 152 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 644.0, "completions/max_terminated_length": 644.0, "completions/mean_length": 298.35546875, "completions/mean_terminated_length": 298.35546875, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.2448, "grad_norm": 0.03639660403132439, "learning_rate": 2.6111111111111113e-07, "loss": 0.0153, "num_tokens": 75757548.0, "reward": 1.4095165729522705, "reward_std": 0.24766717851161957, "rewards/accuracy_reward_long_step": 0.53125, "rewards/final_brier_reward_long_step": 0.7201319932937622, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7929338216781616, "step": 153 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 494.0, "completions/max_terminated_length": 494.0, "completions/mean_length": 293.26953125, "completions/mean_terminated_length": 293.26953125, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.2464, "grad_norm": 0.026929931715130806, "learning_rate": 2.5555555555555553e-07, "loss": -0.0083, "num_tokens": 76259353.0, "reward": 1.49515962600708, "reward_std": 0.14950111508369446, "rewards/accuracy_reward_long_step": 0.6171875, "rewards/final_brier_reward_long_step": 0.720660924911499, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.791227400302887, "step": 154 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 639.0, "completions/max_terminated_length": 639.0, "completions/mean_length": 294.78515625, "completions/mean_terminated_length": 294.78515625, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.248, "grad_norm": 0.060369208455085754, "learning_rate": 2.5e-07, "loss": 0.0095, "num_tokens": 76766810.0, "reward": 1.409111738204956, "reward_std": 0.20820903778076172, "rewards/accuracy_reward_long_step": 0.5234375, "rewards/final_brier_reward_long_step": 0.754862904548645, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7956463098526001, "step": 155 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 586.0, "completions/max_terminated_length": 586.0, "completions/mean_length": 314.47265625, "completions/mean_terminated_length": 315.7059020996094, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.2496, "grad_norm": 0.031120678409934044, "learning_rate": 2.4444444444444445e-07, "loss": -0.0077, "num_tokens": 77273011.0, "reward": 1.3123760223388672, "reward_std": 0.2050066739320755, "rewards/accuracy_reward_long_step": 0.43359375, "rewards/final_brier_reward_long_step": 0.7430413961410522, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.779900074005127, "step": 156 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 596.0, "completions/max_terminated_length": 596.0, "completions/mean_length": 305.6796875, "completions/mean_terminated_length": 305.6796875, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.2512, "grad_norm": 0.028397539630532265, "learning_rate": 2.388888888888889e-07, "loss": 0.0104, "num_tokens": 77771369.0, "reward": 1.4772114753723145, "reward_std": 0.20590314269065857, "rewards/accuracy_reward_long_step": 0.59375, "rewards/final_brier_reward_long_step": 0.7362457513809204, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7976003289222717, "step": 157 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 527.0, "completions/max_terminated_length": 527.0, "completions/mean_length": 312.51171875, "completions/mean_terminated_length": 312.51171875, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.2528, "grad_norm": 0.03033839352428913, "learning_rate": 2.3333333333333333e-07, "loss": -0.0084, "num_tokens": 78293932.0, "reward": 1.250650405883789, "reward_std": 0.20709839463233948, "rewards/accuracy_reward_long_step": 0.3515625, "rewards/final_brier_reward_long_step": 0.780035138130188, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8163163661956787, "step": 158 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 705.0, "completions/max_terminated_length": 705.0, "completions/mean_length": 311.6640625, "completions/mean_terminated_length": 311.6640625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.2544, "grad_norm": 0.03519049659371376, "learning_rate": 2.2777777777777776e-07, "loss": -0.0036, "num_tokens": 78801126.0, "reward": 1.2244961261749268, "reward_std": 0.17444197833538055, "rewards/accuracy_reward_long_step": 0.31640625, "rewards/final_brier_reward_long_step": 0.8095117211341858, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.822847843170166, "step": 159 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 610.0, "completions/max_terminated_length": 610.0, "completions/mean_length": 294.1328125, "completions/mean_terminated_length": 294.1328125, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.256, "grad_norm": 0.04099366441369057, "learning_rate": 2.222222222222222e-07, "loss": -0.0079, "num_tokens": 79294264.0, "reward": 1.3805627822875977, "reward_std": 0.18659856915473938, "rewards/accuracy_reward_long_step": 0.48828125, "rewards/final_brier_reward_long_step": 0.7798925638198853, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.789233922958374, "step": 160 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 563.0, "completions/max_terminated_length": 563.0, "completions/mean_length": 312.27734375, "completions/mean_terminated_length": 312.27734375, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 0.2576, "grad_norm": 0.05054928734898567, "learning_rate": 2.1666666666666667e-07, "loss": -0.0004, "num_tokens": 79797063.0, "reward": 1.3297412395477295, "reward_std": 0.17361152172088623, "rewards/accuracy_reward_long_step": 0.4375, "rewards/final_brier_reward_long_step": 0.766510546207428, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.802454948425293, "step": 161 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 709.0, "completions/max_terminated_length": 709.0, "completions/mean_length": 310.8359375, "completions/mean_terminated_length": 310.8359375, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.2592, "grad_norm": 0.02891719527542591, "learning_rate": 2.111111111111111e-07, "loss": -0.003, "num_tokens": 80291645.0, "reward": 1.264817237854004, "reward_std": 0.187312513589859, "rewards/accuracy_reward_long_step": 0.375, "rewards/final_brier_reward_long_step": 0.7664257884025574, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7928431034088135, "step": 162 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 673.0, "completions/max_terminated_length": 673.0, "completions/mean_length": 312.3671875, "completions/mean_terminated_length": 312.3671875, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.2608, "grad_norm": 0.040000658482313156, "learning_rate": 2.0555555555555553e-07, "loss": 0.0004, "num_tokens": 80794195.0, "reward": 1.4218961000442505, "reward_std": 0.21327157318592072, "rewards/accuracy_reward_long_step": 0.53125, "rewards/final_brier_reward_long_step": 0.7526370882987976, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8099474906921387, "step": 163 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 742.0, "completions/max_terminated_length": 742.0, "completions/mean_length": 309.671875, "completions/mean_terminated_length": 309.671875, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.2624, "grad_norm": 0.03159433230757713, "learning_rate": 2e-07, "loss": 0.0153, "num_tokens": 81305319.0, "reward": 1.4856319427490234, "reward_std": 0.19633854925632477, "rewards/accuracy_reward_long_step": 0.60546875, "rewards/final_brier_reward_long_step": 0.7205570340156555, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8000956773757935, "step": 164 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 948.0, "completions/max_terminated_length": 948.0, "completions/mean_length": 313.97265625, "completions/mean_terminated_length": 313.97265625, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 0.264, "grad_norm": 0.027477793395519257, "learning_rate": 1.9444444444444445e-07, "loss": -0.0033, "num_tokens": 81799832.0, "reward": 1.4248054027557373, "reward_std": 0.2430860996246338, "rewards/accuracy_reward_long_step": 0.5390625, "rewards/final_brier_reward_long_step": 0.722133219242096, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8208386898040771, "step": 165 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 706.0, "completions/max_terminated_length": 706.0, "completions/mean_length": 311.08203125, "completions/mean_terminated_length": 311.08203125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.2656, "grad_norm": 0.09807036817073822, "learning_rate": 1.8888888888888888e-07, "loss": -0.0055, "num_tokens": 82305733.0, "reward": 1.256960391998291, "reward_std": 0.20458321273326874, "rewards/accuracy_reward_long_step": 0.36328125, "rewards/final_brier_reward_long_step": 0.7870085835456848, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7877079248428345, "step": 166 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 811.0, "completions/max_terminated_length": 811.0, "completions/mean_length": 318.3828125, "completions/mean_terminated_length": 318.3828125, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.2672, "grad_norm": 0.02899632230401039, "learning_rate": 1.833333333333333e-07, "loss": -0.0024, "num_tokens": 82827687.0, "reward": 1.2728254795074463, "reward_std": 0.17899630963802338, "rewards/accuracy_reward_long_step": 0.3828125, "rewards/final_brier_reward_long_step": 0.7677257657051086, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7923259735107422, "step": 167 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 574.0, "completions/max_terminated_length": 574.0, "completions/mean_length": 312.75, "completions/mean_terminated_length": 312.75, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.2688, "grad_norm": 0.026410935446619987, "learning_rate": 1.7777777777777776e-07, "loss": 0.0074, "num_tokens": 83331319.0, "reward": 1.300147294998169, "reward_std": 0.16780969500541687, "rewards/accuracy_reward_long_step": 0.40234375, "rewards/final_brier_reward_long_step": 0.7630214691162109, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8281925916671753, "step": 168 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 561.0, "completions/max_terminated_length": 561.0, "completions/mean_length": 314.34375, "completions/mean_terminated_length": 314.34375, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.2704, "grad_norm": 0.048373743891716, "learning_rate": 1.7222222222222222e-07, "loss": 0.0079, "num_tokens": 83842919.0, "reward": 1.2883470058441162, "reward_std": 0.19052302837371826, "rewards/accuracy_reward_long_step": 0.390625, "rewards/final_brier_reward_long_step": 0.7791671752929688, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.81172114610672, "step": 169 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 600.0, "completions/max_terminated_length": 600.0, "completions/mean_length": 309.91796875, "completions/mean_terminated_length": 309.91796875, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.272, "grad_norm": 0.0328993983566761, "learning_rate": 1.6666666666666665e-07, "loss": 0.014, "num_tokens": 84327362.0, "reward": 1.3099801540374756, "reward_std": 0.1611907184123993, "rewards/accuracy_reward_long_step": 0.41796875, "rewards/final_brier_reward_long_step": 0.7775378823280334, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7983198761940002, "step": 170 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 730.0, "completions/max_terminated_length": 730.0, "completions/mean_length": 319.8203125, "completions/mean_terminated_length": 319.8203125, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.2736, "grad_norm": 0.0332467257976532, "learning_rate": 1.611111111111111e-07, "loss": -0.0146, "num_tokens": 84835268.0, "reward": 1.339202642440796, "reward_std": 0.20696350932121277, "rewards/accuracy_reward_long_step": 0.453125, "rewards/final_brier_reward_long_step": 0.750048041343689, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7942621111869812, "step": 171 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 686.0, "completions/max_terminated_length": 686.0, "completions/mean_length": 324.1171875, "completions/mean_terminated_length": 324.1171875, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.2752, "grad_norm": 0.05583081394433975, "learning_rate": 1.5555555555555556e-07, "loss": -0.0116, "num_tokens": 85354394.0, "reward": 1.2257599830627441, "reward_std": 0.1692497879266739, "rewards/accuracy_reward_long_step": 0.33203125, "rewards/final_brier_reward_long_step": 0.7842777371406555, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7906370162963867, "step": 172 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 627.0, "completions/max_terminated_length": 627.0, "completions/mean_length": 304.73828125, "completions/mean_terminated_length": 304.73828125, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.2768, "grad_norm": 0.03522387892007828, "learning_rate": 1.5e-07, "loss": 0.0098, "num_tokens": 85859007.0, "reward": 1.3143270015716553, "reward_std": 0.23036913573741913, "rewards/accuracy_reward_long_step": 0.41796875, "rewards/final_brier_reward_long_step": 0.7498409748077393, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8355921506881714, "step": 173 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 561.0, "completions/max_terminated_length": 561.0, "completions/mean_length": 302.87109375, "completions/mean_terminated_length": 302.87109375, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.2784, "grad_norm": 0.027995487675070763, "learning_rate": 1.4444444444444442e-07, "loss": -0.0041, "num_tokens": 86369230.0, "reward": 1.2683026790618896, "reward_std": 0.22589144110679626, "rewards/accuracy_reward_long_step": 0.3828125, "rewards/final_brier_reward_long_step": 0.7601765394210815, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7817836999893188, "step": 174 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 688.0, "completions/max_terminated_length": 688.0, "completions/mean_length": 307.44921875, "completions/mean_terminated_length": 307.44921875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.28, "grad_norm": 0.028536871075630188, "learning_rate": 1.3888888888888888e-07, "loss": 0.0019, "num_tokens": 86862377.0, "reward": 1.4214547872543335, "reward_std": 0.19105279445648193, "rewards/accuracy_reward_long_step": 0.53125, "rewards/final_brier_reward_long_step": 0.7514737844467163, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8093452453613281, "step": 175 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 657.0, "completions/max_terminated_length": 657.0, "completions/mean_length": 306.5390625, "completions/mean_terminated_length": 306.5390625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.2816, "grad_norm": 0.027810126543045044, "learning_rate": 1.3333333333333334e-07, "loss": -0.0032, "num_tokens": 87353779.0, "reward": 1.3975551128387451, "reward_std": 0.1693097949028015, "rewards/accuracy_reward_long_step": 0.51171875, "rewards/final_brier_reward_long_step": 0.7351202964782715, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8082250952720642, "step": 176 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 726.0, "completions/max_terminated_length": 726.0, "completions/mean_length": 302.92578125, "completions/mean_terminated_length": 302.92578125, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.2832, "grad_norm": 0.027919691056013107, "learning_rate": 1.2777777777777777e-07, "loss": -0.0057, "num_tokens": 87859856.0, "reward": 1.3061156272888184, "reward_std": 0.17378166317939758, "rewards/accuracy_reward_long_step": 0.421875, "rewards/final_brier_reward_long_step": 0.7432960867881775, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7936666011810303, "step": 177 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 625.0, "completions/max_terminated_length": 625.0, "completions/mean_length": 317.79296875, "completions/mean_terminated_length": 317.79296875, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 0.2848, "grad_norm": 0.026690706610679626, "learning_rate": 1.2222222222222222e-07, "loss": 0.007, "num_tokens": 88358995.0, "reward": 1.3103278875350952, "reward_std": 0.1235455721616745, "rewards/accuracy_reward_long_step": 0.41015625, "rewards/final_brier_reward_long_step": 0.7893308401107788, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8113558292388916, "step": 178 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 582.0, "completions/max_terminated_length": 582.0, "completions/mean_length": 304.3828125, "completions/mean_terminated_length": 304.3828125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.2864, "grad_norm": 0.0654895007610321, "learning_rate": 1.1666666666666667e-07, "loss": -0.0002, "num_tokens": 88861517.0, "reward": 1.3416030406951904, "reward_std": 0.18121816217899323, "rewards/accuracy_reward_long_step": 0.44921875, "rewards/final_brier_reward_long_step": 0.7645456790924072, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8128038644790649, "step": 179 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 631.0, "completions/max_terminated_length": 631.0, "completions/mean_length": 303.37890625, "completions/mean_terminated_length": 303.37890625, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.288, "grad_norm": 0.029484573751688004, "learning_rate": 1.111111111111111e-07, "loss": 0.001, "num_tokens": 89350494.0, "reward": 1.4031870365142822, "reward_std": 0.20662115514278412, "rewards/accuracy_reward_long_step": 0.52734375, "rewards/final_brier_reward_long_step": 0.7230523228645325, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7803207039833069, "step": 180 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 667.0, "completions/max_terminated_length": 667.0, "completions/mean_length": 311.08984375, "completions/mean_terminated_length": 311.08984375, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.2896, "grad_norm": 0.06027643755078316, "learning_rate": 1.0555555555555555e-07, "loss": 0.0061, "num_tokens": 89854605.0, "reward": 1.3537577390670776, "reward_std": 0.21486344933509827, "rewards/accuracy_reward_long_step": 0.47265625, "rewards/final_brier_reward_long_step": 0.7450315952301025, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7871868014335632, "step": 181 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 754.0, "completions/max_terminated_length": 754.0, "completions/mean_length": 304.125, "completions/mean_terminated_length": 304.125, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.2912, "grad_norm": 0.027213167399168015, "learning_rate": 1e-07, "loss": -0.0018, "num_tokens": 90350773.0, "reward": 1.3099863529205322, "reward_std": 0.2428930103778839, "rewards/accuracy_reward_long_step": 0.43359375, "rewards/final_brier_reward_long_step": 0.7415887117385864, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.7796065807342529, "step": 182 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 606.0, "completions/max_terminated_length": 606.0, "completions/mean_length": 304.78515625, "completions/mean_terminated_length": 304.78515625, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.2928, "grad_norm": 0.028131412342190742, "learning_rate": 9.444444444444444e-08, "loss": -0.0014, "num_tokens": 90842758.0, "reward": 1.399085521697998, "reward_std": 0.13641595840454102, "rewards/accuracy_reward_long_step": 0.51171875, "rewards/final_brier_reward_long_step": 0.7427164316177368, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8067511916160583, "step": 183 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 735.0, "completions/max_terminated_length": 735.0, "completions/mean_length": 319.765625, "completions/mean_terminated_length": 319.765625, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.2944, "grad_norm": 0.03541647642850876, "learning_rate": 8.888888888888888e-08, "loss": 0.0052, "num_tokens": 91348666.0, "reward": 1.3956681489944458, "reward_std": 0.1705297827720642, "rewards/accuracy_reward_long_step": 0.515625, "rewards/final_brier_reward_long_step": 0.7347894906997681, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.7931956052780151, "step": 184 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 718.0, "completions/max_terminated_length": 718.0, "completions/mean_length": 314.33203125, "completions/mean_terminated_length": 314.33203125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.296, "grad_norm": 0.032346244901418686, "learning_rate": 8.333333333333333e-08, "loss": -0.0086, "num_tokens": 91854359.0, "reward": 1.2988712787628174, "reward_std": 0.18167896568775177, "rewards/accuracy_reward_long_step": 0.41796875, "rewards/final_brier_reward_long_step": 0.7545672059059143, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7690430879592896, "step": 185 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 734.0, "completions/max_terminated_length": 734.0, "completions/mean_length": 307.546875, "completions/mean_terminated_length": 307.546875, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.2976, "grad_norm": 0.027610866352915764, "learning_rate": 7.777777777777778e-08, "loss": 0.0048, "num_tokens": 92349963.0, "reward": 1.4005792140960693, "reward_std": 0.19216248393058777, "rewards/accuracy_reward_long_step": 0.515625, "rewards/final_brier_reward_long_step": 0.7557350993156433, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7840815782546997, "step": 186 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 635.0, "completions/max_terminated_length": 635.0, "completions/mean_length": 320.875, "completions/mean_terminated_length": 320.875, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 0.2992, "grad_norm": 0.027752123773097992, "learning_rate": 7.222222222222221e-08, "loss": 0.0079, "num_tokens": 92853435.0, "reward": 1.3692231178283691, "reward_std": 0.2207336574792862, "rewards/accuracy_reward_long_step": 0.484375, "rewards/final_brier_reward_long_step": 0.7303339838981628, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8168707489967346, "step": 187 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 672.0, "completions/max_terminated_length": 672.0, "completions/mean_length": 323.48046875, "completions/mean_terminated_length": 323.48046875, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 0.3008, "grad_norm": 0.03371801972389221, "learning_rate": 6.666666666666667e-08, "loss": 0.0037, "num_tokens": 93376046.0, "reward": 1.347980260848999, "reward_std": 0.1777208149433136, "rewards/accuracy_reward_long_step": 0.4609375, "rewards/final_brier_reward_long_step": 0.7578449249267578, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7903263568878174, "step": 188 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 613.0, "completions/max_terminated_length": 613.0, "completions/mean_length": 312.94140625, "completions/mean_terminated_length": 312.94140625, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.3024, "grad_norm": 0.02993335761129856, "learning_rate": 6.111111111111111e-08, "loss": -0.001, "num_tokens": 93872863.0, "reward": 1.372868299484253, "reward_std": 0.10951399803161621, "rewards/accuracy_reward_long_step": 0.484375, "rewards/final_brier_reward_long_step": 0.7652988433837891, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7886741161346436, "step": 189 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 623.0, "completions/max_terminated_length": 623.0, "completions/mean_length": 316.453125, "completions/mean_terminated_length": 316.453125, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.304, "grad_norm": 0.038850028067827225, "learning_rate": 5.555555555555555e-08, "loss": -0.0096, "num_tokens": 94377163.0, "reward": 1.2374032735824585, "reward_std": 0.2162398397922516, "rewards/accuracy_reward_long_step": 0.34765625, "rewards/final_brier_reward_long_step": 0.7705594301223755, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7884289026260376, "step": 190 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 759.0, "completions/max_terminated_length": 759.0, "completions/mean_length": 310.67578125, "completions/mean_terminated_length": 310.67578125, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.3056, "grad_norm": 0.09805744141340256, "learning_rate": 5e-08, "loss": -0.0029, "num_tokens": 94885656.0, "reward": 1.3634986877441406, "reward_std": 0.14901340007781982, "rewards/accuracy_reward_long_step": 0.4765625, "rewards/final_brier_reward_long_step": 0.7494964599609375, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8060606718063354, "step": 191 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 655.0, "completions/max_terminated_length": 655.0, "completions/mean_length": 323.4453125, "completions/mean_terminated_length": 323.4453125, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.3072, "grad_norm": 0.036289751529693604, "learning_rate": 4.444444444444444e-08, "loss": 0.0008, "num_tokens": 95401338.0, "reward": 1.1621966361999512, "reward_std": 0.17893055081367493, "rewards/accuracy_reward_long_step": 0.2578125, "rewards/final_brier_reward_long_step": 0.8198968768119812, "rewards/format_reward_long_step": 0.9921875, "rewards/stepwise_brier_reward_long_step": 0.8132648468017578, "step": 192 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 616.0, "completions/max_terminated_length": 616.0, "completions/mean_length": 317.6953125, "completions/mean_terminated_length": 317.6953125, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.3088, "grad_norm": 0.033580850809812546, "learning_rate": 3.888888888888889e-08, "loss": 0.0163, "num_tokens": 95915484.0, "reward": 1.4245002269744873, "reward_std": 0.18409396708011627, "rewards/accuracy_reward_long_step": 0.54296875, "rewards/final_brier_reward_long_step": 0.7327523231506348, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.793373167514801, "step": 193 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 585.0, "completions/max_terminated_length": 585.0, "completions/mean_length": 319.3046875, "completions/mean_terminated_length": 319.3046875, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.3104, "grad_norm": 0.0271166805177927, "learning_rate": 3.3333333333333334e-08, "loss": 0.0106, "num_tokens": 96427698.0, "reward": 1.3507544994354248, "reward_std": 0.23310068249702454, "rewards/accuracy_reward_long_step": 0.4609375, "rewards/final_brier_reward_long_step": 0.7412347197532654, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8258461356163025, "step": 194 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 563.0, "completions/max_terminated_length": 563.0, "completions/mean_length": 311.0546875, "completions/mean_terminated_length": 311.0546875, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 0.312, "grad_norm": 0.029426012188196182, "learning_rate": 2.7777777777777774e-08, "loss": 0.0089, "num_tokens": 96932352.0, "reward": 1.4266903400421143, "reward_std": 0.17993581295013428, "rewards/accuracy_reward_long_step": 0.54296875, "rewards/final_brier_reward_long_step": 0.7373980283737183, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8053007125854492, "step": 195 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 629.0, "completions/max_terminated_length": 629.0, "completions/mean_length": 316.51171875, "completions/mean_terminated_length": 316.51171875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.3136, "grad_norm": 0.02578766457736492, "learning_rate": 2.222222222222222e-08, "loss": 0.0107, "num_tokens": 97441803.0, "reward": 1.5241920948028564, "reward_std": 0.19832386076450348, "rewards/accuracy_reward_long_step": 0.64453125, "rewards/final_brier_reward_long_step": 0.7144637107849121, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.804179847240448, "step": 196 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 578.0, "completions/max_terminated_length": 578.0, "completions/mean_length": 305.58984375, "completions/mean_terminated_length": 305.58984375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.3152, "grad_norm": 0.03043382056057453, "learning_rate": 1.6666666666666667e-08, "loss": 0.0183, "num_tokens": 97947882.0, "reward": 1.3735759258270264, "reward_std": 0.19583386182785034, "rewards/accuracy_reward_long_step": 0.47265625, "rewards/final_brier_reward_long_step": 0.7926558256149292, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.8110226988792419, "step": 197 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 606.0, "completions/max_terminated_length": 606.0, "completions/mean_length": 303.71484375, "completions/mean_terminated_length": 303.71484375, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 0.3168, "grad_norm": 0.028867207467556, "learning_rate": 1.111111111111111e-08, "loss": -0.0018, "num_tokens": 98453969.0, "reward": 1.3594614267349243, "reward_std": 0.21242399513721466, "rewards/accuracy_reward_long_step": 0.47265625, "rewards/final_brier_reward_long_step": 0.7498824000358582, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7973384261131287, "step": 198 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 614.0, "completions/max_terminated_length": 614.0, "completions/mean_length": 315.70703125, "completions/mean_terminated_length": 315.70703125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.3184, "grad_norm": 0.045795369893312454, "learning_rate": 5.555555555555555e-09, "loss": -0.0012, "num_tokens": 98964222.0, "reward": 1.3441438674926758, "reward_std": 0.17573949694633484, "rewards/accuracy_reward_long_step": 0.453125, "rewards/final_brier_reward_long_step": 0.7653417587280273, "rewards/format_reward_long_step": 1.0, "rewards/stepwise_brier_reward_long_step": 0.7987333536148071, "step": 199 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 577.0, "completions/max_terminated_length": 577.0, "completions/mean_length": 304.7421875, "completions/mean_terminated_length": 305.9372863769531, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.32, "grad_norm": 0.025520017370581627, "learning_rate": 0.0, "loss": -0.018, "num_tokens": 99441740.0, "reward": 1.34486985206604, "reward_std": 0.1514306366443634, "rewards/accuracy_reward_long_step": 0.45703125, "rewards/final_brier_reward_long_step": 0.7545433640480042, "rewards/format_reward_long_step": 0.99609375, "rewards/stepwise_brier_reward_long_step": 0.8046233654022217, "step": 200 }, { "epoch": 0.32, "step": 200, "total_flos": 0.0, "train_loss": -0.016620432519121094, "train_runtime": 11304.4046, "train_samples_per_second": 4.529, "train_steps_per_second": 0.018 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 99441740, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }