{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_distill/lambda": 0.1, "aux_distill/loss": 0.7561912158360848, "aux_distill/mean_u": 0.09821361044272292, "aux_distill/n_active_tok": 54.0, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.012635531835258007, "learning_rate": 2.5000000000000004e-07, "loss": 0.0627, "num_tokens": 264685.0, "reward": 0.037574999034404755, "reward_std": 0.07449960708618164, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step": 0.04296875, "step": 1 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 0.4544563018961957, "aux_distill/mean_u": 0.09256462183530526, "aux_distill/n_active_tok": 58.36842105263158, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.009192095138132572, "learning_rate": 5.000000000000001e-07, "loss": 0.0927, "num_tokens": 533467.0, "reward": 0.07537207007408142, "reward_std": 0.14035090804100037, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step": 0.08984375, "step": 2 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 0.5128775238990784, "aux_distill/mean_u": 0.06377023176615988, "aux_distill/n_active_tok": 41.6, "calib/answer_extract_rate": 0.0390625, "calib/auroc": 0.9444444444444444, "calib/avg_num_step_conf": 0.203125, "calib/ece": 0.6272727272727273, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 0.7272727272727273, "calib/gap": 0.2088888888888889, "calib/mean_conf": 0.8090909090909091, "calib/mu_c": 0.98, "calib/mu_w": 0.7711111111111111, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.05859375, "calib/nonempty_step_conf_rate": 0.04296875, "calib/pce": 0.6272727272727273, "calib/std_conf": 0.31430889685885144, "calib/step_conf_rate": 0.04296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 3020.0, "completions/max_terminated_length": 3020.0, "completions/mean_length": 650.43359375, "completions/mean_terminated_length": 727.1222534179688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.005481162574142218, "learning_rate": 7.5e-07, "loss": 0.0425, "num_tokens": 805234.0, "reward": 0.020432421937584877, "reward_std": 0.04559621587395668, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.009614843875169754, "rewards/format_reward_step": 0.0234375, "step": 3 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 0.6010356366634368, "aux_distill/mean_u": 0.11464373060808687, "aux_distill/n_active_tok": 42.2, "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.35, "calib/avg_num_step_conf": 0.3125, "calib/ece": 0.7383333333333335, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": -0.050000000000000155, "calib/mean_conf": 0.8466666666666667, "calib/mu_c": 0.8049999999999999, "calib/mu_w": 0.8550000000000001, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.0859375, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.7091666666666668, "calib/std_conf": 0.269639677264967, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 3029.0, "completions/max_terminated_length": 3029.0, "completions/mean_length": 682.984375, "completions/mean_terminated_length": 737.7383422851562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.014602464623749256, "learning_rate": 1.0000000000000002e-06, "loss": 0.0349, "num_tokens": 1086246.0, "reward": 0.03255273401737213, "reward_std": 0.07881505787372589, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.014324218966066837, "rewards/format_reward_step": 0.04296875, "step": 4 }, { "aux_distill/lambda": 0.1, "aux_distill/loss": 0.5481972178587546, "aux_distill/mean_u": 0.0767053597090393, "aux_distill/n_active_tok": 38.76923076923077, "calib/answer_extract_rate": 0.0859375, "calib/auroc": 0.58, "calib/avg_num_step_conf": 0.24609375, "calib/ece": 0.6186666666666667, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.9333333333333333, "calib/gap": 0.029999999999999916, "calib/mean_conf": 0.9520000000000001, "calib/mu_c": 0.9720000000000001, "calib/mu_w": 0.9420000000000002, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.6186666666666667, "calib/std_conf": 0.0643117407632541, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 3059.0, "completions/max_terminated_length": 3059.0, "completions/mean_length": 663.9609375, "completions/mean_terminated_length": 748.7841186523438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.01045077946037054, "learning_rate": 1.25e-06, "loss": 0.0418, "num_tokens": 1362908.0, "reward": 0.03767089545726776, "reward_std": 0.09181629866361618, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.02065429650247097, "rewards/format_reward_step": 0.03515625, "step": 5 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 0.5697961250940958, "aux_distill/mean_u": 0.09371336220695657, "aux_distill/n_active_tok": 58.733333333333334, "calib/answer_extract_rate": 0.0703125, "calib/auroc": 0.65625, "calib/avg_num_step_conf": 0.4375, "calib/ece": 0.8947058823529411, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.8823529411764706, "calib/gap": 0.028124999999999956, "calib/mean_conf": 0.953529411764706, "calib/mu_c": 0.98, "calib/mu_w": 0.951875, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.10546875, "calib/nonempty_step_conf_rate": 0.0859375, "calib/pce": 0.8947058823529411, "calib/std_conf": 0.05465842343678744, "calib/step_conf_rate": 0.0859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2953.0, "completions/max_terminated_length": 2953.0, "completions/mean_length": 598.453125, "completions/mean_terminated_length": 660.362060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.00990479625761509, "learning_rate": 1.5e-06, "loss": 0.0339, "num_tokens": 1622064.0, "reward": 0.02689570188522339, "reward_std": 0.06286215037107468, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.006916406098753214, "rewards/format_reward_step": 0.04296875, "step": 6 }, { "aux_distill/lambda": 0.1, "aux_distill/loss": 0.8123426660895348, "aux_distill/mean_u": 0.0868831620532763, "aux_distill/n_active_tok": 44.357142857142854, "calib/answer_extract_rate": 0.10546875, "calib/auroc": 0.8472222222222222, "calib/avg_num_step_conf": 0.296875, "calib/ece": 0.6080000000000001, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.7333333333333333, "calib/gap": 0.21916666666666684, "calib/mean_conf": 0.8079999999999998, "calib/mu_c": 0.9833333333333334, "calib/mu_w": 0.7641666666666665, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.078125, "calib/pce": 0.6080000000000001, "calib/std_conf": 0.32449345139771313, "calib/step_conf_rate": 0.078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3030.0, "completions/max_terminated_length": 3030.0, "completions/mean_length": 734.79296875, "completions/mean_terminated_length": 810.8060302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.020580844953656197, "learning_rate": 1.75e-06, "loss": 0.0671, "num_tokens": 1917595.0, "reward": 0.026225585490465164, "reward_std": 0.05754803121089935, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.009482420980930328, "rewards/format_reward_step": 0.03125, "step": 7 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 0.5418999955058098, "aux_distill/mean_u": 0.10516872279061203, "aux_distill/n_active_tok": 58.45, "calib/answer_extract_rate": 0.1171875, "calib/auroc": 0.5238095238095238, "calib/avg_num_step_conf": 0.5703125, "calib/ece": 0.7068, "calib/final_conf_rate": 0.09765625, "calib/format_rate": 0.0703125, "calib/frac_conf_gt_0.9": 0.8, "calib/gap": 0.11690476190476173, "calib/mean_conf": 0.8667999999999999, "calib/mu_c": 0.965, "calib/mu_w": 0.8480952380952382, "calib/nonempty_final_conf_rate": 0.09765625, "calib/nonempty_reasoning_rate": 0.1484375, "calib/nonempty_step_conf_rate": 0.12109375, "calib/pce": 0.7068, "calib/std_conf": 0.2573980574907278, "calib/step_conf_rate": 0.12109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2845.0, "completions/max_terminated_length": 2845.0, "completions/mean_length": 674.76171875, "completions/mean_terminated_length": 735.0595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.012400745414197445, "learning_rate": 2.0000000000000003e-06, "loss": 0.0829, "num_tokens": 2196846.0, "reward": 0.0567142590880394, "reward_std": 0.1300020068883896, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.023584768176078796, "rewards/format_reward_step": 0.0703125, "step": 8 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 0.625441624359651, "aux_distill/mean_u": 0.05235877670716145, "aux_distill/n_active_tok": 34.18181818181818, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.33333333333333337, "calib/avg_num_step_conf": 0.18359375, "calib/ece": 0.8238461538461539, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.6923076923076923, "calib/gap": 0.02083333333333315, "calib/mean_conf": 0.900769230769231, "calib/mu_c": 0.92, "calib/mu_w": 0.8991666666666669, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.0859375, "calib/nonempty_step_conf_rate": 0.05078125, "calib/pce": 0.8238461538461539, "calib/std_conf": 0.12218910554870632, "calib/step_conf_rate": 0.05078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 3016.0, "completions/max_terminated_length": 3016.0, "completions/mean_length": 711.82421875, "completions/mean_terminated_length": 762.4560546875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0096, "grad_norm": 0.011278004385530949, "learning_rate": 2.25e-06, "loss": 0.053, "num_tokens": 2486609.0, "reward": 0.02900156006217003, "reward_std": 0.062437769025564194, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.01112812478095293, "rewards/format_reward_step": 0.04296875, "step": 9 }, { "aux_distill/lambda": 0.1, "aux_distill/loss": 0.5412973931857518, "aux_distill/mean_u": 0.049759440712768645, "aux_distill/n_active_tok": 41.357142857142854, "calib/answer_extract_rate": 0.078125, "calib/avg_num_step_conf": 0.25, "calib/ece": 0.8383333333333334, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.75, "calib/mean_conf": 0.8383333333333334, "calib/mu_c": NaN, "calib/mu_w": 0.8383333333333334, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.8383333333333334, "calib/std_conf": 0.25951343360647483, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 2979.0, "completions/max_terminated_length": 2979.0, "completions/mean_length": 702.6953125, "completions/mean_terminated_length": 755.8403930664062, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.00804956629872322, "learning_rate": 2.5e-06, "loss": 0.0301, "num_tokens": 2773299.0, "reward": 0.024574805051088333, "reward_std": 0.05259711667895317, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.010087109170854092, "rewards/format_reward_step": 0.0390625, "step": 10 }, { "aux_distill/lambda": 0.1, "aux_distill/loss": 0.43450634820120676, "aux_distill/mean_u": 0.04201065652126385, "aux_distill/n_active_tok": 59.42857142857143, "calib/answer_extract_rate": 0.1171875, "calib/auroc": 0.49264705882352944, "calib/avg_num_step_conf": 0.421875, "calib/ece": 0.7297142857142858, "calib/final_conf_rate": 0.08203125, "calib/format_rate": 0.06640625, "calib/frac_conf_gt_0.9": 0.7142857142857143, "calib/gap": -0.02788235294117658, "calib/mean_conf": 0.8725714285714284, "calib/mu_c": 0.8499999999999999, "calib/mu_w": 0.8778823529411764, "calib/nonempty_final_conf_rate": 0.08203125, "calib/nonempty_reasoning_rate": 0.140625, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.7059047619047619, "calib/std_conf": 0.2245436793712489, "calib/step_conf_rate": 0.09375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2973.0, "completions/max_terminated_length": 2973.0, "completions/mean_length": 684.203125, "completions/mean_terminated_length": 729.8167114257812, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.011733333333333333, "grad_norm": 0.008146646432578564, "learning_rate": 2.7500000000000004e-06, "loss": 0.067, "num_tokens": 3052935.0, "reward": 0.05362946167588234, "reward_std": 0.10116725414991379, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.02522767148911953, "rewards/format_reward_step": 0.06640625, "step": 11 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 0.4705760436398642, "aux_distill/mean_u": 0.07098841757193944, "aux_distill/n_active_tok": 57.095238095238095, "calib/answer_extract_rate": 0.1640625, "calib/auroc": 0.6043956043956045, "calib/avg_num_step_conf": 0.56640625, "calib/ece": 0.506764705882353, "calib/final_conf_rate": 0.1328125, "calib/format_rate": 0.09765625, "calib/frac_conf_gt_0.9": 0.6470588235294118, "calib/gap": 0.07805860805860809, "calib/mean_conf": 0.8479411764705883, "calib/mu_c": 0.8961538461538462, "calib/mu_w": 0.8180952380952381, "calib/nonempty_final_conf_rate": 0.1328125, "calib/nonempty_reasoning_rate": 0.1796875, "calib/nonempty_step_conf_rate": 0.125, "calib/pce": 0.4861764705882353, "calib/std_conf": 0.2521761687178314, "calib/step_conf_rate": 0.125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 3005.0, "completions/max_terminated_length": 3005.0, "completions/mean_length": 620.20703125, "completions/mean_terminated_length": 669.92822265625, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0128, "grad_norm": 0.010749871842563152, "learning_rate": 3e-06, "loss": 0.1102, "num_tokens": 3315884.0, "reward": 0.10153749585151672, "reward_std": 0.17852166295051575, "rewards/accuracy_reward_step": 0.05078125, "rewards/final_brier_reward_step": 0.054637499153614044, "rewards/format_reward_step": 0.09765625, "step": 12 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 0.4806722551584244, "aux_distill/mean_u": 0.06838403580369118, "aux_distill/n_active_tok": 65.14285714285714, "calib/answer_extract_rate": 0.15234375, "calib/auroc": 0.3920454545454546, "calib/avg_num_step_conf": 0.66796875, "calib/ece": 0.601, "calib/final_conf_rate": 0.1171875, "calib/format_rate": 0.1015625, "calib/frac_conf_gt_0.9": 0.6, "calib/gap": -0.028749999999999942, "calib/mean_conf": 0.8223333333333332, "calib/mu_c": 0.80125, "calib/mu_w": 0.83, "calib/nonempty_final_conf_rate": 0.1171875, "calib/nonempty_reasoning_rate": 0.203125, "calib/nonempty_step_conf_rate": 0.16015625, "calib/pce": 0.5783333333333333, "calib/std_conf": 0.26203922522316303, "calib/step_conf_rate": 0.16015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 3065.0, "completions/max_terminated_length": 3065.0, "completions/mean_length": 678.55859375, "completions/mean_terminated_length": 720.7925415039062, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.013866666666666666, "grad_norm": 0.011776494793593884, "learning_rate": 3.2500000000000002e-06, "loss": 0.1223, "num_tokens": 3594187.0, "reward": 0.08667441457509995, "reward_std": 0.17229244112968445, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.04053632542490959, "rewards/format_reward_step": 0.1015625, "step": 13 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 0.45552198818096745, "aux_distill/mean_u": 0.052501629064482844, "aux_distill/n_active_tok": 59.42307692307692, "calib/answer_extract_rate": 0.19140625, "calib/auroc": 0.5651041666666667, "calib/avg_num_step_conf": 0.75390625, "calib/ece": 0.6221052631578947, "calib/final_conf_rate": 0.1484375, "calib/format_rate": 0.1171875, "calib/frac_conf_gt_0.9": 0.5789473684210527, "calib/gap": 0.1048958333333333, "calib/mean_conf": 0.78, "calib/mu_c": 0.8683333333333333, "calib/mu_w": 0.7634375, "calib/nonempty_final_conf_rate": 0.1484375, "calib/nonempty_reasoning_rate": 0.22265625, "calib/nonempty_step_conf_rate": 0.171875, "calib/pce": 0.6221052631578947, "calib/std_conf": 0.3280885631398295, "calib/step_conf_rate": 0.171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 3009.0, "completions/max_terminated_length": 3009.0, "completions/mean_length": 637.84765625, "completions/mean_terminated_length": 671.97119140625, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.014933333333333333, "grad_norm": 0.010614952072501183, "learning_rate": 3.5e-06, "loss": 0.0726, "num_tokens": 3862876.0, "reward": 0.09558359533548355, "reward_std": 0.15353244543075562, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.0466359406709671, "rewards/format_reward_step": 0.1171875, "step": 14 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4393505273120744, "aux_distill/mean_u": 0.0932373114978294, "aux_distill/n_active_tok": 72.17857142857143, "calib/answer_extract_rate": 0.2265625, "calib/auroc": 0.45959595959595956, "calib/avg_num_step_conf": 0.98828125, "calib/ece": 0.6157446808510638, "calib/final_conf_rate": 0.18359375, "calib/format_rate": 0.12890625, "calib/frac_conf_gt_0.9": 0.5531914893617021, "calib/gap": -0.07212121212121203, "calib/mean_conf": 0.8097872340425533, "calib/mu_c": 0.7545454545454546, "calib/mu_w": 0.8266666666666667, "calib/nonempty_final_conf_rate": 0.18359375, "calib/nonempty_reasoning_rate": 0.2734375, "calib/nonempty_step_conf_rate": 0.203125, "calib/pce": 0.5957446808510639, "calib/std_conf": 0.21594413542456847, "calib/step_conf_rate": 0.203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 2660.0, "completions/max_terminated_length": 2660.0, "completions/mean_length": 580.484375, "completions/mean_terminated_length": 624.3865966796875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.016, "grad_norm": 0.010281026363372803, "learning_rate": 3.7500000000000005e-06, "loss": 0.0852, "num_tokens": 4119360.0, "reward": 0.11895253509283066, "reward_std": 0.19674092531204224, "rewards/accuracy_reward_step": 0.05078125, "rewards/final_brier_reward_step": 0.05821758136153221, "rewards/format_reward_step": 0.12890625, "step": 15 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 0.404011321755556, "aux_distill/mean_u": 0.1309152058316472, "aux_distill/n_active_tok": 102.11538461538461, "calib/answer_extract_rate": 0.234375, "calib/auroc": 0.4892344497607655, "calib/avg_num_step_conf": 1.2890625, "calib/ece": 0.6683673469387755, "calib/final_conf_rate": 0.19140625, "calib/format_rate": 0.15625, "calib/frac_conf_gt_0.9": 0.6326530612244898, "calib/gap": -0.07406698564593306, "calib/mean_conf": 0.8165306122448979, "calib/mu_c": 0.759090909090909, "calib/mu_w": 0.8331578947368421, "calib/nonempty_final_conf_rate": 0.19140625, "calib/nonempty_reasoning_rate": 0.2890625, "calib/nonempty_step_conf_rate": 0.23046875, "calib/pce": 0.6302040816326531, "calib/std_conf": 0.2564399203251316, "calib/step_conf_rate": 0.23046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3064.0, "completions/max_terminated_length": 3064.0, "completions/mean_length": 669.92578125, "completions/mean_terminated_length": 726.6991577148438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.017066666666666667, "grad_norm": 0.009635679423809052, "learning_rate": 4.000000000000001e-06, "loss": 0.1339, "num_tokens": 4399709.0, "reward": 0.130092591047287, "reward_std": 0.24008221924304962, "rewards/accuracy_reward_step": 0.046875, "rewards/final_brier_reward_step": 0.057060156017541885, "rewards/format_reward_step": 0.15625, "step": 16 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.46860240880520115, "aux_distill/mean_u": 0.11381851062136948, "aux_distill/n_active_tok": 94.06451612903226, "calib/answer_extract_rate": 0.2890625, "calib/auroc": 0.36937590711175616, "calib/avg_num_step_conf": 1.453125, "calib/ece": 0.643478787878788, "calib/final_conf_rate": 0.2578125, "calib/format_rate": 0.21875, "calib/frac_conf_gt_0.9": 0.4696969696969697, "calib/gap": -0.1606316400580553, "calib/mean_conf": 0.7259151515151515, "calib/mu_c": 0.5969230769230769, "calib/mu_w": 0.7575547169811322, "calib/nonempty_final_conf_rate": 0.2578125, "calib/nonempty_reasoning_rate": 0.36328125, "calib/nonempty_step_conf_rate": 0.296875, "calib/pce": 0.5862121212121213, "calib/std_conf": 0.34071722223569406, "calib/step_conf_rate": 0.296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3031.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 517.45703125, "completions/mean_terminated_length": 556.5924682617188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.018133333333333335, "grad_norm": 0.013831501826643944, "learning_rate": 4.25e-06, "loss": 0.1361, "num_tokens": 4635706.0, "reward": 0.1753574013710022, "reward_std": 0.2824881672859192, "rewards/accuracy_reward_step": 0.05078125, "rewards/final_brier_reward_step": 0.08118356764316559, "rewards/format_reward_step": 0.21875, "step": 17 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36261486814868066, "aux_distill/mean_u": 0.09953009255725216, "aux_distill/n_active_tok": 94.64516129032258, "calib/answer_extract_rate": 0.31640625, "calib/auroc": 0.6474137931034483, "calib/avg_num_step_conf": 1.44921875, "calib/ece": 0.42455882352941177, "calib/final_conf_rate": 0.265625, "calib/format_rate": 0.21875, "calib/frac_conf_gt_0.9": 0.3235294117647059, "calib/gap": 0.2293448275862069, "calib/mean_conf": 0.5383823529411763, "calib/mu_c": 0.734, "calib/mu_w": 0.5046551724137931, "calib/nonempty_final_conf_rate": 0.265625, "calib/nonempty_reasoning_rate": 0.37890625, "calib/nonempty_step_conf_rate": 0.2890625, "calib/pce": 0.40794117647058825, "calib/std_conf": 0.39705019053795754, "calib/step_conf_rate": 0.2890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 2844.0, "completions/max_terminated_length": 2844.0, "completions/mean_length": 541.7109375, "completions/mean_terminated_length": 575.4274291992188, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0192, "grad_norm": 0.014123844914138317, "learning_rate": 4.5e-06, "loss": 0.1028, "num_tokens": 4885104.0, "reward": 0.19429530203342438, "reward_std": 0.3070595860481262, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.13077811896800995, "rewards/format_reward_step": 0.21875, "step": 18 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3752623365726322, "aux_distill/mean_u": 0.10054922271830097, "aux_distill/n_active_tok": 219.3125, "calib/answer_extract_rate": 0.51953125, "calib/auroc": 0.5744301994301995, "calib/avg_num_step_conf": 3.453125, "calib/ece": 0.346603305785124, "calib/final_conf_rate": 0.47265625, "calib/format_rate": 0.40625, "calib/frac_conf_gt_0.9": 0.2396694214876033, "calib/gap": 0.10920726495726507, "calib/mean_conf": 0.39637190082644624, "calib/mu_c": 0.49384615384615393, "calib/mu_w": 0.38463888888888886, "calib/nonempty_final_conf_rate": 0.47265625, "calib/nonempty_reasoning_rate": 0.6875, "calib/nonempty_step_conf_rate": 0.58984375, "calib/pce": 0.3177685950413224, "calib/std_conf": 0.40467524235171576, "calib/step_conf_rate": 0.58984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2929.0, "completions/max_terminated_length": 2929.0, "completions/mean_length": 457.26953125, "completions/mean_terminated_length": 470.1244812011719, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.020266666666666665, "grad_norm": 0.013274524360895157, "learning_rate": 4.75e-06, "loss": 0.1721, "num_tokens": 5106925.0, "reward": 0.3702906370162964, "reward_std": 0.429590106010437, "rewards/accuracy_reward_step": 0.05078125, "rewards/final_brier_reward_step": 0.2835499942302704, "rewards/format_reward_step": 0.40625, "step": 19 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36305526876822114, "aux_distill/mean_u": 0.08904974974893316, "aux_distill/n_active_tok": 238.84375, "calib/answer_extract_rate": 0.66796875, "calib/auroc": 0.4521428571428571, "calib/avg_num_step_conf": 3.734375, "calib/ece": 0.32581699346405235, "calib/final_conf_rate": 0.59765625, "calib/format_rate": 0.5, "calib/frac_conf_gt_0.9": 0.16339869281045752, "calib/gap": -0.03130857142857146, "calib/mean_conf": 0.3191503267973856, "calib/mu_c": 0.29357142857142854, "calib/mu_w": 0.32488, "calib/nonempty_final_conf_rate": 0.59765625, "calib/nonempty_reasoning_rate": 0.859375, "calib/nonempty_step_conf_rate": 0.734375, "calib/pce": 0.23098039215686278, "calib/std_conf": 0.3729548987301565, "calib/step_conf_rate": 0.734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2900.0, "completions/max_terminated_length": 2900.0, "completions/mean_length": 361.8671875, "completions/mean_terminated_length": 373.5403137207031, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.021333333333333333, "grad_norm": 0.017645180225372314, "learning_rate": 5e-06, "loss": 0.1372, "num_tokens": 5304435.0, "reward": 0.4850044846534729, "reward_std": 0.4788362383842468, "rewards/accuracy_reward_step": 0.12890625, "rewards/final_brier_reward_step": 0.3411027193069458, "rewards/format_reward_step": 0.5, "step": 20 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3560049566440284, "aux_distill/mean_u": 0.07944683044637237, "aux_distill/n_active_tok": 284.84375, "calib/answer_extract_rate": 0.7890625, "calib/auroc": 0.43239113827349124, "calib/avg_num_step_conf": 4.3828125, "calib/ece": 0.3241648936170212, "calib/final_conf_rate": 0.734375, "calib/format_rate": 0.671875, "calib/frac_conf_gt_0.9": 0.11702127659574468, "calib/gap": -0.09717761650114587, "calib/mean_conf": 0.26725, "calib/mu_c": 0.18764705882352944, "calib/mu_w": 0.2848246753246753, "calib/nonempty_final_conf_rate": 0.734375, "calib/nonempty_reasoning_rate": 0.9375, "calib/nonempty_step_conf_rate": 0.87109375, "calib/pce": 0.20528191489361697, "calib/std_conf": 0.3527064602108503, "calib/step_conf_rate": 0.87109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2802.0, "completions/max_terminated_length": 2802.0, "completions/mean_length": 342.6796875, "completions/mean_terminated_length": 346.74310302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.0224, "grad_norm": 0.015373860485851765, "learning_rate": 4.9722222222222224e-06, "loss": 0.1704, "num_tokens": 5495121.0, "reward": 0.64105623960495, "reward_std": 0.4382461607456207, "rewards/accuracy_reward_step": 0.14453125, "rewards/final_brier_reward_step": 0.4657062888145447, "rewards/format_reward_step": 0.671875, "step": 21 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3883116855286062, "aux_distill/mean_u": 0.095899419626436, "aux_distill/n_active_tok": 275.0625, "calib/answer_extract_rate": 0.78125, "calib/auroc": 0.5436607142857143, "calib/avg_num_step_conf": 4.16796875, "calib/ece": 0.2713825641025641, "calib/final_conf_rate": 0.76171875, "calib/format_rate": 0.6796875, "calib/frac_conf_gt_0.9": 0.14358974358974358, "calib/gap": 0.08447071428571423, "calib/mean_conf": 0.2749764102564103, "calib/mu_c": 0.34428571428571425, "calib/mu_w": 0.259815, "calib/nonempty_final_conf_rate": 0.76171875, "calib/nonempty_reasoning_rate": 0.9296875, "calib/nonempty_step_conf_rate": 0.87109375, "calib/pce": 0.18343589743589744, "calib/std_conf": 0.3698652772847972, "calib/step_conf_rate": 0.87109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2997.0, "completions/max_terminated_length": 2997.0, "completions/mean_length": 328.2890625, "completions/mean_terminated_length": 330.8740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 34.0, "epoch": 0.023466666666666667, "grad_norm": 0.016197707504034042, "learning_rate": 4.944444444444445e-06, "loss": 0.1975, "num_tokens": 5680979.0, "reward": 0.6641405820846558, "reward_std": 0.45880556106567383, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.5118749141693115, "rewards/format_reward_step": 0.6796875, "step": 22 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.37039018934592605, "aux_distill/mean_u": 0.07887884510197941, "aux_distill/n_active_tok": 290.09375, "calib/answer_extract_rate": 0.76953125, "calib/auroc": 0.5085059171597632, "calib/avg_num_step_conf": 4.359375, "calib/ece": 0.19217616580310878, "calib/final_conf_rate": 0.75390625, "calib/format_rate": 0.69140625, "calib/frac_conf_gt_0.9": 0.06217616580310881, "calib/gap": 0.019437869822485215, "calib/mean_conf": 0.13797927461139897, "calib/mu_c": 0.155, "calib/mu_w": 0.13556213017751478, "calib/nonempty_final_conf_rate": 0.75390625, "calib/nonempty_reasoning_rate": 0.953125, "calib/nonempty_step_conf_rate": 0.921875, "calib/pce": 0.10290155440414508, "calib/std_conf": 0.2753284606220166, "calib/step_conf_rate": 0.921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2716.0, "completions/max_terminated_length": 2716.0, "completions/mean_length": 339.54296875, "completions/mean_terminated_length": 340.8745422363281, "completions/min_length": 0.0, "completions/min_terminated_length": 34.0, "epoch": 0.024533333333333334, "grad_norm": 0.013020611368119717, "learning_rate": 4.9166666666666665e-06, "loss": 0.1838, "num_tokens": 5871838.0, "reward": 0.6801663637161255, "reward_std": 0.4263819456100464, "rewards/accuracy_reward_step": 0.10546875, "rewards/final_brier_reward_step": 0.5634578466415405, "rewards/format_reward_step": 0.69140625, "step": 23 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3795403325930238, "aux_distill/mean_u": 0.0993213219486208, "aux_distill/n_active_tok": 288.4375, "calib/answer_extract_rate": 0.86328125, "calib/auroc": 0.42058823529411765, "calib/avg_num_step_conf": 4.515625, "calib/ece": 0.22866036866359452, "calib/final_conf_rate": 0.84765625, "calib/format_rate": 0.78125, "calib/frac_conf_gt_0.9": 0.06912442396313365, "calib/gap": -0.0714048128342246, "calib/mean_conf": 0.1655331797235023, "calib/mu_c": 0.104, "calib/mu_w": 0.1754048128342246, "calib/nonempty_final_conf_rate": 0.84765625, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.9375, "calib/pce": 0.12797235023041476, "calib/std_conf": 0.2959538720526848, "calib/step_conf_rate": 0.9375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2398.0, "completions/max_terminated_length": 2398.0, "completions/mean_length": 293.7265625, "completions/mean_terminated_length": 297.2095031738281, "completions/min_length": 0.0, "completions/min_terminated_length": 35.0, "epoch": 0.0256, "grad_norm": 0.012711533345282078, "learning_rate": 4.888888888888889e-06, "loss": 0.1428, "num_tokens": 6051544.0, "reward": 0.7570474147796631, "reward_std": 0.35965046286582947, "rewards/accuracy_reward_step": 0.125, "rewards/final_brier_reward_step": 0.607844889163971, "rewards/format_reward_step": 0.78125, "step": 24 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.34714192571118474, "aux_distill/mean_u": 0.09413421996178056, "aux_distill/n_active_tok": 284.34375, "calib/answer_extract_rate": 0.90625, "calib/auroc": 0.5238383838383839, "calib/avg_num_step_conf": 4.3828125, "calib/ece": 0.1623762331838565, "calib/final_conf_rate": 0.87109375, "calib/format_rate": 0.81640625, "calib/frac_conf_gt_0.9": 0.026905829596412557, "calib/gap": 0.012165151515151504, "calib/mean_conf": 0.12479865470852018, "calib/mu_c": 0.1356, "calib/mu_w": 0.1234348484848485, "calib/nonempty_final_conf_rate": 0.87109375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.08753363228699551, "calib/std_conf": 0.23934835832936371, "calib/step_conf_rate": 0.94140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2226.0, "completions/max_terminated_length": 2226.0, "completions/mean_length": 271.77734375, "completions/mean_terminated_length": 273.9173278808594, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.02666666666666667, "grad_norm": 0.01145833171904087, "learning_rate": 4.861111111111111e-06, "loss": 0.1131, "num_tokens": 6224343.0, "reward": 0.8035451173782349, "reward_std": 0.3558991849422455, "rewards/accuracy_reward_step": 0.10546875, "rewards/final_brier_reward_step": 0.6852152347564697, "rewards/format_reward_step": 0.81640625, "step": 25 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3926220517605543, "aux_distill/mean_u": 0.11919352905940954, "aux_distill/n_active_tok": 289.59375, "calib/answer_extract_rate": 0.93359375, "calib/auroc": 0.43568840579710144, "calib/avg_num_step_conf": 4.484375, "calib/ece": 0.15626017316017315, "calib/final_conf_rate": 0.90234375, "calib/format_rate": 0.875, "calib/frac_conf_gt_0.9": 0.030303030303030304, "calib/gap": -0.061667391304347834, "calib/mean_conf": 0.09317705627705629, "calib/mu_c": 0.03791666666666666, "calib/mu_w": 0.0995840579710145, "calib/nonempty_final_conf_rate": 0.90234375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.07277056277056278, "calib/std_conf": 0.21375208328797457, "calib/step_conf_rate": 0.953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2267.0, "completions/max_terminated_length": 2267.0, "completions/mean_length": 266.21484375, "completions/mean_terminated_length": 266.21484375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 0.027733333333333332, "grad_norm": 0.011086120270192623, "learning_rate": 4.833333333333333e-06, "loss": 0.1295, "num_tokens": 6397734.0, "reward": 0.8561491966247559, "reward_std": 0.2689933180809021, "rewards/accuracy_reward_step": 0.09765625, "rewards/final_brier_reward_step": 0.7396421432495117, "rewards/format_reward_step": 0.875, "step": 26 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3392999805510044, "aux_distill/mean_u": 0.08765503889892604, "aux_distill/n_active_tok": 276.25, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5318181818181819, "calib/avg_num_step_conf": 4.28125, "calib/ece": 0.1106625, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 0.016666666666666666, "calib/gap": -0.03391363636363635, "calib/mean_conf": 0.07008750000000001, "calib/mu_c": 0.039, "calib/mu_w": 0.07291363636363635, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.04870833333333333, "calib/std_conf": 0.1805147635063404, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1882.0, "completions/max_terminated_length": 1882.0, "completions/mean_length": 233.49609375, "completions/mean_terminated_length": 234.41177368164062, "completions/min_length": 0.0, "completions/min_terminated_length": 25.0, "epoch": 0.0288, "grad_norm": 0.01071285642683506, "learning_rate": 4.805555555555556e-06, "loss": 0.0644, "num_tokens": 6562725.0, "reward": 0.8917468786239624, "reward_std": 0.2230653464794159, "rewards/accuracy_reward_step": 0.08203125, "rewards/final_brier_reward_step": 0.7991188168525696, "rewards/format_reward_step": 0.90234375, "step": 27 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3293367293663323, "aux_distill/mean_u": 0.08486807697068835, "aux_distill/n_active_tok": 266.28125, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.4142295597484277, "calib/avg_num_step_conf": 4.203125, "calib/ece": 0.14791818181818184, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.024793388429752067, "calib/gap": 0.01924622641509434, "calib/mean_conf": 0.0851396694214876, "calib/mu_c": 0.10200000000000001, "calib/mu_w": 0.08275377358490567, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.054545454545454536, "calib/std_conf": 0.20000299294427465, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1156.0, "completions/max_terminated_length": 1156.0, "completions/mean_length": 224.82421875, "completions/mean_terminated_length": 225.70590209960938, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.029866666666666666, "grad_norm": 0.010027695447206497, "learning_rate": 4.777777777777778e-06, "loss": 0.0176, "num_tokens": 6727224.0, "reward": 0.9075891971588135, "reward_std": 0.22711993753910065, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.7800223231315613, "rewards/format_reward_step": 0.9140625, "step": 28 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.33028204878792167, "aux_distill/mean_u": 0.09123819166510017, "aux_distill/n_active_tok": 288.34375, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.477989352989353, "calib/avg_num_step_conf": 4.4609375, "calib/ece": 0.08967540983606559, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.00819672131147541, "calib/gap": 0.04145323505323507, "calib/mean_conf": 0.04819344262295082, "calib/mu_c": 0.08590909090909092, "calib/mu_w": 0.044455855855855854, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.023852459016393445, "calib/std_conf": 0.13082393858854915, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2240.0, "completions/max_terminated_length": 2240.0, "completions/mean_length": 240.50390625, "completions/mean_terminated_length": 241.4470672607422, "completions/min_length": 0.0, "completions/min_terminated_length": 56.0, "epoch": 0.030933333333333334, "grad_norm": 0.0092391986399889, "learning_rate": 4.75e-06, "loss": 0.0501, "num_tokens": 6895921.0, "reward": 0.9342766404151917, "reward_std": 0.15640118718147278, "rewards/accuracy_reward_step": 0.09765625, "rewards/final_brier_reward_step": 0.8412095904350281, "rewards/format_reward_step": 0.9296875, "step": 29 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3043099972419441, "aux_distill/mean_u": 0.07361558707575262, "aux_distill/n_active_tok": 300.25, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5543650793650794, "calib/avg_num_step_conf": 4.703125, "calib/ece": 0.11186788617886177, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.0040650406504065045, "calib/gap": 0.04093412698412698, "calib/mean_conf": 0.055611788617886175, "calib/mu_c": 0.09055555555555556, "calib/mu_w": 0.04962142857142857, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.010569105691056912, "calib/std_conf": 0.12115992749981405, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 768.0, "completions/max_terminated_length": 768.0, "completions/mean_length": 233.4140625, "completions/mean_terminated_length": 234.3294219970703, "completions/min_length": 0.0, "completions/min_terminated_length": 43.0, "epoch": 0.032, "grad_norm": 0.00907308142632246, "learning_rate": 4.722222222222222e-06, "loss": 0.0428, "num_tokens": 7062659.0, "reward": 0.9381251335144043, "reward_std": 0.18297097086906433, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.8020316362380981, "rewards/format_reward_step": 0.93359375, "step": 30 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.30257489811629057, "aux_distill/mean_u": 0.06740870386265194, "aux_distill/n_active_tok": 296.875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.45075974770642196, "calib/avg_num_step_conf": 4.6875, "calib/ece": 0.12351999999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.012, "calib/gap": -0.014423738532110097, "calib/mean_conf": 0.06664, "calib/mu_c": 0.0540625, "calib/mu_w": 0.0684862385321101, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.031079999999999997, "calib/std_conf": 0.14081729439241475, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 739.0, "completions/max_terminated_length": 739.0, "completions/mean_length": 222.49609375, "completions/mean_terminated_length": 223.36863708496094, "completions/min_length": 0.0, "completions/min_terminated_length": 42.0, "epoch": 0.03306666666666667, "grad_norm": 0.009486602619290352, "learning_rate": 4.694444444444445e-06, "loss": 0.0173, "num_tokens": 7225530.0, "reward": 0.9653845429420471, "reward_std": 0.11241470277309418, "rewards/accuracy_reward_step": 0.125, "rewards/final_brier_reward_step": 0.837019145488739, "rewards/format_reward_step": 0.96875, "step": 31 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.32631619879975915, "aux_distill/mean_u": 0.08648384814445001, "aux_distill/n_active_tok": 294.1875, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5512232415902141, "calib/avg_num_step_conf": 4.63671875, "calib/ece": 0.13063266932270917, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.01195219123505976, "calib/gap": -0.00442585487906591, "calib/mean_conf": 0.07566215139442233, "calib/mu_c": 0.07181818181818181, "calib/mu_w": 0.07624403669724772, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03741035856573705, "calib/std_conf": 0.16396980567488864, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 540.0, "completions/max_terminated_length": 540.0, "completions/mean_length": 213.46875, "completions/mean_terminated_length": 214.30589294433594, "completions/min_length": 0.0, "completions/min_terminated_length": 36.0, "epoch": 0.034133333333333335, "grad_norm": 0.009173893369734287, "learning_rate": 4.666666666666667e-06, "loss": 0.0242, "num_tokens": 7386882.0, "reward": 0.9543381929397583, "reward_std": 0.13994824886322021, "rewards/accuracy_reward_step": 0.12890625, "rewards/final_brier_reward_step": 0.8188326954841614, "rewards/format_reward_step": 0.9609375, "step": 32 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3093249648809433, "aux_distill/mean_u": 0.07432332380150891, "aux_distill/n_active_tok": 329.1875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5285885989010989, "calib/avg_num_step_conf": 5.13671875, "calib/ece": 0.09990800000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.003763392857142854, "calib/mean_conf": 0.063372, "calib/mu_c": 0.060000000000000005, "calib/mu_w": 0.06376339285714286, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02964, "calib/std_conf": 0.13201373267959662, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2364.0, "completions/max_terminated_length": 2364.0, "completions/mean_length": 252.53125, "completions/mean_terminated_length": 253.52159118652344, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.0352, "grad_norm": 0.008318406529724598, "learning_rate": 4.638888888888889e-06, "loss": 0.0882, "num_tokens": 7558402.0, "reward": 0.9643739461898804, "reward_std": 0.1018943265080452, "rewards/accuracy_reward_step": 0.1015625, "rewards/final_brier_reward_step": 0.8584355115890503, "rewards/format_reward_step": 0.96875, "step": 33 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2938751680776477, "aux_distill/mean_u": 0.08131013044518337, "aux_distill/n_active_tok": 315.78125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.47363569321533927, "calib/avg_num_step_conf": 5.0078125, "calib/ece": 0.07644000000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": 0.03626474926253687, "calib/mean_conf": 0.0518, "calib/mu_c": 0.08458333333333334, "calib/mu_w": 0.04831858407079647, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.016120000000000002, "calib/std_conf": 0.09212795449807837, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 842.0, "completions/max_terminated_length": 842.0, "completions/mean_length": 219.78125, "completions/mean_terminated_length": 220.6431427001953, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.03626666666666667, "grad_norm": 0.009772083722054958, "learning_rate": 4.611111111111112e-06, "loss": 0.0595, "num_tokens": 7719778.0, "reward": 0.9713041186332703, "reward_std": 0.1016630083322525, "rewards/accuracy_reward_step": 0.09375, "rewards/final_brier_reward_step": 0.8801082372665405, "rewards/format_reward_step": 0.96875, "step": 34 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.31735930871218443, "aux_distill/mean_u": 0.0893676437073024, "aux_distill/n_active_tok": 315.375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.37512800819252434, "calib/avg_num_step_conf": 4.91796875, "calib/ece": 0.13023715415019763, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0259536610343062, "calib/mean_conf": 0.05031620553359684, "calib/mu_c": 0.028055555555555556, "calib/mu_w": 0.054009216589861755, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.019130434782608695, "calib/std_conf": 0.0948636363430505, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1565.0, "completions/max_terminated_length": 1565.0, "completions/mean_length": 234.33984375, "completions/mean_terminated_length": 234.33984375, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 0.037333333333333336, "grad_norm": 0.008290213532745838, "learning_rate": 4.583333333333333e-06, "loss": 0.0982, "num_tokens": 7889025.0, "reward": 0.9709469079971313, "reward_std": 0.07739226520061493, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.8286125063896179, "rewards/format_reward_step": 0.97265625, "step": 35 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.32667525624856353, "aux_distill/mean_u": 0.0935250968970002, "aux_distill/n_active_tok": 312.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4863888106636628, "calib/avg_num_step_conf": 4.90625, "calib/ece": 0.2004330708661417, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.015177884164085234, "calib/mean_conf": 0.043897637795275585, "calib/mu_c": 0.031886792452830194, "calib/mu_w": 0.04706467661691543, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.017834645669291337, "calib/std_conf": 0.09072848953508278, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1743.0, "completions/max_terminated_length": 1743.0, "completions/mean_length": 227.453125, "completions/mean_terminated_length": 227.453125, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 0.0384, "grad_norm": 0.008621211163699627, "learning_rate": 4.555555555555556e-06, "loss": 0.0632, "num_tokens": 8049965.0, "reward": 0.9937493801116943, "reward_std": 0.04545110464096069, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.7882800698280334, "rewards/format_reward_step": 0.9921875, "step": 36 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.27830373821780086, "aux_distill/mean_u": 0.06014688570484389, "aux_distill/n_active_tok": 271.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5403703703703703, "calib/avg_num_step_conf": 4.24609375, "calib/ece": 0.0991764705882353, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00017777777777778017, "calib/mean_conf": 0.029843137254901966, "calib/mu_c": 0.030000000000000006, "calib/mu_w": 0.029822222222222226, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005686274509803923, "calib/std_conf": 0.04472985253502062, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 748.0, "completions/max_terminated_length": 748.0, "completions/mean_length": 196.78515625, "completions/mean_terminated_length": 197.55686950683594, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.039466666666666664, "grad_norm": 0.009745295159518719, "learning_rate": 4.527777777777778e-06, "loss": 0.0497, "num_tokens": 8207438.0, "reward": 0.9884085655212402, "reward_std": 0.047429949045181274, "rewards/accuracy_reward_step": 0.1171875, "rewards/final_brier_reward_step": 0.8752546906471252, "rewards/format_reward_step": 0.984375, "step": 37 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.34966559894382954, "aux_distill/mean_u": 0.09391435826298852, "aux_distill/n_active_tok": 261.375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5124125479575716, "calib/avg_num_step_conf": 4.08203125, "calib/ece": 0.15146245059288538, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004239449334236058, "calib/mean_conf": 0.029011857707509883, "calib/mu_c": 0.02547619047619048, "calib/mu_w": 0.029715639810426537, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0072332015810276686, "calib/std_conf": 0.0670983280726559, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1823.0, "completions/max_terminated_length": 1823.0, "completions/mean_length": 193.234375, "completions/mean_terminated_length": 193.234375, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.04053333333333333, "grad_norm": 0.010434729978442192, "learning_rate": 4.5e-06, "loss": 0.0401, "num_tokens": 8363794.0, "reward": 0.9839221239089966, "reward_std": 0.06251996010541916, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.8233128786087036, "rewards/format_reward_step": 0.98046875, "step": 38 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.32260613050311804, "aux_distill/mean_u": 0.07570191593785686, "aux_distill/n_active_tok": 233.0625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.43281970046082946, "calib/avg_num_step_conf": 3.609375, "calib/ece": 0.10807725490196078, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": -0.008174063940092163, "calib/mean_conf": 0.02556745098039215, "calib/mu_c": 0.01838709677419355, "calib/mu_w": 0.026561160714285714, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006038039215686275, "calib/std_conf": 0.06936021292239343, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2179.0, "completions/max_terminated_length": 2179.0, "completions/mean_length": 181.72265625, "completions/mean_terminated_length": 181.72265625, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.0416, "grad_norm": 0.008618353866040707, "learning_rate": 4.472222222222223e-06, "loss": 0.0462, "num_tokens": 8516403.0, "reward": 0.9838817119598389, "reward_std": 0.053902022540569305, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.8622946739196777, "rewards/format_reward_step": 0.984375, "step": 39 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.31119333766400814, "aux_distill/mean_u": 0.0803044466646787, "aux_distill/n_active_tok": 235.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.43339945071711933, "calib/avg_num_step_conf": 3.70703125, "calib/ece": 0.10207843137254902, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012967653341470858, "calib/mean_conf": 0.025058823529411765, "calib/mu_c": 0.036551724137931035, "calib/mu_w": 0.023584070796460178, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006705882352941176, "calib/std_conf": 0.0748272289133522, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 628.0, "completions/max_terminated_length": 628.0, "completions/mean_length": 176.61328125, "completions/mean_terminated_length": 177.30589294433594, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.042666666666666665, "grad_norm": 0.008439777418971062, "learning_rate": 4.444444444444444e-06, "loss": 0.0166, "num_tokens": 8668376.0, "reward": 0.9945722222328186, "reward_std": 0.02659289352595806, "rewards/accuracy_reward_step": 0.11328125, "rewards/final_brier_reward_step": 0.883675754070282, "rewards/format_reward_step": 0.9921875, "step": 40 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3391628819517791, "aux_distill/mean_u": 0.09605003539857801, "aux_distill/n_active_tok": 230.78125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5066517895459617, "calib/avg_num_step_conf": 3.609375, "calib/ece": 0.23242063492063492, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0034065745429576865, "calib/mean_conf": 0.0175, "calib/mu_c": 0.014918032786885247, "calib/mu_w": 0.018324607329842934, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003928571428571429, "calib/std_conf": 0.03396572735745012, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 516.0, "completions/max_terminated_length": 516.0, "completions/mean_length": 171.4296875, "completions/mean_terminated_length": 172.1019744873047, "completions/min_length": 0.0, "completions/min_terminated_length": 42.0, "epoch": 0.04373333333333333, "grad_norm": 0.009633166715502739, "learning_rate": 4.416666666666667e-06, "loss": 0.0363, "num_tokens": 8819510.0, "reward": 0.9852579832077026, "reward_std": 0.056308452039957047, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.75176602602005, "rewards/format_reward_step": 0.98046875, "step": 41 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.32519798586145043, "aux_distill/mean_u": 0.08523037325117232, "aux_distill/n_active_tok": 212.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4938311688311688, "calib/avg_num_step_conf": 3.34765625, "calib/ece": 0.1280392156862745, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0016428571428571438, "calib/mean_conf": 0.012274509803921568, "calib/mu_c": 0.010857142857142857, "calib/mu_w": 0.0125, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0015294117647058822, "calib/std_conf": 0.022200581542791604, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 401.0, "completions/max_terminated_length": 401.0, "completions/mean_length": 152.13671875, "completions/mean_terminated_length": 152.73333740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.0448, "grad_norm": 0.00855032168328762, "learning_rate": 4.388888888888889e-06, "loss": 0.023, "num_tokens": 8962825.0, "reward": 0.9972575902938843, "reward_std": 0.015008872374892235, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.8617027401924133, "rewards/format_reward_step": 0.99609375, "step": 42 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3105672858655453, "aux_distill/mean_u": 0.08021462575046566, "aux_distill/n_active_tok": 229.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4931987713909609, "calib/avg_num_step_conf": 3.58984375, "calib/ece": 0.15682352941176472, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007620666959192628, "calib/mean_conf": 0.011803921568627451, "calib/mu_c": 0.018139534883720932, "calib/mu_w": 0.010518867924528304, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.023777580153468786, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 385.0, "completions/max_terminated_length": 385.0, "completions/mean_length": 167.234375, "completions/mean_terminated_length": 167.8902130126953, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.04586666666666667, "grad_norm": 0.008470780216157436, "learning_rate": 4.361111111111112e-06, "loss": 0.0409, "num_tokens": 9110861.0, "reward": 0.9948850870132446, "reward_std": 0.02814546786248684, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.8296140432357788, "rewards/format_reward_step": 0.9921875, "step": 43 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3330973004922271, "aux_distill/mean_u": 0.09241892886200444, "aux_distill/n_active_tok": 237.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5287971457696229, "calib/avg_num_step_conf": 3.71484375, "calib/ece": 0.13338582677165353, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002250254841997963, "calib/mean_conf": 0.008346456692913387, "calib/mu_c": 0.01027777777777778, "calib/mu_w": 0.008027522935779817, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.012344387581723506, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2142.0, "completions/max_terminated_length": 2142.0, "completions/mean_length": 181.2265625, "completions/mean_terminated_length": 181.2265625, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 0.046933333333333334, "grad_norm": 0.007521971128880978, "learning_rate": 4.333333333333334e-06, "loss": 0.0436, "num_tokens": 9263575.0, "reward": 0.9896165728569031, "reward_std": 0.02841258980333805, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.8503269553184509, "rewards/format_reward_step": 0.98828125, "step": 44 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3044560784474015, "aux_distill/mean_u": 0.0746863951893971, "aux_distill/n_active_tok": 214.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.507775403608737, "calib/avg_num_step_conf": 3.359375, "calib/ece": 0.14403921568627454, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00038817663817663876, "calib/mean_conf": 0.008901960784313726, "calib/mu_c": 0.009230769230769232, "calib/mu_w": 0.008842592592592593, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.014347582842075764, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2297.0, "completions/max_terminated_length": 2297.0, "completions/mean_length": 171.30078125, "completions/mean_terminated_length": 171.30078125, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.048, "grad_norm": 0.007751181721687317, "learning_rate": 4.305555555555556e-06, "loss": 0.0843, "num_tokens": 9412476.0, "reward": 0.9895457029342651, "reward_std": 0.03634835034608841, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.8384664058685303, "rewards/format_reward_step": 0.98828125, "step": 45 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3505524811334908, "aux_distill/mean_u": 0.11305923610502411, "aux_distill/n_active_tok": 223.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5502726952850104, "calib/avg_num_step_conf": 3.515625, "calib/ece": 0.2225984251968504, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014883884588318087, "calib/mean_conf": 0.005748031496062992, "calib/mu_c": 0.006896551724137932, "calib/mu_w": 0.005408163265306123, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010311654215518736, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 603.0, "completions/max_terminated_length": 603.0, "completions/mean_length": 162.703125, "completions/mean_terminated_length": 163.3411865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.04906666666666667, "grad_norm": 0.0071896458975970745, "learning_rate": 4.277777777777778e-06, "loss": 0.0083, "num_tokens": 9558896.0, "reward": 0.9936808347702026, "reward_std": 0.017596811056137085, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.7686116695404053, "rewards/format_reward_step": 0.9921875, "step": 46 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3146101264283061, "aux_distill/mean_u": 0.08592837101606565, "aux_distill/n_active_tok": 228.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5255847953216374, "calib/avg_num_step_conf": 3.578125, "calib/ece": 0.14346456692913387, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0036086744639376245, "calib/mean_conf": 0.006141732283464567, "calib/mu_c": 0.009210526315789476, "calib/mu_w": 0.005601851851851852, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009683758957676255, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 479.0, "completions/max_terminated_length": 479.0, "completions/mean_length": 163.14453125, "completions/mean_terminated_length": 163.78431701660156, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.050133333333333335, "grad_norm": 0.006908603478223085, "learning_rate": 4.25e-06, "loss": 0.0494, "num_tokens": 9706637.0, "reward": 0.9954426288604736, "reward_std": 0.01957503706216812, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.8463538885116577, "rewards/format_reward_step": 0.9921875, "step": 47 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3150333953090012, "aux_distill/mean_u": 0.08773427079714621, "aux_distill/n_active_tok": 217.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5326086956521738, "calib/avg_num_step_conf": 3.4140625, "calib/ece": 0.18262745098039218, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014824879227053143, "calib/mean_conf": 0.008588235294117647, "calib/mu_c": 0.009791666666666667, "calib/mu_w": 0.008309178743961353, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0014901960784313726, "calib/std_conf": 0.021263221004214024, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 444.0, "completions/max_terminated_length": 444.0, "completions/mean_length": 158.1796875, "completions/mean_terminated_length": 158.8000030517578, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.0512, "grad_norm": 0.0069527393206954, "learning_rate": 4.222222222222223e-06, "loss": 0.022, "num_tokens": 9850819.0, "reward": 0.9976677298545837, "reward_std": 0.01528632640838623, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.811741828918457, "rewards/format_reward_step": 0.99609375, "step": 48 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.32575283851474524, "aux_distill/mean_u": 0.08622161203094775, "aux_distill/n_active_tok": 232.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5482870370370371, "calib/avg_num_step_conf": 3.62890625, "calib/ece": 0.20685039370078742, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003048148148148149, "calib/mean_conf": 0.005748031496062992, "calib/mu_c": 0.00814814814814815, "calib/mu_w": 0.0051, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.011434144545147353, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2156.0, "completions/max_terminated_length": 2156.0, "completions/mean_length": 182.9765625, "completions/mean_terminated_length": 182.9765625, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.05226666666666667, "grad_norm": 0.005945167969912291, "learning_rate": 4.194444444444445e-06, "loss": 0.0707, "num_tokens": 10002197.0, "reward": 0.9899195432662964, "reward_std": 0.0371810719370842, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.7806203365325928, "rewards/format_reward_step": 0.98828125, "step": 49 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3211708148010075, "aux_distill/mean_u": 0.08810421113629638, "aux_distill/n_active_tok": 236.5, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.49433962264150944, "calib/avg_num_step_conf": 3.6953125, "calib/ece": 0.20478260869565218, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0007330188679245285, "calib/mean_conf": 0.004703557312252965, "calib/mu_c": 0.005283018867924529, "calib/mu_w": 0.00455, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009176091721549455, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2891.0, "completions/max_terminated_length": 2891.0, "completions/mean_length": 203.78125, "completions/mean_terminated_length": 203.78125, "completions/min_length": 52.0, "completions/min_terminated_length": 52.0, "epoch": 0.05333333333333334, "grad_norm": 0.004969415720552206, "learning_rate": 4.166666666666667e-06, "loss": 0.0815, "num_tokens": 10159725.0, "reward": 0.9893224239349365, "reward_std": 0.028084326535463333, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.7833324074745178, "rewards/format_reward_step": 0.98828125, "step": 50 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.29707011906430125, "aux_distill/mean_u": 0.07452163606529925, "aux_distill/n_active_tok": 241.5, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5441661813240148, "calib/avg_num_step_conf": 3.7734375, "calib/ece": 0.19521739130434784, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0013618714812657744, "calib/mean_conf": 0.006363636363636363, "calib/mu_c": 0.007450980392156864, "calib/mu_w": 0.006089108910891089, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010862879726242943, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2042.0, "completions/max_terminated_length": 2042.0, "completions/mean_length": 193.67578125, "completions/mean_terminated_length": 193.67578125, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.0544, "grad_norm": 0.006089591421186924, "learning_rate": 4.138888888888889e-06, "loss": 0.0822, "num_tokens": 10318602.0, "reward": 0.9896872639656067, "reward_std": 0.03538576513528824, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.7918745279312134, "rewards/format_reward_step": 0.98828125, "step": 51 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3404705720022321, "aux_distill/mean_u": 0.1005887897967428, "aux_distill/n_active_tok": 212.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5432149037972106, "calib/avg_num_step_conf": 3.3203125, "calib/ece": 0.18183593750000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0019372900335946248, "calib/mean_conf": 0.0075390625, "calib/mu_c": 0.005957446808510639, "calib/mu_w": 0.007894736842105263, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002890625, "calib/std_conf": 0.04034759176978341, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 520.0, "completions/max_terminated_length": 520.0, "completions/mean_length": 170.72265625, "completions/mean_terminated_length": 171.3921661376953, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.055466666666666664, "grad_norm": 0.006997254677116871, "learning_rate": 4.111111111111111e-06, "loss": 0.0022, "num_tokens": 10470259.0, "reward": 1.000251293182373, "reward_std": 0.004490233957767487, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.8169089555740356, "rewards/format_reward_step": 1.0, "step": 52 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3243614980019629, "aux_distill/mean_u": 0.09754663784882035, "aux_distill/n_active_tok": 236.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.49855324074074076, "calib/avg_num_step_conf": 3.703125, "calib/ece": 0.24121568627450982, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0013045634920634932, "calib/mean_conf": 0.0058431372549019615, "calib/mu_c": 0.0068253968253968265, "calib/mu_w": 0.005520833333333333, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.011130901253117435, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 466.0, "completions/max_terminated_length": 466.0, "completions/mean_length": 184.984375, "completions/mean_terminated_length": 185.70980834960938, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.05653333333333333, "grad_norm": 0.00639935489743948, "learning_rate": 4.083333333333334e-06, "loss": 0.0273, "num_tokens": 10623439.0, "reward": 0.989882230758667, "reward_std": 0.029333556070923805, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.745389461517334, "rewards/format_reward_step": 0.98828125, "step": 53 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.30648385314270854, "aux_distill/mean_u": 0.08786370281314095, "aux_distill/n_active_tok": 262.0, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4715535227090355, "calib/avg_num_step_conf": 4.1015625, "calib/ece": 0.2612648221343874, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0015904349221633764, "calib/mean_conf": 0.0035573122529644276, "calib/mu_c": 0.0023880597014925378, "calib/mu_w": 0.003978494623655914, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.008340201932212852, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3049.0, "completions/max_terminated_length": 3049.0, "completions/mean_length": 218.15625, "completions/mean_terminated_length": 218.15625, "completions/min_length": 55.0, "completions/min_terminated_length": 55.0, "epoch": 0.0576, "grad_norm": 0.005304010584950447, "learning_rate": 4.055555555555556e-06, "loss": 0.0789, "num_tokens": 10785519.0, "reward": 0.9830062389373779, "reward_std": 0.046094685792922974, "rewards/accuracy_reward_step": 0.26171875, "rewards/final_brier_reward_step": 0.7238249778747559, "rewards/format_reward_step": 0.98046875, "step": 54 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3295765253715217, "aux_distill/mean_u": 0.09773883690950656, "aux_distill/n_active_tok": 266.1875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4909261067708333, "calib/avg_num_step_conf": 4.15234375, "calib/ece": 0.2450390625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.000572916666666666, "calib/mean_conf": 0.0049609375, "calib/mu_c": 0.0045312500000000006, "calib/mu_w": 0.005104166666666667, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009682379569150022, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 596.0, "completions/max_terminated_length": 596.0, "completions/mean_length": 201.58203125, "completions/mean_terminated_length": 202.37255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.058666666666666666, "grad_norm": 0.0058689904399216175, "learning_rate": 4.027777777777779e-06, "loss": 0.0203, "num_tokens": 10944948.0, "reward": 0.9913079738616943, "reward_std": 0.029984353110194206, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.7443346977233887, "rewards/format_reward_step": 0.98828125, "step": 55 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3124923906289041, "aux_distill/mean_u": 0.09151797595988466, "aux_distill/n_active_tok": 287.15625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.47722727272727267, "calib/avg_num_step_conf": 4.515625, "calib/ece": 0.21137254901960786, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0015590909090909093, "calib/mean_conf": 0.004313725490196079, "calib/mu_c": 0.003090909090909091, "calib/mu_w": 0.0046500000000000005, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008779410396207511, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 696.0, "completions/max_terminated_length": 696.0, "completions/mean_length": 211.5234375, "completions/mean_terminated_length": 212.3529510498047, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.05973333333333333, "grad_norm": 0.0049371616914868355, "learning_rate": 4.000000000000001e-06, "loss": 0.0203, "num_tokens": 11105938.0, "reward": 0.9928038716316223, "reward_std": 0.023647766560316086, "rewards/accuracy_reward_step": 0.21484375, "rewards/final_brier_reward_step": 0.7785765528678894, "rewards/format_reward_step": 0.9921875, "step": 56 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2937448946759105, "aux_distill/mean_u": 0.0755081459411437, "aux_distill/n_active_tok": 311.5625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.46857605862044677, "calib/avg_num_step_conf": 4.86328125, "calib/ece": 0.3214173228346457, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0011526808990347352, "calib/mean_conf": 0.005354330708661418, "calib/mu_c": 0.004578313253012049, "calib/mu_w": 0.0057309941520467846, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.011891588742458496, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2507.0, "completions/max_terminated_length": 2507.0, "completions/mean_length": 255.3125, "completions/mean_terminated_length": 255.3125, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 0.0608, "grad_norm": 0.0056275613605976105, "learning_rate": 3.972222222222223e-06, "loss": 0.0815, "num_tokens": 11278090.0, "reward": 0.9896812438964844, "reward_std": 0.036557819694280624, "rewards/accuracy_reward_step": 0.32421875, "rewards/final_brier_reward_step": 0.6668624877929688, "rewards/format_reward_step": 0.98828125, "step": 57 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2732988477218896, "aux_distill/mean_u": 0.0651957247729289, "aux_distill/n_active_tok": 315.46875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5159128978224456, "calib/avg_num_step_conf": 4.93359375, "calib/ece": 0.218671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000488406947015782, "calib/mean_conf": 0.0052343749999999994, "calib/mu_c": 0.0056140350877192996, "calib/mu_w": 0.0051256281407035175, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.000625, "calib/std_conf": 0.013108015424135531, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 910.0, "completions/max_terminated_length": 910.0, "completions/mean_length": 246.265625, "completions/mean_terminated_length": 247.23138427734375, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.06186666666666667, "grad_norm": 0.004538458306342363, "learning_rate": 3.944444444444445e-06, "loss": 0.0216, "num_tokens": 11447454.0, "reward": 0.9894318580627441, "reward_std": 0.03593946248292923, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.7679261565208435, "rewards/format_reward_step": 0.98828125, "step": 58 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.26986421295441687, "aux_distill/mean_u": 0.07765561929139546, "aux_distill/n_active_tok": 341.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5320283473235172, "calib/avg_num_step_conf": 5.38671875, "calib/ece": 0.33254901960784317, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0018473923214531451, "calib/mean_conf": 0.004705882352941177, "calib/mu_c": 0.005930232558139535, "calib/mu_w": 0.00408284023668639, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010655559384193809, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 788.0, "completions/max_terminated_length": 788.0, "completions/mean_length": 256.7109375, "completions/mean_terminated_length": 257.7176513671875, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.06293333333333333, "grad_norm": 0.005329262465238571, "learning_rate": 3.916666666666667e-06, "loss": 0.0169, "num_tokens": 11619420.0, "reward": 0.9980183839797974, "reward_std": 0.015194371342658997, "rewards/accuracy_reward_step": 0.3359375, "rewards/final_brier_reward_step": 0.66400545835495, "rewards/format_reward_step": 0.99609375, "step": 59 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3008674378506839, "aux_distill/mean_u": 0.07826646190527813, "aux_distill/n_active_tok": 353.53125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.540191652807476, "calib/avg_num_step_conf": 5.5234375, "calib/ece": 0.26999999999999996, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014255167498218105, "calib/mean_conf": 0.004761904761904762, "calib/mu_c": 0.005797101449275362, "calib/mu_w": 0.004371584699453552, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00047619047619047614, "calib/std_conf": 0.011106575037800574, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1197.0, "completions/max_terminated_length": 1197.0, "completions/mean_length": 269.203125, "completions/mean_terminated_length": 270.25885009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.064, "grad_norm": 0.005264188162982464, "learning_rate": 3.88888888888889e-06, "loss": 0.017, "num_tokens": 11797192.0, "reward": 0.9858656525611877, "reward_std": 0.04708104953169823, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.7178249359130859, "rewards/format_reward_step": 0.984375, "step": 60 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.28734378842636943, "aux_distill/mean_u": 0.08205361872395535, "aux_distill/n_active_tok": 350.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4551039697542534, "calib/avg_num_step_conf": 5.47265625, "calib/ece": 0.35873517786561265, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0012111801242236028, "calib/mean_conf": 0.004901185770750989, "calib/mu_c": 0.004130434782608695, "calib/mu_w": 0.005341614906832298, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008081240083161419, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2514.0, "completions/max_terminated_length": 2514.0, "completions/mean_length": 263.265625, "completions/mean_terminated_length": 263.265625, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 0.06506666666666666, "grad_norm": 0.00512561434879899, "learning_rate": 3.861111111111112e-06, "loss": 0.085, "num_tokens": 11968652.0, "reward": 0.989721417427063, "reward_std": 0.03596086427569389, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.6317867040634155, "rewards/format_reward_step": 0.98828125, "step": 61 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.26067466707900167, "aux_distill/mean_u": 0.06894861401193524, "aux_distill/n_active_tok": 385.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.45507317073170733, "calib/avg_num_step_conf": 6.11328125, "calib/ece": 0.1912156862745098, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0018195121951219517, "calib/mean_conf": 0.004862745098039216, "calib/mu_c": 0.0034, "calib/mu_w": 0.0052195121951219515, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008157805375397733, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1024.0, "completions/max_terminated_length": 1024.0, "completions/mean_length": 294.1484375, "completions/mean_terminated_length": 295.3019714355469, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.06613333333333334, "grad_norm": 0.004156938754022121, "learning_rate": 3.833333333333334e-06, "loss": -0.0044, "num_tokens": 12151034.0, "reward": 0.996712863445282, "reward_std": 0.01257465872913599, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.8020195364952087, "rewards/format_reward_step": 0.99609375, "step": 62 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2901492426171899, "aux_distill/mean_u": 0.08588818541921382, "aux_distill/n_active_tok": 386.03125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4864945213624394, "calib/avg_num_step_conf": 6.109375, "calib/ece": 0.2366535433070866, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0013751804977490865, "calib/mean_conf": 0.0035039370078740156, "calib/mu_c": 0.002459016393442623, "calib/mu_w": 0.0038341968911917096, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008780233720409064, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 883.0, "completions/max_terminated_length": 883.0, "completions/mean_length": 292.73046875, "completions/mean_terminated_length": 293.8784484863281, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.0672, "grad_norm": 0.004469459876418114, "learning_rate": 3.8055555555555556e-06, "loss": 0.0327, "num_tokens": 12334613.0, "reward": 0.9927290678024292, "reward_std": 0.023673495277762413, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.7549893856048584, "rewards/format_reward_step": 0.9921875, "step": 63 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2550811097025871, "aux_distill/mean_u": 0.07274823291578757, "aux_distill/n_active_tok": 428.1875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5149980522010129, "calib/avg_num_step_conf": 6.66015625, "calib/ece": 0.39948616600790515, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00024737047136735467, "calib/mean_conf": 0.003675889328063241, "calib/mu_c": 0.003823529411764706, "calib/mu_w": 0.0035761589403973514, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006677619669182257, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2485.0, "completions/max_terminated_length": 2485.0, "completions/mean_length": 324.765625, "completions/mean_terminated_length": 324.765625, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.06826666666666667, "grad_norm": 0.004222342744469643, "learning_rate": 3.777777777777778e-06, "loss": 0.0683, "num_tokens": 12521529.0, "reward": 0.9897758960723877, "reward_std": 0.035435110330581665, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.5928331613540649, "rewards/format_reward_step": 0.98828125, "step": 64 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.25398184289224446, "aux_distill/mean_u": 0.07289780750063732, "aux_distill/n_active_tok": 377.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5176613066002826, "calib/avg_num_step_conf": 5.94140625, "calib/ece": 0.3434375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000594765525129516, "calib/mean_conf": 0.00421875, "calib/mu_c": 0.004606741573033708, "calib/mu_w": 0.004011976047904192, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008441839162025061, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 908.0, "completions/max_terminated_length": 908.0, "completions/mean_length": 286.390625, "completions/mean_terminated_length": 287.51373291015625, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.06933333333333333, "grad_norm": 0.004306511953473091, "learning_rate": 3.7500000000000005e-06, "loss": 0.0023, "num_tokens": 12699869.0, "reward": 0.9976509809494019, "reward_std": 0.013895140960812569, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6515519618988037, "rewards/format_reward_step": 0.99609375, "step": 65 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2754818422254175, "aux_distill/mean_u": 0.07254377648620623, "aux_distill/n_active_tok": 409.5625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4582591093117409, "calib/avg_num_step_conf": 6.41015625, "calib/ece": 0.25023529411764706, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0011012145748987854, "calib/mean_conf": 0.004666666666666666, "calib/mu_c": 0.0038461538461538464, "calib/mu_w": 0.004947368421052632, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009403100587231573, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1115.0, "completions/max_terminated_length": 1115.0, "completions/mean_length": 323.73828125, "completions/mean_terminated_length": 325.00787353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.0704, "grad_norm": 0.00394570454955101, "learning_rate": 3.7222222222222225e-06, "loss": 0.033, "num_tokens": 12889098.0, "reward": 0.9931092262268066, "reward_std": 0.024321211501955986, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.7401245832443237, "rewards/format_reward_step": 0.9921875, "step": 66 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2856973884627223, "aux_distill/mean_u": 0.08087531891203306, "aux_distill/n_active_tok": 387.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4714991181657849, "calib/avg_num_step_conf": 6.0546875, "calib/ece": 0.311796875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00024126984126984167, "calib/mean_conf": 0.004609375, "calib/mu_c": 0.0044444444444444444, "calib/mu_w": 0.004685714285714286, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.008874326008738637, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 774.0, "completions/max_terminated_length": 774.0, "completions/mean_length": 308.9609375, "completions/mean_terminated_length": 310.1725769042969, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.07146666666666666, "grad_norm": 0.004880924243479967, "learning_rate": 3.694444444444445e-06, "loss": 0.0259, "num_tokens": 13073200.0, "reward": 0.9935446977615356, "reward_std": 0.02437354251742363, "rewards/accuracy_reward_step": 0.31640625, "rewards/final_brier_reward_step": 0.6784957051277161, "rewards/format_reward_step": 0.9921875, "step": 67 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2604788551107049, "aux_distill/mean_u": 0.06340758781191137, "aux_distill/n_active_tok": 377.90625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49754325259515564, "calib/avg_num_step_conf": 5.98046875, "calib/ece": 0.3287058823529412, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000647058823529412, "calib/mean_conf": 0.004627450980392157, "calib/mu_c": 0.005058823529411765, "calib/mu_w": 0.004411764705882353, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007705258668569009, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1293.0, "completions/max_terminated_length": 1293.0, "completions/mean_length": 294.73046875, "completions/mean_terminated_length": 295.88629150390625, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.07253333333333334, "grad_norm": 0.004593390505760908, "learning_rate": 3.6666666666666666e-06, "loss": -0.0051, "num_tokens": 13252739.0, "reward": 0.9938269853591919, "reward_std": 0.02526906132698059, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.6634351015090942, "rewards/format_reward_step": 0.9921875, "step": 68 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.27652850397862494, "aux_distill/mean_u": 0.07943788932679391, "aux_distill/n_active_tok": 385.375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4839379370629371, "calib/avg_num_step_conf": 6.01953125, "calib/ece": 0.30555118110236223, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.003438228438228437, "calib/mean_conf": 0.007125984251968504, "calib/mu_c": 0.004743589743589744, "calib/mu_w": 0.00818181818181818, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0027952755905511807, "calib/std_conf": 0.04486207947935929, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2156.0, "completions/max_terminated_length": 2156.0, "completions/mean_length": 346.54296875, "completions/mean_terminated_length": 346.54296875, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.0736, "grad_norm": 0.005000604782253504, "learning_rate": 3.638888888888889e-06, "loss": 0.0754, "num_tokens": 13445950.0, "reward": 0.9926091432571411, "reward_std": 0.027545515447854996, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.6883434057235718, "rewards/format_reward_step": 0.9921875, "step": 69 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.27339917747303843, "aux_distill/mean_u": 0.07388013492360007, "aux_distill/n_active_tok": 392.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.510620479634564, "calib/avg_num_step_conf": 6.12890625, "calib/ece": 0.2723828125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0009311001141987059, "calib/mean_conf": 0.0049609375, "calib/mu_c": 0.005633802816901409, "calib/mu_w": 0.004702702702702703, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.008052466958708602, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1021.0, "completions/max_terminated_length": 1021.0, "completions/mean_length": 331.7734375, "completions/mean_terminated_length": 333.07452392578125, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.07466666666666667, "grad_norm": 0.004021927248686552, "learning_rate": 3.6111111111111115e-06, "loss": 0.031, "num_tokens": 13637876.0, "reward": 0.9976115226745605, "reward_std": 0.014393772929906845, "rewards/accuracy_reward_step": 0.27734375, "rewards/final_brier_reward_step": 0.7217855453491211, "rewards/format_reward_step": 0.99609375, "step": 70 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2658953149802983, "aux_distill/mean_u": 0.0833407176913205, "aux_distill/n_active_tok": 422.9375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.48202838827838834, "calib/avg_num_step_conf": 6.6484375, "calib/ece": 0.27669291338582674, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0011156898656898657, "calib/mean_conf": 0.0067716535433070876, "calib/mu_c": 0.0059722222222222225, "calib/mu_w": 0.007087912087912088, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.008500290848546832, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1047.0, "completions/max_terminated_length": 1047.0, "completions/mean_length": 354.65234375, "completions/mean_terminated_length": 356.04315185546875, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.07573333333333333, "grad_norm": 0.0043555935844779015, "learning_rate": 3.5833333333333335e-06, "loss": 0.0338, "num_tokens": 13833075.0, "reward": 0.9899024963378906, "reward_std": 0.03595929220318794, "rewards/accuracy_reward_step": 0.28125, "rewards/final_brier_reward_step": 0.710273802280426, "rewards/format_reward_step": 0.98828125, "step": 71 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2881365141365677, "aux_distill/mean_u": 0.08016156748594756, "aux_distill/n_active_tok": 470.875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.48798913800306365, "calib/avg_num_step_conf": 7.28125, "calib/ece": 0.3341501976284585, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00046302743350508293, "calib/mean_conf": 0.005770750988142293, "calib/mu_c": 0.005465116279069767, "calib/mu_w": 0.00592814371257485, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007995726021686987, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1039.0, "completions/max_terminated_length": 1039.0, "completions/mean_length": 360.87109375, "completions/mean_terminated_length": 362.2862854003906, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.0768, "grad_norm": 0.004437967669218779, "learning_rate": 3.555555555555556e-06, "loss": 0.0204, "num_tokens": 14029866.0, "reward": 0.9920222163200378, "reward_std": 0.030886013060808182, "rewards/accuracy_reward_step": 0.33984375, "rewards/final_brier_reward_step": 0.6559194922447205, "rewards/format_reward_step": 0.98828125, "step": 72 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2541104690171778, "aux_distill/mean_u": 0.061887756224744496, "aux_distill/n_active_tok": 391.0, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5053516317552039, "calib/avg_num_step_conf": 6.109375, "calib/ece": 0.3766141732283465, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0009481909514741623, "calib/mean_conf": 0.006771653543307087, "calib/mu_c": 0.006185567010309278, "calib/mu_w": 0.00713375796178344, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0007480314960629921, "calib/std_conf": 0.01374010107607492, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2095.0, "completions/max_terminated_length": 2095.0, "completions/mean_length": 340.58203125, "completions/mean_terminated_length": 340.58203125, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.07786666666666667, "grad_norm": 0.004330141935497522, "learning_rate": 3.5277777777777784e-06, "loss": 0.0147, "num_tokens": 14224087.0, "reward": 0.9905085563659668, "reward_std": 0.03687606751918793, "rewards/accuracy_reward_step": 0.37890625, "rewards/final_brier_reward_step": 0.6138296127319336, "rewards/format_reward_step": 0.98828125, "step": 73 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2642949102446437, "aux_distill/mean_u": 0.06173665004754539, "aux_distill/n_active_tok": 387.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.512218045112782, "calib/avg_num_step_conf": 6.05078125, "calib/ece": 0.32298039215686275, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002257727652464493, "calib/mean_conf": 0.009490196078431374, "calib/mu_c": 0.007976190476190477, "calib/mu_w": 0.01023391812865497, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0015294117647058824, "calib/std_conf": 0.01980628989915568, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1834.0, "completions/max_terminated_length": 1834.0, "completions/mean_length": 340.484375, "completions/mean_terminated_length": 340.484375, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.07893333333333333, "grad_norm": 0.005016027018427849, "learning_rate": 3.5e-06, "loss": 0.0484, "num_tokens": 14415179.0, "reward": 0.998470664024353, "reward_std": 0.015169315040111542, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.6727226972579956, "rewards/format_reward_step": 0.99609375, "step": 74 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2712880775798112, "aux_distill/mean_u": 0.08906336368210896, "aux_distill/n_active_tok": 379.28125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5247783797094311, "calib/avg_num_step_conf": 5.90234375, "calib/ece": 0.4777254901960784, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00031273085446934315, "calib/mean_conf": 0.008549019607843138, "calib/mu_c": 0.00870967741935484, "calib/mu_w": 0.008396946564885497, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.01226554831670478, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1142.0, "completions/max_terminated_length": 1142.0, "completions/mean_length": 335.21484375, "completions/mean_terminated_length": 336.5294189453125, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.08, "grad_norm": 0.00509895384311676, "learning_rate": 3.4722222222222224e-06, "loss": 0.0256, "num_tokens": 14605746.0, "reward": 1.0002011060714722, "reward_std": 0.017752250656485558, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5199335813522339, "rewards/format_reward_step": 0.99609375, "step": 75 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.30028481944464147, "aux_distill/mean_u": 0.09071633159242892, "aux_distill/n_active_tok": 384.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4678846153846155, "calib/avg_num_step_conf": 6.0078125, "calib/ece": 0.4003149606299213, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0009756410256410268, "calib/mean_conf": 0.009133858267716536, "calib/mu_c": 0.008557692307692306, "calib/mu_w": 0.009533333333333333, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.011190802047579116, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1724.0, "completions/max_terminated_length": 1724.0, "completions/mean_length": 338.84375, "completions/mean_terminated_length": 340.1725769042969, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.08106666666666666, "grad_norm": 0.004829046316444874, "learning_rate": 3.444444444444445e-06, "loss": 0.008, "num_tokens": 14795546.0, "reward": 0.9955216646194458, "reward_std": 0.026836896315217018, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.5926058292388916, "rewards/format_reward_step": 0.98828125, "step": 76 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.28181065688841045, "aux_distill/mean_u": 0.08920006079870602, "aux_distill/n_active_tok": 400.75, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5287990196078431, "calib/avg_num_step_conf": 6.26171875, "calib/ece": 0.3913779527559056, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001964654282765736, "calib/mean_conf": 0.010196850393700788, "calib/mu_c": 0.011372549019607842, "calib/mu_w": 0.009407894736842106, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.016870296950967947, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2167.0, "completions/max_terminated_length": 2167.0, "completions/mean_length": 337.578125, "completions/mean_terminated_length": 337.578125, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.08213333333333334, "grad_norm": 0.004952338989824057, "learning_rate": 3.416666666666667e-06, "loss": 0.1114, "num_tokens": 14986630.0, "reward": 0.994382381439209, "reward_std": 0.0341326966881752, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.6020461320877075, "rewards/format_reward_step": 0.98828125, "step": 77 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.29722038214094937, "aux_distill/mean_u": 0.09135029054194987, "aux_distill/n_active_tok": 383.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5404382203162691, "calib/avg_num_step_conf": 6.0078125, "calib/ece": 0.344156862745098, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0016061377646743504, "calib/mean_conf": 0.016, "calib/mu_c": 0.017032967032967035, "calib/mu_w": 0.015426829268292684, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001647058823529412, "calib/std_conf": 0.04928190217872014, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 854.0, "completions/max_terminated_length": 854.0, "completions/mean_length": 359.78515625, "completions/mean_terminated_length": 361.19610595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.0832, "grad_norm": 0.004928613547235727, "learning_rate": 3.3888888888888893e-06, "loss": 0.0262, "num_tokens": 15186759.0, "reward": 1.0008113384246826, "reward_std": 0.021877873688936234, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.6500601768493652, "rewards/format_reward_step": 0.99609375, "step": 78 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.27827800950035453, "aux_distill/mean_u": 0.08564135031592453, "aux_distill/n_active_tok": 423.78125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5051779325927321, "calib/avg_num_step_conf": 6.62109375, "calib/ece": 0.43377952755905513, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001154835875227517, "calib/mean_conf": 0.011968503937007874, "calib/mu_c": 0.011327433628318584, "calib/mu_w": 0.012482269503546101, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00043307086614173226, "calib/std_conf": 0.012488695365610609, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2727.0, "completions/max_terminated_length": 2727.0, "completions/mean_length": 395.79296875, "completions/mean_terminated_length": 395.79296875, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.08426666666666667, "grad_norm": 0.004926387686282396, "learning_rate": 3.3611111111111117e-06, "loss": 0.088, "num_tokens": 15394458.0, "reward": 0.9950859546661377, "reward_std": 0.03354940935969353, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5604844093322754, "rewards/format_reward_step": 0.98828125, "step": 79 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.31684785219840705, "aux_distill/mean_u": 0.10744304575263432, "aux_distill/n_active_tok": 405.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5537071078431373, "calib/avg_num_step_conf": 6.375, "calib/ece": 0.45816406249999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0017205882352941178, "calib/mean_conf": 0.011835937500000001, "calib/mu_c": 0.012750000000000001, "calib/mu_w": 0.011029411764705883, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.000625, "calib/std_conf": 0.013669810111925248, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1007.0, "completions/max_terminated_length": 1007.0, "completions/mean_length": 334.9765625, "completions/mean_terminated_length": 336.29022216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.08533333333333333, "grad_norm": 0.005264780949801207, "learning_rate": 3.3333333333333333e-06, "loss": 0.0389, "num_tokens": 15582372.0, "reward": 1.0019071102142334, "reward_std": 0.018337277695536613, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5389702916145325, "rewards/format_reward_step": 0.99609375, "step": 80 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3030662953387946, "aux_distill/mean_u": 0.09600267836191516, "aux_distill/n_active_tok": 392.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.571330777656079, "calib/avg_num_step_conf": 6.125, "calib/ece": 0.3348031496062992, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0016675794085432624, "calib/mean_conf": 0.013228346456692915, "calib/mu_c": 0.01431818181818182, "calib/mu_w": 0.012650602409638558, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0007874015748031496, "calib/std_conf": 0.01678403298994395, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2888.0, "completions/max_terminated_length": 2888.0, "completions/mean_length": 359.35546875, "completions/mean_terminated_length": 359.35546875, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.0864, "grad_norm": 0.004292392637580633, "learning_rate": 3.3055555555555558e-06, "loss": 0.0761, "num_tokens": 15780615.0, "reward": 0.9968827962875366, "reward_std": 0.028621265664696693, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.6578280925750732, "rewards/format_reward_step": 0.9921875, "step": 81 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2719600473064929, "aux_distill/mean_u": 0.06987277391391887, "aux_distill/n_active_tok": 367.625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.527185331380913, "calib/avg_num_step_conf": 5.7421875, "calib/ece": 0.4353174603174602, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0024269434010313895, "calib/mean_conf": 0.016825396825396827, "calib/mu_c": 0.015486725663716814, "calib/mu_w": 0.017913669064748203, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.001865079365079365, "calib/std_conf": 0.03316985611969488, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2329.0, "completions/max_terminated_length": 2329.0, "completions/mean_length": 350.4140625, "completions/mean_terminated_length": 350.4140625, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "epoch": 0.08746666666666666, "grad_norm": 0.004946700297296047, "learning_rate": 3.277777777777778e-06, "loss": 0.0753, "num_tokens": 15975873.0, "reward": 0.9825640320777893, "reward_std": 0.04624699801206589, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5510656237602234, "rewards/format_reward_step": 0.97265625, "step": 82 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.29490388510748744, "aux_distill/mean_u": 0.10091545887367892, "aux_distill/n_active_tok": 411.9375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.542888678650268, "calib/avg_num_step_conf": 6.41796875, "calib/ece": 0.39640625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0015540838852097123, "calib/mean_conf": 0.013750000000000002, "calib/mu_c": 0.014666666666666666, "calib/mu_w": 0.013112582781456954, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.011353688827865594, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1210.0, "completions/max_terminated_length": 1210.0, "completions/mean_length": 383.4453125, "completions/mean_terminated_length": 384.94903564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.08853333333333334, "grad_norm": 0.004362845327705145, "learning_rate": 3.2500000000000002e-06, "loss": 0.0247, "num_tokens": 16181299.0, "reward": 1.0038261413574219, "reward_std": 0.013152113184332848, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6014023423194885, "rewards/format_reward_step": 0.99609375, "step": 83 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.30570834246464074, "aux_distill/mean_u": 0.09895186440162848, "aux_distill/n_active_tok": 367.53125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5347222222222222, "calib/avg_num_step_conf": 5.7421875, "calib/ece": 0.31615079365079357, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006130952380952372, "calib/mean_conf": 0.02257936507936508, "calib/mu_c": 0.026666666666666658, "calib/mu_w": 0.020535714285714286, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.002698412698412698, "calib/std_conf": 0.04605997697526515, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 973.0, "completions/max_terminated_length": 973.0, "completions/mean_length": 322.72265625, "completions/mean_terminated_length": 323.9882507324219, "completions/min_length": 0.0, "completions/min_terminated_length": 44.0, "epoch": 0.0896, "grad_norm": 0.005511968396604061, "learning_rate": 3.2222222222222227e-06, "loss": 0.0122, "num_tokens": 16369836.0, "reward": 0.9879236221313477, "reward_std": 0.0691649466753006, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.6672534942626953, "rewards/format_reward_step": 0.98046875, "step": 84 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2954780561849475, "aux_distill/mean_u": 0.10002237214826427, "aux_distill/n_active_tok": 384.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5420512820512821, "calib/avg_num_step_conf": 6.00390625, "calib/ece": 0.395092125984252, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0002288461538461524, "calib/mean_conf": 0.017506299212598424, "calib/mu_c": 0.017371153846153845, "calib/mu_w": 0.017599999999999998, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0015748031496062992, "calib/std_conf": 0.01828600966029217, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2014.0, "completions/max_terminated_length": 2014.0, "completions/mean_length": 366.33984375, "completions/mean_terminated_length": 366.33984375, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.09066666666666667, "grad_norm": 0.0047206999734044075, "learning_rate": 3.1944444444444443e-06, "loss": 0.0609, "num_tokens": 16571443.0, "reward": 0.99502032995224, "reward_std": 0.04041251540184021, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.5955094695091248, "rewards/format_reward_step": 0.98828125, "step": 85 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2898980306927115, "aux_distill/mean_u": 0.09469991303484078, "aux_distill/n_active_tok": 381.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4839870820157438, "calib/avg_num_step_conf": 5.984375, "calib/ece": 0.3355859375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005860862544573766, "calib/mean_conf": 0.0237109375, "calib/mu_c": 0.019887640449438203, "calib/mu_w": 0.02574850299401197, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0058203125, "calib/std_conf": 0.059770871399629874, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 978.0, "completions/max_terminated_length": 978.0, "completions/mean_length": 349.9609375, "completions/mean_terminated_length": 351.3333435058594, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.09173333333333333, "grad_norm": 0.005566046107560396, "learning_rate": 3.1666666666666667e-06, "loss": 0.0274, "num_tokens": 16766545.0, "reward": 0.9970375299453735, "reward_std": 0.03623506426811218, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6542313098907471, "rewards/format_reward_step": 0.9921875, "step": 86 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.28562748548574746, "aux_distill/mean_u": 0.09414752872878823, "aux_distill/n_active_tok": 366.3125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5269451920724249, "calib/avg_num_step_conf": 5.69921875, "calib/ece": 0.4659375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0078125, "calib/gap": -0.01422069977978957, "calib/mean_conf": 0.025312500000000005, "calib/mu_c": 0.017868852459016392, "calib/mu_w": 0.03208955223880596, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00734375, "calib/std_conf": 0.08236198208973604, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 935.0, "completions/max_terminated_length": 935.0, "completions/mean_length": 334.71484375, "completions/mean_terminated_length": 336.0274658203125, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.0928, "grad_norm": 0.0058571891859173775, "learning_rate": 3.138888888888889e-06, "loss": -0.0098, "num_tokens": 16957728.0, "reward": 1.0048034191131592, "reward_std": 0.018815290182828903, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5330445170402527, "rewards/format_reward_step": 1.0, "step": 87 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.28512857877649367, "aux_distill/mean_u": 0.090462066060402, "aux_distill/n_active_tok": 408.3125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5339267285861713, "calib/avg_num_step_conf": 6.3671875, "calib/ece": 0.3851968503937008, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0019569143446852445, "calib/mean_conf": 0.016377952755905513, "calib/mu_c": 0.01754901960784314, "calib/mu_w": 0.015592105263157895, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.012653423260850814, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1426.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 391.703125, "completions/mean_terminated_length": 393.2392272949219, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.09386666666666667, "grad_norm": 0.005111593287438154, "learning_rate": 3.1111111111111116e-06, "loss": 0.0367, "num_tokens": 17167852.0, "reward": 0.9911565780639648, "reward_std": 0.05165533721446991, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.5995007753372192, "rewards/format_reward_step": 0.984375, "step": 88 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3024716544896364, "aux_distill/mean_u": 0.09634288435249655, "aux_distill/n_active_tok": 387.75, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5104699703743604, "calib/avg_num_step_conf": 6.0703125, "calib/ece": 0.3505952380952381, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002077834635065976, "calib/mean_conf": 0.02242063492063492, "calib/mu_c": 0.02372340425531914, "calib/mu_w": 0.021645569620253165, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.02292356283238295, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1989.0, "completions/max_terminated_length": 1989.0, "completions/mean_length": 395.765625, "completions/mean_terminated_length": 397.3176574707031, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.09493333333333333, "grad_norm": 0.004692970775067806, "learning_rate": 3.0833333333333336e-06, "loss": 0.0499, "num_tokens": 17378056.0, "reward": 0.9808880090713501, "reward_std": 0.07970473170280457, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.6219324469566345, "rewards/format_reward_step": 0.97265625, "step": 89 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.31103678117506206, "aux_distill/mean_u": 0.12257401174085307, "aux_distill/n_active_tok": 406.15625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4879192546583851, "calib/avg_num_step_conf": 6.3671875, "calib/ece": 0.4323137254901961, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0016366459627329237, "calib/mean_conf": 0.018666666666666665, "calib/mu_c": 0.01956521739130435, "calib/mu_w": 0.017928571428571426, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.01844405070845197, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 984.0, "completions/max_terminated_length": 984.0, "completions/mean_length": 382.62890625, "completions/mean_terminated_length": 384.1294250488281, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.096, "grad_norm": 0.004809149540960789, "learning_rate": 3.055555555555556e-06, "loss": 0.0212, "num_tokens": 17579329.0, "reward": 0.9986032843589783, "reward_std": 0.038281530141830444, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.5597066283226013, "rewards/format_reward_step": 0.98828125, "step": 90 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2773177553899586, "aux_distill/mean_u": 0.07881918878088494, "aux_distill/n_active_tok": 398.0, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5500914768784304, "calib/avg_num_step_conf": 6.21875, "calib/ece": 0.46099206349206345, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0019210144470380441, "calib/mean_conf": 0.01916666666666667, "calib/mu_c": 0.02016528925619835, "calib/mu_w": 0.018244274809160306, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.018827221932996, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2924.0, "completions/max_terminated_length": 2924.0, "completions/mean_length": 424.81640625, "completions/mean_terminated_length": 424.81640625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.09706666666666666, "grad_norm": 0.004709687549620867, "learning_rate": 3.0277777777777776e-06, "loss": 0.0682, "num_tokens": 17795794.0, "reward": 0.9818326234817505, "reward_std": 0.07131138443946838, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.518352746963501, "rewards/format_reward_step": 0.97265625, "step": 91 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24812287255190313, "aux_distill/mean_u": 0.06410933965058271, "aux_distill/n_active_tok": 388.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5300342549547346, "calib/avg_num_step_conf": 6.140625, "calib/ece": 0.45496093750000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0021079031074137493, "calib/mean_conf": 0.021601562500000004, "calib/mu_c": 0.022704918032786885, "calib/mu_w": 0.020597014925373136, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.022329506769263707, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1385.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 381.68359375, "completions/mean_terminated_length": 383.180419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 120.0, "epoch": 0.09813333333333334, "grad_norm": 0.005170757882297039, "learning_rate": 3e-06, "loss": 0.0344, "num_tokens": 18000225.0, "reward": 1.0103375911712646, "reward_std": 0.01272611878812313, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5441129207611084, "rewards/format_reward_step": 1.0, "step": 92 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2716197760310024, "aux_distill/mean_u": 0.08429833158499851, "aux_distill/n_active_tok": 430.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5044732232232232, "calib/avg_num_step_conf": 6.72265625, "calib/ece": 0.41529411764705887, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0028697447447447386, "calib/mean_conf": 0.024235294117647063, "calib/mu_c": 0.02585585585585585, "calib/mu_w": 0.02298611111111111, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0021176470588235297, "calib/std_conf": 0.055906991472460144, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1958.0, "completions/max_terminated_length": 1958.0, "completions/mean_length": 417.24609375, "completions/mean_terminated_length": 417.24609375, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 0.0992, "grad_norm": 0.004604802001267672, "learning_rate": 2.9722222222222225e-06, "loss": 0.0576, "num_tokens": 18212816.0, "reward": 1.005455493927002, "reward_std": 0.02824069932103157, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.5812234282493591, "rewards/format_reward_step": 0.99609375, "step": 93 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2312735291197896, "aux_distill/mean_u": 0.0755610747205075, "aux_distill/n_active_tok": 407.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5466537966537968, "calib/avg_num_step_conf": 6.36328125, "calib/ece": 0.3968379446640316, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.003857142857142861, "calib/mean_conf": 0.023399209486166007, "calib/mu_c": 0.02114285714285714, "calib/mu_w": 0.025, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002608695652173913, "calib/std_conf": 0.0438681405386164, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2575.0, "completions/max_terminated_length": 2575.0, "completions/mean_length": 424.89453125, "completions/mean_terminated_length": 426.5608215332031, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.10026666666666667, "grad_norm": 0.004604880232363939, "learning_rate": 2.944444444444445e-06, "loss": 0.0519, "num_tokens": 18430269.0, "reward": 0.9957316517829895, "reward_std": 0.044136419892311096, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.593025803565979, "rewards/format_reward_step": 0.98828125, "step": 94 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2925974018871784, "aux_distill/mean_u": 0.10130479070489674, "aux_distill/n_active_tok": 403.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5033488372093023, "calib/avg_num_step_conf": 6.296875, "calib/ece": 0.47114173228346456, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00084403100775194, "calib/mean_conf": 0.025708661417322837, "calib/mu_c": 0.02528, "calib/mu_w": 0.02612403100775194, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.002362204724409449, "calib/std_conf": 0.04316341594638858, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2479.0, "completions/max_terminated_length": 2479.0, "completions/mean_length": 414.8828125, "completions/mean_terminated_length": 414.8828125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.10133333333333333, "grad_norm": 0.004823321010917425, "learning_rate": 2.916666666666667e-06, "loss": 0.0705, "num_tokens": 18642607.0, "reward": 0.9992181062698364, "reward_std": 0.04958602786064148, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5257800817489624, "rewards/format_reward_step": 0.984375, "step": 95 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.26682930160313845, "aux_distill/mean_u": 0.09679726153203617, "aux_distill/n_active_tok": 406.75, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5002578150177248, "calib/avg_num_step_conf": 6.35546875, "calib/ece": 0.5546825396825397, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00042926200451176913, "calib/mean_conf": 0.020714285714285716, "calib/mu_c": 0.020896551724137933, "calib/mu_w": 0.020467289719626164, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.01755619419602455, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2514.0, "completions/max_terminated_length": 2514.0, "completions/mean_length": 430.94921875, "completions/mean_terminated_length": 430.94921875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.1024, "grad_norm": 0.004958832170814276, "learning_rate": 2.888888888888889e-06, "loss": 0.1083, "num_tokens": 18858746.0, "reward": 0.9919416904449463, "reward_std": 0.06605525314807892, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.4370085895061493, "rewards/format_reward_step": 0.98046875, "step": 96 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2626429311931133, "aux_distill/mean_u": 0.07569661717053826, "aux_distill/n_active_tok": 415.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5216194482246691, "calib/avg_num_step_conf": 6.5390625, "calib/ece": 0.437921568627451, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.009319559569466784, "calib/mean_conf": 0.024823529411764703, "calib/mu_c": 0.02983050847457627, "calib/mu_w": 0.020510948905109485, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.06323833098042919, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1214.0, "completions/max_terminated_length": 1214.0, "completions/mean_length": 411.3984375, "completions/mean_terminated_length": 413.01177978515625, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.10346666666666667, "grad_norm": 0.004902747459709644, "learning_rate": 2.861111111111111e-06, "loss": 0.0518, "num_tokens": 19069136.0, "reward": 1.0075451135635376, "reward_std": 0.026664312928915024, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5580589771270752, "rewards/format_reward_step": 0.99609375, "step": 97 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3062682868912816, "aux_distill/mean_u": 0.12505230247901689, "aux_distill/n_active_tok": 402.5, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.49301054771889696, "calib/avg_num_step_conf": 6.2890625, "calib/ece": 0.4651394422310757, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0001880798068369556, "calib/mean_conf": 0.02091633466135458, "calib/mu_c": 0.02081967213114754, "calib/mu_w": 0.021007751937984497, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.01900820152488685, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2644.0, "completions/max_terminated_length": 2644.0, "completions/mean_length": 452.80078125, "completions/mean_terminated_length": 454.5765075683594, "completions/min_length": 0.0, "completions/min_terminated_length": 163.0, "epoch": 0.10453333333333334, "grad_norm": 0.0052138082683086395, "learning_rate": 2.8333333333333335e-06, "loss": 0.0887, "num_tokens": 19291237.0, "reward": 0.9880070090293884, "reward_std": 0.0714532807469368, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5228890180587769, "rewards/format_reward_step": 0.9765625, "step": 98 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24276379751972854, "aux_distill/mean_u": 0.0592441827702998, "aux_distill/n_active_tok": 411.90625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.49762761780104714, "calib/avg_num_step_conf": 6.4765625, "calib/ece": 0.2354509803921569, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004008507853403141, "calib/mean_conf": 0.02862745098039216, "calib/mu_c": 0.025625000000000002, "calib/mu_w": 0.029633507853403143, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0065490196078431375, "calib/std_conf": 0.04644286423769036, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1407.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 477.36328125, "completions/mean_terminated_length": 479.2353210449219, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.1056, "grad_norm": 0.00454300781711936, "learning_rate": 2.805555555555556e-06, "loss": -0.0003, "num_tokens": 19519242.0, "reward": 0.9911375045776367, "reward_std": 0.040163010358810425, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.7479000091552734, "rewards/format_reward_step": 0.984375, "step": 99 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.28997284499928355, "aux_distill/mean_u": 0.10573104992759977, "aux_distill/n_active_tok": 403.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.44573888818538693, "calib/avg_num_step_conf": 6.31640625, "calib/ece": 0.39447058823529413, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.009021780422945425, "calib/mean_conf": 0.028196078431372548, "calib/mu_c": 0.02292452830188679, "calib/mu_w": 0.031946308724832215, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0034901960784313726, "calib/std_conf": 0.05780868365730124, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1251.0, "completions/max_terminated_length": 1251.0, "completions/mean_length": 438.28515625, "completions/mean_terminated_length": 440.0039367675781, "completions/min_length": 0.0, "completions/min_terminated_length": 193.0, "epoch": 0.10666666666666667, "grad_norm": 0.0051354048773646355, "learning_rate": 2.7777777777777783e-06, "loss": 0.012, "num_tokens": 19738851.0, "reward": 1.0054786205291748, "reward_std": 0.020914018154144287, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.5968949198722839, "rewards/format_reward_step": 0.99609375, "step": 100 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2594314431771636, "aux_distill/mean_u": 0.0661123765385953, "aux_distill/n_active_tok": 425.03125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5487693389592124, "calib/avg_num_step_conf": 6.6171875, "calib/ece": 0.34370967741935476, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004032258064516129, "calib/gap": 0.0009760900140646997, "calib/mean_conf": 0.03282258064516129, "calib/mu_c": 0.03344444444444445, "calib/mu_w": 0.03246835443037975, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0068145161290322585, "calib/std_conf": 0.08555379824029012, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2706.0, "completions/max_terminated_length": 2706.0, "completions/mean_length": 521.08203125, "completions/mean_terminated_length": 521.08203125, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.10773333333333333, "grad_norm": 0.004390342626720667, "learning_rate": 2.7500000000000004e-06, "loss": 0.1169, "num_tokens": 19979240.0, "reward": 0.9725391864776611, "reward_std": 0.10312092304229736, "rewards/accuracy_reward_step": 0.3515625, "rewards/final_brier_reward_step": 0.6286722421646118, "rewards/format_reward_step": 0.96484375, "step": 101 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24633567919954658, "aux_distill/mean_u": 0.07292928626486234, "aux_distill/n_active_tok": 420.9375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5528360528360529, "calib/avg_num_step_conf": 6.5234375, "calib/ece": 0.4944578313253012, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.012048192771084338, "calib/gap": 0.02542540792540793, "calib/mean_conf": 0.04040160642570282, "calib/mu_c": 0.05234848484848485, "calib/mu_w": 0.02692307692307692, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0023694779116465864, "calib/std_conf": 0.10869728495438853, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2759.0, "completions/max_terminated_length": 2759.0, "completions/mean_length": 456.921875, "completions/mean_terminated_length": 456.921875, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.1088, "grad_norm": 0.004803146701306105, "learning_rate": 2.7222222222222224e-06, "loss": 0.0851, "num_tokens": 20202908.0, "reward": 0.9931085705757141, "reward_std": 0.07233564555644989, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.497935950756073, "rewards/format_reward_step": 0.97265625, "step": 102 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2805954380892217, "aux_distill/mean_u": 0.0894842856011007, "aux_distill/n_active_tok": 416.28125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4522317188983856, "calib/avg_num_step_conf": 6.50390625, "calib/ece": 0.44277777777777777, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": -0.012991452991452997, "calib/mean_conf": 0.030634920634920636, "calib/mu_c": 0.02367521367521367, "calib/mu_w": 0.03666666666666667, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004563492063492063, "calib/std_conf": 0.061608673267804566, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2636.0, "completions/max_terminated_length": 2636.0, "completions/mean_length": 532.38671875, "completions/mean_terminated_length": 532.38671875, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 0.10986666666666667, "grad_norm": 0.004372611176222563, "learning_rate": 2.6944444444444444e-06, "loss": 0.0946, "num_tokens": 20443751.0, "reward": 0.9928652048110962, "reward_std": 0.06021931394934654, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5443242192268372, "rewards/format_reward_step": 0.984375, "step": 103 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24913631053641438, "aux_distill/mean_u": 0.07466481954948698, "aux_distill/n_active_tok": 433.46875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.528455790784558, "calib/avg_num_step_conf": 6.83203125, "calib/ece": 0.401640234375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002628555417185558, "calib/mean_conf": 0.028047265625000005, "calib/mu_c": 0.029546363636363638, "calib/mu_w": 0.02691780821917808, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.021453059048144936, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1032.0, "completions/max_terminated_length": 1032.0, "completions/mean_length": 441.9140625, "completions/mean_terminated_length": 443.6470947265625, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.11093333333333333, "grad_norm": 0.005111818201839924, "learning_rate": 2.666666666666667e-06, "loss": 0.0324, "num_tokens": 20663561.0, "reward": 1.008166790008545, "reward_std": 0.025309395045042038, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.5905523300170898, "rewards/format_reward_step": 0.99609375, "step": 104 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.25391951971687376, "aux_distill/mean_u": 0.07310204909175883, "aux_distill/n_active_tok": 430.5, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5330645161290323, "calib/avg_num_step_conf": 6.7265625, "calib/ece": 0.4659055118110236, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0021339950372208476, "calib/mean_conf": 0.02818897637795276, "calib/mu_c": 0.027096774193548386, "calib/mu_w": 0.029230769230769234, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.002952755905511811, "calib/std_conf": 0.025669532871671733, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2520.0, "completions/max_terminated_length": 2520.0, "completions/mean_length": 485.95703125, "completions/mean_terminated_length": 487.8627624511719, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.112, "grad_norm": 0.004727492108941078, "learning_rate": 2.6388888888888893e-06, "loss": 0.0597, "num_tokens": 20893726.0, "reward": 1.0006881952285767, "reward_std": 0.04665821045637131, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5287203192710876, "rewards/format_reward_step": 0.98828125, "step": 105 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.28392570302821696, "aux_distill/mean_u": 0.09882959122606673, "aux_distill/n_active_tok": 381.78125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5003563330093943, "calib/avg_num_step_conf": 5.96875, "calib/ece": 0.38511904761904764, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00031292517006803466, "calib/mean_conf": 0.03694444444444445, "calib/mu_c": 0.03676190476190476, "calib/mu_w": 0.037074829931972794, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0026984126984126986, "calib/std_conf": 0.0537316680522342, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2148.0, "completions/max_terminated_length": 2148.0, "completions/mean_length": 448.796875, "completions/mean_terminated_length": 450.556884765625, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.11306666666666666, "grad_norm": 0.005242538638412952, "learning_rate": 2.6111111111111113e-06, "loss": 0.0689, "num_tokens": 21113202.0, "reward": 0.9895493984222412, "reward_std": 0.08381446450948715, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.5923800468444824, "rewards/format_reward_step": 0.9765625, "step": 106 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2789200625848025, "aux_distill/mean_u": 0.08994341773107507, "aux_distill/n_active_tok": 420.375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4842135847107438, "calib/avg_num_step_conf": 6.5625, "calib/ece": 0.45875502008032126, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.010996900826446276, "calib/mean_conf": 0.040281124497991976, "calib/mu_c": 0.03462809917355372, "calib/mu_w": 0.045625, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.006546184738955823, "calib/std_conf": 0.06681149894070164, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2999.0, "completions/max_terminated_length": 2999.0, "completions/mean_length": 493.15234375, "completions/mean_terminated_length": 495.0863037109375, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.11413333333333334, "grad_norm": 0.00451470073312521, "learning_rate": 2.5833333333333337e-06, "loss": 0.0982, "num_tokens": 21344065.0, "reward": 0.9860634803771973, "reward_std": 0.08944529294967651, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5268144607543945, "rewards/format_reward_step": 0.97265625, "step": 107 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2768033891916275, "aux_distill/mean_u": 0.1026058405370248, "aux_distill/n_active_tok": 409.3125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5298049599586669, "calib/avg_num_step_conf": 6.390625, "calib/ece": 0.584296875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0078125, "calib/gap": 0.0021234823043141393, "calib/mean_conf": 0.04171875, "calib/mu_c": 0.04253164556962026, "calib/mu_w": 0.04040816326530612, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0044140625, "calib/std_conf": 0.09350475735724625, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1357.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 453.48046875, "completions/mean_terminated_length": 455.25885009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.1152, "grad_norm": 0.004895161371678114, "learning_rate": 2.5555555555555557e-06, "loss": 0.013, "num_tokens": 21563388.0, "reward": 1.0190551280975342, "reward_std": 0.03934864327311516, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.4248288869857788, "rewards/format_reward_step": 0.99609375, "step": 108 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.25648779910989106, "aux_distill/mean_u": 0.06899389784792351, "aux_distill/n_active_tok": 466.3125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5520887558563248, "calib/avg_num_step_conf": 7.1953125, "calib/ece": 0.4136546184738957, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": 0.009102030192608024, "calib/mean_conf": 0.049718875502008035, "calib/mu_c": 0.054690265486725675, "calib/mu_w": 0.04558823529411765, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004779116465863454, "calib/std_conf": 0.09181963114604492, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2755.0, "completions/max_terminated_length": 2755.0, "completions/mean_length": 523.25390625, "completions/mean_terminated_length": 523.25390625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.11626666666666667, "grad_norm": 0.004565491806715727, "learning_rate": 2.5277777777777778e-06, "loss": 0.1106, "num_tokens": 21801941.0, "reward": 0.9934476613998413, "reward_std": 0.08000709116458893, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5689265727996826, "rewards/format_reward_step": 0.97265625, "step": 109 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2622479344718158, "aux_distill/mean_u": 0.07462715782623361, "aux_distill/n_active_tok": 385.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.47438095238095235, "calib/avg_num_step_conf": 6.01953125, "calib/ece": 0.38908549019607847, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": -0.021064761904761906, "calib/mean_conf": 0.049659607843137256, "calib/mu_c": 0.03726857142857143, "calib/mu_w": 0.058333333333333334, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.013490196078431372, "calib/std_conf": 0.09347799770399542, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2308.0, "completions/max_terminated_length": 2308.0, "completions/mean_length": 444.89453125, "completions/mean_terminated_length": 444.89453125, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.11733333333333333, "grad_norm": 0.0050817145965993404, "learning_rate": 2.5e-06, "loss": 0.061, "num_tokens": 22020754.0, "reward": 1.005799412727356, "reward_std": 0.04130372405052185, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6053489446640015, "rewards/format_reward_step": 0.99609375, "step": 110 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.27884344570338726, "aux_distill/mean_u": 0.09474326124953433, "aux_distill/n_active_tok": 379.75, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5250907270679516, "calib/avg_num_step_conf": 5.93359375, "calib/ece": 0.43691699604743095, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012169315479914913, "calib/mean_conf": 0.05083003952569171, "calib/mu_c": 0.05713114754098362, "calib/mu_w": 0.044961832061068706, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0027667984189723325, "calib/std_conf": 0.10036545261126484, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2514.0, "completions/max_terminated_length": 2514.0, "completions/mean_length": 461.01953125, "completions/mean_terminated_length": 462.8274841308594, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.1184, "grad_norm": 0.00502881919965148, "learning_rate": 2.4722222222222226e-06, "loss": 0.0708, "num_tokens": 22246183.0, "reward": 1.0092535018920898, "reward_std": 0.06828673183917999, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5536632537841797, "rewards/format_reward_step": 0.98828125, "step": 111 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2952077784575522, "aux_distill/mean_u": 0.12138369561467421, "aux_distill/n_active_tok": 394.25, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5290826284970722, "calib/avg_num_step_conf": 6.22265625, "calib/ece": 0.37752988047808766, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": 0.02160897852960314, "calib/mean_conf": 0.0447808764940239, "calib/mu_c": 0.05726415094339624, "calib/mu_w": 0.0356551724137931, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.08238629610369641, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3039.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 531.4375, "completions/mean_terminated_length": 533.5216064453125, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.11946666666666667, "grad_norm": 0.004346077796071768, "learning_rate": 2.4444444444444447e-06, "loss": 0.0749, "num_tokens": 22490151.0, "reward": 0.999869167804718, "reward_std": 0.07355723530054092, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6052069664001465, "rewards/format_reward_step": 0.98046875, "step": 112 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2909727890510112, "aux_distill/mean_u": 0.09329004138584782, "aux_distill/n_active_tok": 421.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5154938271604937, "calib/avg_num_step_conf": 6.59375, "calib/ece": 0.42960784313725486, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.00021296296296297035, "calib/mean_conf": 0.05180392156862745, "calib/mu_c": 0.05191666666666667, "calib/mu_w": 0.0517037037037037, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005411764705882353, "calib/std_conf": 0.08042948917977372, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2453.0, "completions/max_terminated_length": 2453.0, "completions/mean_length": 429.10546875, "completions/mean_terminated_length": 429.10546875, "completions/min_length": 207.0, "completions/min_terminated_length": 207.0, "epoch": 0.12053333333333334, "grad_norm": 0.005415992345660925, "learning_rate": 2.4166666666666667e-06, "loss": 0.0887, "num_tokens": 22705202.0, "reward": 1.0138792991638184, "reward_std": 0.0524032860994339, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5668210983276367, "rewards/format_reward_step": 0.9921875, "step": 113 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2658761255443096, "aux_distill/mean_u": 0.08234790336442177, "aux_distill/n_active_tok": 377.46875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4665414922432899, "calib/avg_num_step_conf": 5.88671875, "calib/ece": 0.46392156862745093, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": -0.0002327013050972826, "calib/mean_conf": 0.06423529411764707, "calib/mu_c": 0.06412213740458016, "calib/mu_w": 0.06435483870967744, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.007215686274509805, "calib/std_conf": 0.12498197332530019, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1709.0, "completions/max_terminated_length": 1709.0, "completions/mean_length": 406.2265625, "completions/mean_terminated_length": 406.2265625, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.1216, "grad_norm": 0.005368839483708143, "learning_rate": 2.388888888888889e-06, "loss": 0.0373, "num_tokens": 22914220.0, "reward": 1.0151684284210205, "reward_std": 0.0704091489315033, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5264304876327515, "rewards/format_reward_step": 0.9921875, "step": 114 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24501192453317344, "aux_distill/mean_u": 0.07385531746068447, "aux_distill/n_active_tok": 413.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.519644762442729, "calib/avg_num_step_conf": 6.45703125, "calib/ece": 0.4083858267716536, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.0037977781961965745, "calib/mean_conf": 0.04405511811023623, "calib/mu_c": 0.04194690265486725, "calib/mu_w": 0.045744680851063826, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0037795275590551177, "calib/std_conf": 0.06914333020328298, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 990.0, "completions/max_terminated_length": 990.0, "completions/mean_length": 409.91015625, "completions/mean_terminated_length": 411.5176696777344, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.12266666666666666, "grad_norm": 0.005021527409553528, "learning_rate": 2.361111111111111e-06, "loss": 0.017, "num_tokens": 23124421.0, "reward": 1.007368564605713, "reward_std": 0.04787181690335274, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5811433792114258, "rewards/format_reward_step": 0.9921875, "step": 115 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.25968706305138767, "aux_distill/mean_u": 0.07294114098545383, "aux_distill/n_active_tok": 454.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.48019192913385833, "calib/avg_num_step_conf": 7.1015625, "calib/ece": 0.4547450980392157, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": -0.004753321850393698, "calib/mean_conf": 0.05396078431372548, "calib/mu_c": 0.05157480314960631, "calib/mu_w": 0.05632812500000001, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005333333333333333, "calib/std_conf": 0.09392319782450408, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2546.0, "completions/max_terminated_length": 2546.0, "completions/mean_length": 480.7265625, "completions/mean_terminated_length": 480.7265625, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.12373333333333333, "grad_norm": 0.004678206518292427, "learning_rate": 2.3333333333333336e-06, "loss": 0.0504, "num_tokens": 23352007.0, "reward": 1.0158360004425049, "reward_std": 0.043127551674842834, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5394843816757202, "rewards/format_reward_step": 0.99609375, "step": 116 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2503124848008156, "aux_distill/mean_u": 0.06315268753420264, "aux_distill/n_active_tok": 422.5625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5624794799395889, "calib/avg_num_step_conf": 6.6015625, "calib/ece": 0.3496062992125984, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": -0.0007735241972552376, "calib/mean_conf": 0.07078740157480315, "calib/mu_c": 0.07030927835051547, "calib/mu_w": 0.0710828025477707, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.019251968503937008, "calib/std_conf": 0.13701316851024462, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1923.0, "completions/max_terminated_length": 1923.0, "completions/mean_length": 429.171875, "completions/mean_terminated_length": 430.85491943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 182.0, "epoch": 0.1248, "grad_norm": 0.004880519118160009, "learning_rate": 2.305555555555556e-06, "loss": 0.0006, "num_tokens": 23568475.0, "reward": 1.0031300783157349, "reward_std": 0.07905666530132294, "rewards/accuracy_reward_step": 0.37890625, "rewards/final_brier_reward_step": 0.6390726566314697, "rewards/format_reward_step": 0.98828125, "step": 117 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2309753114823252, "aux_distill/mean_u": 0.0686600635114206, "aux_distill/n_active_tok": 419.0, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4429446319210099, "calib/avg_num_step_conf": 6.55078125, "calib/ece": 0.4574703557312253, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": -0.013482689663792007, "calib/mean_conf": 0.05446640316205534, "calib/mu_c": 0.0476984126984127, "calib/mu_w": 0.061181102362204705, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006956521739130434, "calib/std_conf": 0.09215584335319088, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2754.0, "completions/max_terminated_length": 2754.0, "completions/mean_length": 433.08203125, "completions/mean_terminated_length": 434.7804260253906, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.12586666666666665, "grad_norm": 0.004958142526447773, "learning_rate": 2.277777777777778e-06, "loss": 0.0466, "num_tokens": 23783352.0, "reward": 1.006095290184021, "reward_std": 0.06497341394424438, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.531721830368042, "rewards/format_reward_step": 0.98828125, "step": 118 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.29260341147892177, "aux_distill/mean_u": 0.08997504566866737, "aux_distill/n_active_tok": 431.09375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5216897856242118, "calib/avg_num_step_conf": 6.73828125, "calib/ece": 0.4380952380952381, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.012141235813366957, "calib/mean_conf": 0.046031746031746035, "calib/mu_c": 0.052295081967213115, "calib/mu_w": 0.04015384615384616, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0731174242048241, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2578.0, "completions/max_terminated_length": 2578.0, "completions/mean_length": 465.40625, "completions/mean_terminated_length": 467.2314147949219, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.12693333333333334, "grad_norm": 0.004794496577233076, "learning_rate": 2.25e-06, "loss": 0.0309, "num_tokens": 24007560.0, "reward": 1.0056226253509521, "reward_std": 0.05668995529413223, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5503078103065491, "rewards/format_reward_step": 0.984375, "step": 119 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.28152807522565126, "aux_distill/mean_u": 0.08463958399267932, "aux_distill/n_active_tok": 378.46875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5635561860120882, "calib/avg_num_step_conf": 5.90234375, "calib/ece": 0.482078431372549, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": 0.016905143702972733, "calib/mean_conf": 0.0507843137254902, "calib/mu_c": 0.05880597014925373, "calib/mu_w": 0.041900826446281, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.003686274509803921, "calib/std_conf": 0.09609723705511551, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2741.0, "completions/max_terminated_length": 2741.0, "completions/mean_length": 394.36328125, "completions/mean_terminated_length": 394.36328125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.128, "grad_norm": 0.005581455770879984, "learning_rate": 2.222222222222222e-06, "loss": 0.0293, "num_tokens": 24215205.0, "reward": 1.01896071434021, "reward_std": 0.050968606024980545, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5222964286804199, "rewards/format_reward_step": 0.9921875, "step": 120 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2638569069094956, "aux_distill/mean_u": 0.08030497207501669, "aux_distill/n_active_tok": 430.40625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5537249733258018, "calib/avg_num_step_conf": 6.72265625, "calib/ece": 0.39425196850393696, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.028054980229711923, "calib/mean_conf": 0.061417322834645675, "calib/mu_c": 0.07699115044247788, "calib/mu_w": 0.04893617021276596, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005393700787401575, "calib/std_conf": 0.12124176672719679, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2032.0, "completions/max_terminated_length": 2032.0, "completions/mean_length": 477.2421875, "completions/mean_terminated_length": 477.2421875, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 0.12906666666666666, "grad_norm": 0.005236073397099972, "learning_rate": 2.1944444444444445e-06, "loss": 0.0612, "num_tokens": 24442435.0, "reward": 1.013102650642395, "reward_std": 0.06841275840997696, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5965180397033691, "rewards/format_reward_step": 0.98828125, "step": 121 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.30170248029753566, "aux_distill/mean_u": 0.09285741823046123, "aux_distill/n_active_tok": 392.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5370165065287016, "calib/avg_num_step_conf": 6.12890625, "calib/ece": 0.4724313725490196, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": -0.0039042867701404374, "calib/mean_conf": 0.05635294117647059, "calib/mu_c": 0.05446969696969697, "calib/mu_w": 0.058373983739837404, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005568627450980392, "calib/std_conf": 0.08888653783895817, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2746.0, "completions/max_terminated_length": 2746.0, "completions/mean_length": 416.80078125, "completions/mean_terminated_length": 416.80078125, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.13013333333333332, "grad_norm": 0.006182767450809479, "learning_rate": 2.166666666666667e-06, "loss": 0.0311, "num_tokens": 24656480.0, "reward": 1.014758586883545, "reward_std": 0.05790330469608307, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5217046737670898, "rewards/format_reward_step": 0.9921875, "step": 122 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.29011322488076985, "aux_distill/mean_u": 0.09992779375887095, "aux_distill/n_active_tok": 414.84375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4108954845256215, "calib/avg_num_step_conf": 6.4609375, "calib/ece": 0.39342519685039384, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": -0.022611618467782847, "calib/mean_conf": 0.059015748031496056, "calib/mu_c": 0.04601851851851852, "calib/mu_w": 0.06863013698630137, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.013622047244094488, "calib/std_conf": 0.11863485195530105, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2660.0, "completions/max_terminated_length": 2660.0, "completions/mean_length": 486.0546875, "completions/mean_terminated_length": 486.0546875, "completions/min_length": 187.0, "completions/min_terminated_length": 187.0, "epoch": 0.1312, "grad_norm": 0.005104098469018936, "learning_rate": 2.138888888888889e-06, "loss": 0.0494, "num_tokens": 24886198.0, "reward": 0.9989855289459229, "reward_std": 0.05920081585645676, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.5878148078918457, "rewards/format_reward_step": 0.98828125, "step": 123 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.27182958112098277, "aux_distill/mean_u": 0.07772999730943052, "aux_distill/n_active_tok": 394.6875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5088645910551025, "calib/avg_num_step_conf": 6.1796875, "calib/ece": 0.5028515625000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": -0.0032406721027922053, "calib/mean_conf": 0.06785156250000002, "calib/mu_c": 0.06640845070422535, "calib/mu_w": 0.06964912280701756, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0080078125, "calib/std_conf": 0.11065003882199315, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1031.0, "completions/max_terminated_length": 1031.0, "completions/mean_length": 432.05078125, "completions/mean_terminated_length": 433.7451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 176.0, "epoch": 0.13226666666666667, "grad_norm": 0.005296853370964527, "learning_rate": 2.1111111111111114e-06, "loss": 0.0149, "num_tokens": 25103619.0, "reward": 1.0284123420715332, "reward_std": 0.04734199121594429, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.5021370649337769, "rewards/format_reward_step": 1.0, "step": 124 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2565637626685202, "aux_distill/mean_u": 0.08125035877125407, "aux_distill/n_active_tok": 428.6875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.519600428562425, "calib/avg_num_step_conf": 6.6796875, "calib/ece": 0.4577380952380951, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.009307997731140091, "calib/mean_conf": 0.07035714285714285, "calib/mu_c": 0.0658139534883721, "calib/mu_w": 0.07512195121951219, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.008095238095238095, "calib/std_conf": 0.10090203989676719, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2476.0, "completions/max_terminated_length": 2476.0, "completions/mean_length": 464.0078125, "completions/mean_terminated_length": 464.0078125, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.13333333333333333, "grad_norm": 0.004538561217486858, "learning_rate": 2.0833333333333334e-06, "loss": 0.1154, "num_tokens": 25327213.0, "reward": 1.0061924457550049, "reward_std": 0.09807506203651428, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5280097723007202, "rewards/format_reward_step": 0.98046875, "step": 125 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.25415724446065724, "aux_distill/mean_u": 0.09618354704585452, "aux_distill/n_active_tok": 453.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5506628787878788, "calib/avg_num_step_conf": 7.078125, "calib/ece": 0.3693700787401575, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.015748031496062992, "calib/gap": 0.03522474747474749, "calib/mean_conf": 0.07039370078740156, "calib/mu_c": 0.09036363636363638, "calib/mu_w": 0.05513888888888889, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003346456692913386, "calib/std_conf": 0.13549211311447404, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2794.0, "completions/max_terminated_length": 2794.0, "completions/mean_length": 458.890625, "completions/mean_terminated_length": 458.890625, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.1344, "grad_norm": 0.00495866360142827, "learning_rate": 2.0555555555555555e-06, "loss": 0.067, "num_tokens": 25550153.0, "reward": 1.0194499492645264, "reward_std": 0.06901095807552338, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.6170250177383423, "rewards/format_reward_step": 0.9921875, "step": 126 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24842926231212914, "aux_distill/mean_u": 0.08070918253257382, "aux_distill/n_active_tok": 426.5, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4438401214267645, "calib/avg_num_step_conf": 6.6640625, "calib/ece": 0.412579365079365, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.008068555527447507, "calib/mean_conf": 0.06734126984126985, "calib/mu_c": 0.06305084745762712, "calib/mu_w": 0.07111940298507463, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005833333333333333, "calib/std_conf": 0.09446420861627229, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2899.0, "completions/max_terminated_length": 2899.0, "completions/mean_length": 445.41796875, "completions/mean_terminated_length": 448.9252014160156, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.13546666666666668, "grad_norm": 0.0055075036361813545, "learning_rate": 2.027777777777778e-06, "loss": 0.0387, "num_tokens": 25767852.0, "reward": 1.0068135261535645, "reward_std": 0.07916107773780823, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5683144330978394, "rewards/format_reward_step": 0.984375, "step": 127 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.28774312208406627, "aux_distill/mean_u": 0.08850660982137487, "aux_distill/n_active_tok": 385.75, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5455390910781821, "calib/avg_num_step_conf": 6.02734375, "calib/ece": 0.432244094488189, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.007952755905511838, "calib/mean_conf": 0.08216535433070866, "calib/mu_c": 0.08614173228346458, "calib/mu_w": 0.07818897637795275, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007204724409448818, "calib/std_conf": 0.13285343672660974, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2503.0, "completions/max_terminated_length": 2503.0, "completions/mean_length": 465.015625, "completions/mean_terminated_length": 466.8392333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.13653333333333334, "grad_norm": 0.005524276290088892, "learning_rate": 2.0000000000000003e-06, "loss": 0.0462, "num_tokens": 25993560.0, "reward": 1.0189135074615479, "reward_std": 0.0828317403793335, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5534519553184509, "rewards/format_reward_step": 0.98828125, "step": 128 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.286829563556239, "aux_distill/mean_u": 0.10107326307054654, "aux_distill/n_active_tok": 427.4375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5765517241379311, "calib/avg_num_step_conf": 6.625, "calib/ece": 0.47545098039215694, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": 0.06378056426332289, "calib/mean_conf": 0.0931764705882353, "calib/mu_c": 0.1206896551724138, "calib/mu_w": 0.05690909090909091, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.15115148950539692, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1065.0, "completions/max_terminated_length": 1065.0, "completions/mean_length": 413.0859375, "completions/mean_terminated_length": 414.7059020996094, "completions/min_length": 0.0, "completions/min_terminated_length": 163.0, "epoch": 0.1376, "grad_norm": 0.005374370142817497, "learning_rate": 1.9722222222222224e-06, "loss": 0.0417, "num_tokens": 26201694.0, "reward": 1.048750400543213, "reward_std": 0.07723085582256317, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.5350008010864258, "rewards/format_reward_step": 0.99609375, "step": 129 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.25664816494099796, "aux_distill/mean_u": 0.08382097004231381, "aux_distill/n_active_tok": 418.5625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5630448717948718, "calib/avg_num_step_conf": 6.55859375, "calib/ece": 0.5329296875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.01171875, "calib/gap": 0.011325641025641028, "calib/mean_conf": 0.0841015625, "calib/mu_c": 0.08852564102564103, "calib/mu_w": 0.0772, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0038281250000000017, "calib/std_conf": 0.13308821908816196, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 425.19921875, "completions/mean_terminated_length": 426.86669921875, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.13866666666666666, "grad_norm": 0.00602386612445116, "learning_rate": 1.944444444444445e-06, "loss": 0.0334, "num_tokens": 26415833.0, "reward": 1.0415525436401367, "reward_std": 0.060056086629629135, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.47373008728027344, "rewards/format_reward_step": 1.0, "step": 130 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2656695644836873, "aux_distill/mean_u": 0.07452905320152445, "aux_distill/n_active_tok": 410.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.47207547169811315, "calib/avg_num_step_conf": 6.515625, "calib/ece": 0.3513671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.01171875, "calib/gap": -0.010069182389937092, "calib/mean_conf": 0.09816406250000001, "calib/mu_c": 0.09226415094339624, "calib/mu_w": 0.10233333333333333, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.017734375, "calib/std_conf": 0.15105148529059917, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1161.0, "completions/max_terminated_length": 1161.0, "completions/mean_length": 406.83984375, "completions/mean_terminated_length": 408.4353332519531, "completions/min_length": 0.0, "completions/min_terminated_length": 204.0, "epoch": 0.13973333333333332, "grad_norm": 0.006129199638962746, "learning_rate": 1.916666666666667e-06, "loss": 0.0029, "num_tokens": 26626192.0, "reward": 1.0219767093658447, "reward_std": 0.0614263117313385, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.629891037940979, "rewards/format_reward_step": 1.0, "step": 131 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.29841618961654603, "aux_distill/mean_u": 0.14273641680605606, "aux_distill/n_active_tok": 422.15625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4860747365059087, "calib/avg_num_step_conf": 6.625, "calib/ece": 0.5340625000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0078125, "calib/gap": 0.0047716384541680035, "calib/mean_conf": 0.075859375, "calib/mu_c": 0.07774193548387097, "calib/mu_w": 0.07297029702970297, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0022265625, "calib/std_conf": 0.10301340434433459, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1196.0, "completions/max_terminated_length": 1196.0, "completions/mean_length": 446.05078125, "completions/mean_terminated_length": 447.8000183105469, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.1408, "grad_norm": 0.005947123281657696, "learning_rate": 1.888888888888889e-06, "loss": 0.056, "num_tokens": 26845973.0, "reward": 1.0388870239257812, "reward_std": 0.04800880327820778, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.4723054766654968, "rewards/format_reward_step": 1.0, "step": 132 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2982191611081362, "aux_distill/mean_u": 0.09348481597065017, "aux_distill/n_active_tok": 451.40625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5446681792399319, "calib/avg_num_step_conf": 7.0390625, "calib/ece": 0.2591338582677165, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.02002977878615997, "calib/mean_conf": 0.09582677165354332, "calib/mu_c": 0.10939024390243904, "calib/mu_w": 0.08936046511627907, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.016062992125984252, "calib/std_conf": 0.14255233018907015, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2528.0, "completions/max_terminated_length": 2528.0, "completions/mean_length": 500.64453125, "completions/mean_terminated_length": 502.6078796386719, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.14186666666666667, "grad_norm": 0.004796723835170269, "learning_rate": 1.8611111111111113e-06, "loss": 0.0458, "num_tokens": 27080482.0, "reward": 1.0125898122787476, "reward_std": 0.08151218295097351, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.7126797437667847, "rewards/format_reward_step": 0.9921875, "step": 133 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.25477377604693174, "aux_distill/mean_u": 0.07858637495460441, "aux_distill/n_active_tok": 447.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5150949268596328, "calib/avg_num_step_conf": 6.96875, "calib/ece": 0.3933464566929134, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.015748031496062992, "calib/gap": 0.01750575785869904, "calib/mean_conf": 0.09027559055118112, "calib/mu_c": 0.09957983193277312, "calib/mu_w": 0.08207407407407408, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007559055118110235, "calib/std_conf": 0.149685769402636, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1522.0, "completions/max_terminated_length": 1522.0, "completions/mean_length": 510.84375, "completions/mean_terminated_length": 512.8471069335938, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.14293333333333333, "grad_norm": 0.004951619543135166, "learning_rate": 1.8333333333333333e-06, "loss": 0.0196, "num_tokens": 27320210.0, "reward": 1.0233181715011597, "reward_std": 0.0827612429857254, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5896050930023193, "rewards/format_reward_step": 0.9921875, "step": 134 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.26247306214645505, "aux_distill/mean_u": 0.07126044264829093, "aux_distill/n_active_tok": 443.28125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5324651162790698, "calib/avg_num_step_conf": 6.8984375, "calib/ece": 0.4218503937007875, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.01968503937007874, "calib/gap": 0.01186914728682173, "calib/mean_conf": 0.10925196850393701, "calib/mu_c": 0.11528000000000002, "calib/mu_w": 0.10341085271317829, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.019488188976377954, "calib/std_conf": 0.16813424840956995, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2301.0, "completions/max_terminated_length": 2301.0, "completions/mean_length": 519.9921875, "completions/mean_terminated_length": 519.9921875, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 0.144, "grad_norm": 0.004932735580950975, "learning_rate": 1.8055555555555557e-06, "loss": 0.0677, "num_tokens": 27559208.0, "reward": 1.0246297121047974, "reward_std": 0.10806107521057129, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.57269686460495, "rewards/format_reward_step": 0.98828125, "step": 135 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2540166104445234, "aux_distill/mean_u": 0.07143832899791504, "aux_distill/n_active_tok": 462.75, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5743910256410257, "calib/avg_num_step_conf": 7.23046875, "calib/ece": 0.3616929133858268, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.0005217948717948367, "calib/mean_conf": 0.09751968503937009, "calib/mu_c": 0.09721153846153847, "calib/mu_w": 0.09773333333333331, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.024881889763779527, "calib/std_conf": 0.1430177760216139, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2921.0, "completions/max_terminated_length": 2921.0, "completions/mean_length": 493.8828125, "completions/mean_terminated_length": 493.8828125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.14506666666666668, "grad_norm": 0.005398144014179707, "learning_rate": 1.777777777777778e-06, "loss": 0.0419, "num_tokens": 27794130.0, "reward": 1.0051078796386719, "reward_std": 0.10781945288181305, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.6234968900680542, "rewards/format_reward_step": 0.98046875, "step": 136 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2740769935771823, "aux_distill/mean_u": 0.098901419560321, "aux_distill/n_active_tok": 467.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5006140997297961, "calib/avg_num_step_conf": 7.328125, "calib/ece": 0.391484375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0234375, "calib/gap": 0.010186686317858018, "calib/mean_conf": 0.10179687500000001, "calib/mu_c": 0.10728813559322034, "calib/mu_w": 0.09710144927536232, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.016171875000000002, "calib/std_conf": 0.16214592961352553, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1113.0, "completions/max_terminated_length": 1113.0, "completions/mean_length": 458.484375, "completions/mean_terminated_length": 460.2823791503906, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.14613333333333334, "grad_norm": 0.005362810101360083, "learning_rate": 1.75e-06, "loss": 0.031, "num_tokens": 28018486.0, "reward": 1.0311261415481567, "reward_std": 0.06680846959352493, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6013148427009583, "rewards/format_reward_step": 1.0, "step": 137 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.23668501828797162, "aux_distill/mean_u": 0.07159552976812263, "aux_distill/n_active_tok": 435.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4650963189013923, "calib/avg_num_step_conf": 6.796875, "calib/ece": 0.49929133858267705, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.015748031496062992, "calib/gap": -0.016700362387945825, "calib/mean_conf": 0.10070866141732283, "calib/mu_c": 0.0936734693877551, "calib/mu_w": 0.11037383177570093, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.01062992125984252, "calib/std_conf": 0.147430384491924, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2362.0, "completions/max_terminated_length": 2362.0, "completions/mean_length": 479.22265625, "completions/mean_terminated_length": 481.10198974609375, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.1472, "grad_norm": 0.006265109404921532, "learning_rate": 1.7222222222222224e-06, "loss": 0.0322, "num_tokens": 28245503.0, "reward": 1.0223547220230103, "reward_std": 0.1057506576180458, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.48611563444137573, "rewards/format_reward_step": 0.984375, "step": 138 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2570781602989882, "aux_distill/mean_u": 0.08331463642739878, "aux_distill/n_active_tok": 434.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.47682389937106917, "calib/avg_num_step_conf": 6.81640625, "calib/ece": 0.500546875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": 0.01027672955974844, "calib/mean_conf": 0.100078125, "calib/mu_c": 0.10433333333333333, "calib/mu_w": 0.0940566037735849, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00734375, "calib/std_conf": 0.1285435000164706, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1121.0, "completions/max_terminated_length": 1121.0, "completions/mean_length": 434.38671875, "completions/mean_terminated_length": 436.0902099609375, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.14826666666666666, "grad_norm": 0.005986573174595833, "learning_rate": 1.6944444444444446e-06, "loss": -0.0022, "num_tokens": 28459802.0, "reward": 1.0478633642196655, "reward_std": 0.07045100629329681, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.5097890496253967, "rewards/format_reward_step": 1.0, "step": 139 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24574917717836797, "aux_distill/mean_u": 0.0754423186694742, "aux_distill/n_active_tok": 439.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5346385542168675, "calib/avg_num_step_conf": 6.8984375, "calib/ece": 0.5513671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.01171875, "calib/gap": 0.032148594377510026, "calib/mean_conf": 0.1041796875, "calib/mu_c": 0.11548192771084338, "calib/mu_w": 0.08333333333333336, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0035546875000000014, "calib/std_conf": 0.15045539899718569, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1043.0, "completions/max_terminated_length": 1043.0, "completions/mean_length": 466.796875, "completions/mean_terminated_length": 468.6274719238281, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.14933333333333335, "grad_norm": 0.005342083051800728, "learning_rate": 1.6666666666666667e-06, "loss": 0.055, "num_tokens": 28684318.0, "reward": 1.0581376552581787, "reward_std": 0.07809313386678696, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.4678378999233246, "rewards/format_reward_step": 1.0, "step": 140 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.25117631489410996, "aux_distill/mean_u": 0.09753673918011269, "aux_distill/n_active_tok": 453.15625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.49602630857769253, "calib/avg_num_step_conf": 7.06640625, "calib/ece": 0.5469960474308301, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.02766798418972332, "calib/gap": -0.007095094546451075, "calib/mean_conf": 0.13371541501976283, "calib/mu_c": 0.13121951219512196, "calib/mu_w": 0.13831460674157303, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.016245059288537547, "calib/std_conf": 0.19704243548771552, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2834.0, "completions/max_terminated_length": 2834.0, "completions/mean_length": 520.3203125, "completions/mean_terminated_length": 522.36083984375, "completions/min_length": 0.0, "completions/min_terminated_length": 217.0, "epoch": 0.1504, "grad_norm": 0.00543976342305541, "learning_rate": 1.638888888888889e-06, "loss": 0.0539, "num_tokens": 28924616.0, "reward": 1.044323205947876, "reward_std": 0.12294139713048935, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.45974022150039673, "rewards/format_reward_step": 0.98828125, "step": 141 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24302505189552903, "aux_distill/mean_u": 0.08167430237365858, "aux_distill/n_active_tok": 460.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5374961832061069, "calib/avg_num_step_conf": 7.24609375, "calib/ece": 0.40203124999999995, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.015625, "calib/gap": 0.02609099236641224, "calib/mean_conf": 0.12703125, "calib/mu_c": 0.13977099236641224, "calib/mu_w": 0.11368, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.008671875, "calib/std_conf": 0.1699418989638444, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1123.0, "completions/max_terminated_length": 1123.0, "completions/mean_length": 481.52734375, "completions/mean_terminated_length": 483.41571044921875, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.15146666666666667, "grad_norm": 0.0055787647143006325, "learning_rate": 1.6111111111111113e-06, "loss": 0.0218, "num_tokens": 29153047.0, "reward": 1.0430495738983154, "reward_std": 0.0907321497797966, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5821930170059204, "rewards/format_reward_step": 0.9921875, "step": 142 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.25827985629439354, "aux_distill/mean_u": 0.08566694257432725, "aux_distill/n_active_tok": 480.1875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5286016949152543, "calib/avg_num_step_conf": 7.45703125, "calib/ece": 0.4213779527559056, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.03622133599202389, "calib/mean_conf": 0.11405511811023622, "calib/mu_c": 0.13088235294117648, "calib/mu_w": 0.09466101694915259, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.13521815602166537, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2658.0, "completions/max_terminated_length": 2658.0, "completions/mean_length": 508.015625, "completions/mean_terminated_length": 508.015625, "completions/min_length": 195.0, "completions/min_terminated_length": 195.0, "epoch": 0.15253333333333333, "grad_norm": 0.005299092270433903, "learning_rate": 1.5833333333333333e-06, "loss": 0.0571, "num_tokens": 29390435.0, "reward": 1.0422933101654053, "reward_std": 0.10097577422857285, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5650554895401001, "rewards/format_reward_step": 0.98828125, "step": 143 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.22756909485906363, "aux_distill/mean_u": 0.05125861934791381, "aux_distill/n_active_tok": 478.5, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5264581296839361, "calib/avg_num_step_conf": 7.51953125, "calib/ece": 0.46848307086614177, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.019926014988595614, "calib/mean_conf": 0.15781614173228348, "calib/mu_c": 0.1655825806451613, "calib/mu_w": 0.14565656565656568, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.008031496062992128, "calib/std_conf": 0.16857469551445417, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1621.0, "completions/max_terminated_length": 1621.0, "completions/mean_length": 495.6484375, "completions/mean_terminated_length": 497.5921936035156, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.1536, "grad_norm": 0.006199431139975786, "learning_rate": 1.5555555555555558e-06, "loss": 0.0044, "num_tokens": 29621449.0, "reward": 1.0659891366958618, "reward_std": 0.11490050703287125, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.5343221426010132, "rewards/format_reward_step": 0.9921875, "step": 144 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24254822148941457, "aux_distill/mean_u": 0.0902089617046771, "aux_distill/n_active_tok": 475.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5481939935064934, "calib/avg_num_step_conf": 7.484375, "calib/ece": 0.48573671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.01953125, "calib/gap": 0.05436439393939396, "calib/mean_conf": 0.18340390625, "calib/mu_c": 0.2020916666666667, "calib/mu_w": 0.14772727272727273, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.006445312500000001, "calib/std_conf": 0.20833097724172275, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1139.0, "completions/max_terminated_length": 1139.0, "completions/mean_length": 463.28125, "completions/mean_terminated_length": 465.0980529785156, "completions/min_length": 0.0, "completions/min_terminated_length": 187.0, "epoch": 0.15466666666666667, "grad_norm": 0.0066559393890202045, "learning_rate": 1.527777777777778e-06, "loss": 0.0377, "num_tokens": 29842753.0, "reward": 1.0902000665664673, "reward_std": 0.12901276350021362, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.5280565023422241, "rewards/format_reward_step": 0.99609375, "step": 145 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24017924163490534, "aux_distill/mean_u": 0.055623048994550915, "aux_distill/n_active_tok": 482.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5867346938775511, "calib/avg_num_step_conf": 7.69921875, "calib/ece": 0.27204724409448816, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.015748031496062992, "calib/gap": 0.007849293563579302, "calib/mean_conf": 0.18456692913385828, "calib/mu_c": 0.18938775510204084, "calib/mu_w": 0.18153846153846154, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.035393700787401575, "calib/std_conf": 0.1884405790155179, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2372.0, "completions/max_terminated_length": 2372.0, "completions/mean_length": 501.0, "completions/mean_terminated_length": 502.9647216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 215.0, "epoch": 0.15573333333333333, "grad_norm": 0.006187766790390015, "learning_rate": 1.5e-06, "loss": 0.0536, "num_tokens": 30078225.0, "reward": 1.0223695039749146, "reward_std": 0.12816308438777924, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.6775516271591187, "rewards/format_reward_step": 0.984375, "step": 146 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.23265002365224063, "aux_distill/mean_u": 0.07974558114182709, "aux_distill/n_active_tok": 506.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.48316215375038907, "calib/avg_num_step_conf": 7.91015625, "calib/ece": 0.3200393700787401, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.001661375661375697, "calib/mean_conf": 0.18625984251968505, "calib/mu_c": 0.18714285714285717, "calib/mu_w": 0.18548148148148147, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.018897637795275597, "calib/std_conf": 0.17227229465438432, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2598.0, "completions/max_terminated_length": 2598.0, "completions/mean_length": 526.20703125, "completions/mean_terminated_length": 526.20703125, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 0.1568, "grad_norm": 0.005905737169086933, "learning_rate": 1.4722222222222225e-06, "loss": 0.0701, "num_tokens": 30316614.0, "reward": 1.0446211099624634, "reward_std": 0.11209353804588318, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6361172199249268, "rewards/format_reward_step": 0.98828125, "step": 147 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2535096355713904, "aux_distill/mean_u": 0.08370382600464303, "aux_distill/n_active_tok": 458.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5175713523606296, "calib/avg_num_step_conf": 7.16796875, "calib/ece": 0.43325490196078437, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.023529411764705882, "calib/gap": 0.008258869031741778, "calib/mean_conf": 0.2384313725490196, "calib/mu_c": 0.2414110429447853, "calib/mu_w": 0.23315217391304352, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01623529411764706, "calib/std_conf": 0.21298046075361465, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2498.0, "completions/max_terminated_length": 2498.0, "completions/mean_length": 476.1640625, "completions/mean_terminated_length": 476.1640625, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.15786666666666666, "grad_norm": 0.006926925852894783, "learning_rate": 1.4444444444444445e-06, "loss": 0.0402, "num_tokens": 30543624.0, "reward": 1.098899245262146, "reward_std": 0.13413414359092712, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.564985990524292, "rewards/format_reward_step": 0.99609375, "step": 148 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.23205461748875678, "aux_distill/mean_u": 0.06465849815899094, "aux_distill/n_active_tok": 519.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5840522630657663, "calib/avg_num_step_conf": 8.1171875, "calib/ece": 0.2754877470355731, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": 0.05102485621405345, "calib/mean_conf": 0.24925533596837945, "calib/mu_c": 0.2742635658914728, "calib/mu_w": 0.22323870967741938, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.007430830039525693, "calib/std_conf": 0.18631744623604815, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2487.0, "completions/max_terminated_length": 2487.0, "completions/mean_length": 577.2734375, "completions/mean_terminated_length": 577.2734375, "completions/min_length": 192.0, "completions/min_terminated_length": 192.0, "epoch": 0.15893333333333334, "grad_norm": 0.005738920997828245, "learning_rate": 1.4166666666666667e-06, "loss": 0.077, "num_tokens": 30795862.0, "reward": 1.0786305665969849, "reward_std": 0.14628009498119354, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6650736927986145, "rewards/format_reward_step": 0.98828125, "step": 149 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.26180982729420066, "aux_distill/mean_u": 0.08911000792044878, "aux_distill/n_active_tok": 464.90625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5819143446852425, "calib/avg_num_step_conf": 7.26171875, "calib/ece": 0.3128740157480314, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.01968503937007874, "calib/gap": 0.061225490196078425, "calib/mean_conf": 0.2979133858267717, "calib/mu_c": 0.32249999999999995, "calib/mu_w": 0.26127450980392153, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006181102362204721, "calib/std_conf": 0.21724649535051074, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2426.0, "completions/max_terminated_length": 2426.0, "completions/mean_length": 479.67578125, "completions/mean_terminated_length": 479.67578125, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 0.16, "grad_norm": 0.006487760692834854, "learning_rate": 1.3888888888888892e-06, "loss": 0.0327, "num_tokens": 31023619.0, "reward": 1.116228699684143, "reward_std": 0.15358006954193115, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.6465198993682861, "rewards/format_reward_step": 0.9921875, "step": 150 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2628675187006593, "aux_distill/mean_u": 0.08701564027971015, "aux_distill/n_active_tok": 504.78125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.532805645622735, "calib/avg_num_step_conf": 7.93359375, "calib/ece": 0.19424685039370077, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.015748031496062992, "calib/gap": 0.02318017038591136, "calib/mean_conf": 0.3063043307086614, "calib/mu_c": 0.31971962616822425, "calib/mu_w": 0.2965394557823129, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03964566929133858, "calib/std_conf": 0.2062846493465961, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1406.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 506.50390625, "completions/mean_terminated_length": 508.490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 187.0, "epoch": 0.16106666666666666, "grad_norm": 0.00684371730312705, "learning_rate": 1.3611111111111112e-06, "loss": 0.0143, "num_tokens": 31260308.0, "reward": 1.0545872449874878, "reward_std": 0.16151979565620422, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.7029244899749756, "rewards/format_reward_step": 0.98828125, "step": 151 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24851590069010854, "aux_distill/mean_u": 0.06157251443173721, "aux_distill/n_active_tok": 468.65625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5187788906009245, "calib/avg_num_step_conf": 7.51171875, "calib/ece": 0.3058164, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.02, "calib/gap": -0.0011815485362095535, "calib/mean_conf": 0.3052236, "calib/mu_c": 0.3046659090909091, "calib/mu_w": 0.3058474576271187, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04151999999999999, "calib/std_conf": 0.2093360103829248, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2837.0, "completions/max_terminated_length": 2837.0, "completions/mean_length": 511.64453125, "completions/mean_terminated_length": 515.6732177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.16213333333333332, "grad_norm": 0.0067654200829565525, "learning_rate": 1.3333333333333334e-06, "loss": 0.0823, "num_tokens": 31496681.0, "reward": 1.0633323192596436, "reward_std": 0.19225209951400757, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.6383833885192871, "rewards/format_reward_step": 0.97265625, "step": 152 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2772968136705458, "aux_distill/mean_u": 0.10214465292753641, "aux_distill/n_active_tok": 470.53125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5065810907669251, "calib/avg_num_step_conf": 7.21484375, "calib/ece": 0.3198753968253968, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.015873015873015872, "calib/gap": -0.00302903499055196, "calib/mean_conf": 0.32607698412698416, "calib/mu_c": 0.3248389261744966, "calib/mu_w": 0.32786796116504857, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.02734126984126984, "calib/std_conf": 0.21308511290863438, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 526.5625, "completions/mean_terminated_length": 526.5625, "completions/min_length": 183.0, "completions/min_terminated_length": 183.0, "epoch": 0.1632, "grad_norm": 0.00628744438290596, "learning_rate": 1.3055555555555556e-06, "loss": 0.1055, "num_tokens": 31738801.0, "reward": 1.0987610816955566, "reward_std": 0.1717906892299652, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.6311159133911133, "rewards/format_reward_step": 0.984375, "step": 153 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.25234381505288184, "aux_distill/mean_u": 0.08080222799912855, "aux_distill/n_active_tok": 431.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.48318968152087544, "calib/avg_num_step_conf": 6.77734375, "calib/ece": 0.229622265625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.03125, "calib/gap": -0.022947973592517823, "calib/mean_conf": 0.372955859375, "calib/mu_c": 0.36193007518796994, "calib/mu_w": 0.38487804878048776, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.041523437499999996, "calib/std_conf": 0.21237021947984544, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1556.0, "completions/max_terminated_length": 1556.0, "completions/mean_length": 476.73046875, "completions/mean_terminated_length": 478.60003662109375, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.16426666666666667, "grad_norm": 0.006957808043807745, "learning_rate": 1.2777777777777779e-06, "loss": 0.034, "num_tokens": 31965284.0, "reward": 1.095935344696045, "reward_std": 0.15885654091835022, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.6723395586013794, "rewards/format_reward_step": 1.0, "step": 154 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24342874670401216, "aux_distill/mean_u": 0.0742800901495456, "aux_distill/n_active_tok": 441.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4895392278953922, "calib/avg_num_step_conf": 6.921875, "calib/ece": 0.18644921875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.015625, "calib/gap": -0.0059505603985056665, "calib/mean_conf": 0.37456640625000004, "calib/mu_c": 0.3711727272727272, "calib/mu_w": 0.3771232876712329, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0656640625, "calib/std_conf": 0.20673298440309412, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1042.0, "completions/max_terminated_length": 1042.0, "completions/mean_length": 457.39453125, "completions/mean_terminated_length": 459.1882629394531, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.16533333333333333, "grad_norm": 0.008058539591729641, "learning_rate": 1.25e-06, "loss": 0.0351, "num_tokens": 32189593.0, "reward": 1.0679690837860107, "reward_std": 0.1546422243118286, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.7062505483627319, "rewards/format_reward_step": 1.0, "step": 155 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24015296366997063, "aux_distill/mean_u": 0.07636279268520298, "aux_distill/n_active_tok": 467.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.53173828125, "calib/avg_num_step_conf": 7.375, "calib/ece": 0.21515625000000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.02734375, "calib/gap": 0.016718750000000004, "calib/mean_conf": 0.39187500000000003, "calib/mu_c": 0.40023437500000003, "calib/mu_w": 0.383515625, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05351562500000002, "calib/std_conf": 0.21984280605696427, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1665.0, "completions/max_terminated_length": 1665.0, "completions/mean_length": 470.7890625, "completions/mean_terminated_length": 472.63531494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.1664, "grad_norm": 0.0072465743869543076, "learning_rate": 1.2222222222222223e-06, "loss": 0.0173, "num_tokens": 32414875.0, "reward": 1.0991687774658203, "reward_std": 0.1531982123851776, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6983374953269958, "rewards/format_reward_step": 1.0, "step": 156 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2766073530074209, "aux_distill/mean_u": 0.09899811663300234, "aux_distill/n_active_tok": 461.625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5205314009661836, "calib/avg_num_step_conf": 7.17578125, "calib/ece": 0.18030277777777773, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.027777777777777776, "calib/gap": 0.01946998474446987, "calib/mean_conf": 0.41803055555555557, "calib/mu_c": 0.4268384057971014, "calib/mu_w": 0.40736842105263155, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.025357142857142856, "calib/std_conf": 0.20073677321784758, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2958.0, "completions/max_terminated_length": 2958.0, "completions/mean_length": 498.0625, "completions/mean_terminated_length": 500.0157165527344, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.16746666666666668, "grad_norm": 0.006882913876324892, "learning_rate": 1.1944444444444446e-06, "loss": 0.04, "num_tokens": 32646107.0, "reward": 1.1105782985687256, "reward_std": 0.1789938509464264, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.6938129663467407, "rewards/format_reward_step": 0.984375, "step": 157 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2523889895528555, "aux_distill/mean_u": 0.08844801318115658, "aux_distill/n_active_tok": 426.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.44285187007874016, "calib/avg_num_step_conf": 6.66796875, "calib/ece": 0.22184588235294125, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.047058823529411764, "calib/gap": -0.04047578740157476, "calib/mean_conf": 0.4496835294117647, "calib/mu_c": 0.42952500000000005, "calib/mu_w": 0.4700007874015748, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08478431372549022, "calib/std_conf": 0.2085711531463499, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2191.0, "completions/max_terminated_length": 2191.0, "completions/mean_length": 474.046875, "completions/mean_terminated_length": 474.046875, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 0.16853333333333334, "grad_norm": 0.007169242948293686, "learning_rate": 1.1666666666666668e-06, "loss": 0.0458, "num_tokens": 32872703.0, "reward": 1.088477611541748, "reward_std": 0.19478802382946014, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6808613538742065, "rewards/format_reward_step": 0.99609375, "step": 158 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2364857776556164, "aux_distill/mean_u": 0.07033274390036429, "aux_distill/n_active_tok": 448.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5201918377321604, "calib/avg_num_step_conf": 7.0859375, "calib/ece": 0.182526953125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.046875, "calib/gap": 0.00289982893450641, "calib/mean_conf": 0.43778554687500004, "calib/mu_c": 0.4391901515151515, "calib/mu_w": 0.4362903225806451, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05234375000000001, "calib/std_conf": 0.2133529488140946, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1494.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 444.61328125, "completions/mean_terminated_length": 446.3569030761719, "completions/min_length": 0.0, "completions/min_terminated_length": 179.0, "epoch": 0.1696, "grad_norm": 0.007922896184027195, "learning_rate": 1.138888888888889e-06, "loss": 0.0248, "num_tokens": 33091308.0, "reward": 1.1078696250915527, "reward_std": 0.1769241839647293, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.7001141309738159, "rewards/format_reward_step": 1.0, "step": 159 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24150769133120775, "aux_distill/mean_u": 0.06557536838335866, "aux_distill/n_active_tok": 482.0, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.49964607464607463, "calib/avg_num_step_conf": 7.5390625, "calib/ece": 0.20342390438247007, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0398406374501992, "calib/gap": 0.00861539897039898, "calib/mean_conf": 0.44542071713147413, "calib/mu_c": 0.44923071428571426, "calib/mu_w": 0.4406153153153153, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04553784860557768, "calib/std_conf": 0.20457986052335805, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2763.0, "completions/max_terminated_length": 2763.0, "completions/mean_length": 508.8671875, "completions/mean_terminated_length": 508.8671875, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 0.17066666666666666, "grad_norm": 0.007158962078392506, "learning_rate": 1.111111111111111e-06, "loss": 0.0566, "num_tokens": 33326418.0, "reward": 1.1083617210388184, "reward_std": 0.17632320523262024, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6893797516822815, "rewards/format_reward_step": 0.98046875, "step": 160 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24881407851353288, "aux_distill/mean_u": 0.07915090880055803, "aux_distill/n_active_tok": 419.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5378493229616825, "calib/avg_num_step_conf": 6.57421875, "calib/ece": 0.27948046875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.02734375, "calib/gap": 0.020856669547680617, "calib/mean_conf": 0.45270703125, "calib/mu_c": 0.459061797752809, "calib/mu_w": 0.43820512820512836, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.018437499999999992, "calib/std_conf": 0.20979708413860648, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1202.0, "completions/max_terminated_length": 1202.0, "completions/mean_length": 453.00390625, "completions/mean_terminated_length": 454.7804260253906, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.17173333333333332, "grad_norm": 0.007694258354604244, "learning_rate": 1.0833333333333335e-06, "loss": 0.0477, "num_tokens": 33546307.0, "reward": 1.1947121620178223, "reward_std": 0.16380652785301208, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.6941118240356445, "rewards/format_reward_step": 1.0, "step": 161 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.23428629664704204, "aux_distill/mean_u": 0.07806861418678401, "aux_distill/n_active_tok": 445.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.563063973063973, "calib/avg_num_step_conf": 6.97265625, "calib/ece": 0.2307843137254902, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.023529411764705882, "calib/gap": 0.0368787878787879, "calib/mean_conf": 0.46952941176470586, "calib/mu_c": 0.48254545454545456, "calib/mu_w": 0.44566666666666666, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.026627450980392167, "calib/std_conf": 0.20145493856493812, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1177.0, "completions/max_terminated_length": 1177.0, "completions/mean_length": 439.0078125, "completions/mean_terminated_length": 440.72943115234375, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.1728, "grad_norm": 0.008261593990027905, "learning_rate": 1.0555555555555557e-06, "loss": 0.0115, "num_tokens": 33762837.0, "reward": 1.173771619796753, "reward_std": 0.1813250035047531, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.7108246088027954, "rewards/format_reward_step": 0.98828125, "step": 162 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.257523697335273, "aux_distill/mean_u": 0.08217107906337166, "aux_distill/n_active_tok": 482.46875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5485661424606846, "calib/avg_num_step_conf": 7.58984375, "calib/ece": 0.144507421875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0390625, "calib/gap": 0.040020875732346584, "calib/mean_conf": 0.478305078125, "calib/mu_c": 0.5003478260869566, "calib/mu_w": 0.46032695035461, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08679687500000001, "calib/std_conf": 0.21256887379629488, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1571.0, "completions/max_terminated_length": 1571.0, "completions/mean_length": 497.17578125, "completions/mean_terminated_length": 499.1255187988281, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.17386666666666667, "grad_norm": 0.007228810340166092, "learning_rate": 1.0277777777777777e-06, "loss": 0.0138, "num_tokens": 33994946.0, "reward": 1.087785005569458, "reward_std": 0.17991483211517334, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.7263512015342712, "rewards/format_reward_step": 1.0, "step": 163 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24482636875472963, "aux_distill/mean_u": 0.06283773442355123, "aux_distill/n_active_tok": 508.28125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4689828614008942, "calib/avg_num_step_conf": 7.94140625, "calib/ece": 0.19609488188976373, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.04330708661417323, "calib/gap": -0.03212917287630396, "calib/mean_conf": 0.47059803149606294, "calib/mu_c": 0.45516590909090915, "calib/mu_w": 0.4872950819672131, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.073503937007874, "calib/std_conf": 0.1955032348546813, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2961.0, "completions/max_terminated_length": 2961.0, "completions/mean_length": 545.29296875, "completions/mean_terminated_length": 545.29296875, "completions/min_length": 193.0, "completions/min_terminated_length": 193.0, "epoch": 0.17493333333333333, "grad_norm": 0.007839228957891464, "learning_rate": 1.0000000000000002e-06, "loss": 0.0301, "num_tokens": 34240677.0, "reward": 1.094243049621582, "reward_std": 0.19971774518489838, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.6845799088478088, "rewards/format_reward_step": 0.98828125, "step": 164 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.27021268755197525, "aux_distill/mean_u": 0.07547623212198123, "aux_distill/n_active_tok": 471.9375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.49396135265700486, "calib/avg_num_step_conf": 7.296875, "calib/ece": 0.15078745098039217, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.03137254901960784, "calib/gap": -0.006471794871794889, "calib/mean_conf": 0.4786305882352941, "calib/mu_c": 0.47512820512820514, "calib/mu_w": 0.48160000000000003, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08529725490196079, "calib/std_conf": 0.18865541199797206, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 510.9765625, "completions/mean_terminated_length": 512.9804077148438, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.176, "grad_norm": 0.00910587515681982, "learning_rate": 9.722222222222224e-07, "loss": 0.0356, "num_tokens": 34477063.0, "reward": 1.0814200639724731, "reward_std": 0.16908253729343414, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.7097151875495911, "rewards/format_reward_step": 0.99609375, "step": 165 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2651129311416298, "aux_distill/mean_u": 0.09152722727924217, "aux_distill/n_active_tok": 490.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5931132075471699, "calib/avg_num_step_conf": 7.69140625, "calib/ece": 0.15345742187500003, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.03125, "calib/gap": 0.0671997106918239, "calib/mean_conf": 0.501464453125, "calib/mu_c": 0.5292893333333333, "calib/mu_w": 0.4620896226415094, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03449218750000001, "calib/std_conf": 0.19940186717996694, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1826.0, "completions/max_terminated_length": 1826.0, "completions/mean_length": 508.7421875, "completions/mean_terminated_length": 510.7372741699219, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.17706666666666668, "grad_norm": 0.008176633156836033, "learning_rate": 9.444444444444445e-07, "loss": 0.0261, "num_tokens": 34713485.0, "reward": 1.1645166873931885, "reward_std": 0.16638818383216858, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.7430957555770874, "rewards/format_reward_step": 1.0, "step": 166 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2851410242728889, "aux_distill/mean_u": 0.10715140228125174, "aux_distill/n_active_tok": 474.5, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5361252979230507, "calib/avg_num_step_conf": 7.4140625, "calib/ece": 0.22204724409448814, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.04330708661417323, "calib/gap": 0.01943343547837939, "calib/mean_conf": 0.5154330708661418, "calib/mu_c": 0.5222424242424243, "calib/mu_w": 0.5028089887640449, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.043937007874015735, "calib/std_conf": 0.1914254687532988, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3027.0, "completions/max_terminated_length": 3027.0, "completions/mean_length": 492.58203125, "completions/mean_terminated_length": 492.58203125, "completions/min_length": 1.0, "completions/min_terminated_length": 1.0, "epoch": 0.17813333333333334, "grad_norm": 0.007018796168267727, "learning_rate": 9.166666666666666e-07, "loss": 0.0462, "num_tokens": 34945194.0, "reward": 1.1749070882797241, "reward_std": 0.17637360095977783, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.7170015573501587, "rewards/format_reward_step": 0.98828125, "step": 167 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.25646522268652916, "aux_distill/mean_u": 0.0982307983881972, "aux_distill/n_active_tok": 507.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5028800400701228, "calib/avg_num_step_conf": 7.93359375, "calib/ece": 0.16849169960474308, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.03162055335968379, "calib/gap": 0.0014297520661158414, "calib/mean_conf": 0.5114292490118577, "calib/mu_c": 0.5121752066115703, "calib/mu_w": 0.5107454545454545, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10083003952569172, "calib/std_conf": 0.1999903553361993, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2440.0, "completions/max_terminated_length": 2440.0, "completions/mean_length": 529.5, "completions/mean_terminated_length": 529.5, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 0.1792, "grad_norm": 0.00782513152807951, "learning_rate": 8.88888888888889e-07, "loss": 0.0136, "num_tokens": 35185418.0, "reward": 1.085259199142456, "reward_std": 0.22317655384540558, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.7017684578895569, "rewards/format_reward_step": 0.98828125, "step": 168 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2343459421535954, "aux_distill/mean_u": 0.06730843714362388, "aux_distill/n_active_tok": 472.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4648706896551724, "calib/avg_num_step_conf": 7.40625, "calib/ece": 0.16858476562499997, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0390625, "calib/gap": -0.014609975369458073, "calib/mean_conf": 0.49805585937499997, "calib/mu_c": 0.4914357142857143, "calib/mu_w": 0.5060456896551724, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.059882812500000014, "calib/std_conf": 0.17959830494061663, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1085.0, "completions/max_terminated_length": 1085.0, "completions/mean_length": 511.5546875, "completions/mean_terminated_length": 513.560791015625, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.18026666666666666, "grad_norm": 0.007080877665430307, "learning_rate": 8.611111111111112e-07, "loss": 0.0235, "num_tokens": 35420560.0, "reward": 1.128596305847168, "reward_std": 0.1757933497428894, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.7103176116943359, "rewards/format_reward_step": 1.0, "step": 169 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24169203732162714, "aux_distill/mean_u": 0.07235590006785955, "aux_distill/n_active_tok": 500.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.48724048442906565, "calib/avg_num_step_conf": 7.82421875, "calib/ece": 0.18541176470588233, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.07058823529411765, "calib/gap": 0.002457983193277502, "calib/mean_conf": 0.5169411764705882, "calib/mu_c": 0.5180882352941177, "calib/mu_w": 0.5156302521008402, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08450980392156862, "calib/std_conf": 0.20376855496070237, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2780.0, "completions/max_terminated_length": 2780.0, "completions/mean_length": 520.23828125, "completions/mean_terminated_length": 520.23828125, "completions/min_length": 186.0, "completions/min_terminated_length": 186.0, "epoch": 0.18133333333333335, "grad_norm": 0.007690028753131628, "learning_rate": 8.333333333333333e-07, "loss": 0.0464, "num_tokens": 35657893.0, "reward": 1.1175562143325806, "reward_std": 0.18893221020698547, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.7077687978744507, "rewards/format_reward_step": 0.99609375, "step": 170 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.23977546906098723, "aux_distill/mean_u": 0.07869014971729088, "aux_distill/n_active_tok": 492.53125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5743453579930415, "calib/avg_num_step_conf": 7.74609375, "calib/ece": 0.11351601562499998, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.06640625, "calib/gap": 0.04610310077519375, "calib/mean_conf": 0.529531640625, "calib/mu_c": 0.5524031007751937, "calib/mu_w": 0.5063, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.069570703125, "calib/std_conf": 0.19799818352338072, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 494.609375, "completions/mean_terminated_length": 496.5490417480469, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.1824, "grad_norm": 0.008125293999910355, "learning_rate": 8.055555555555557e-07, "loss": 0.0251, "num_tokens": 35891409.0, "reward": 1.1185557842254639, "reward_std": 0.19421759247779846, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.7332054376602173, "rewards/format_reward_step": 1.0, "step": 171 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2299743457697332, "aux_distill/mean_u": 0.06312461743109235, "aux_distill/n_active_tok": 474.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5154698242933536, "calib/avg_num_step_conf": 7.43359375, "calib/ece": 0.16480390624999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.03515625, "calib/gap": 0.006018665648077404, "calib/mean_conf": 0.53894609375, "calib/mu_c": 0.5413441558441558, "calib/mu_w": 0.5353254901960784, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05109375000000001, "calib/std_conf": 0.18868068777134087, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1732.0, "completions/max_terminated_length": 1732.0, "completions/mean_length": 495.3671875, "completions/mean_terminated_length": 497.3098449707031, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.18346666666666667, "grad_norm": 0.00744143221527338, "learning_rate": 7.777777777777779e-07, "loss": 0.0229, "num_tokens": 36121575.0, "reward": 1.1626207828521729, "reward_std": 0.19035744667053223, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.7236789464950562, "rewards/format_reward_step": 1.0, "step": 172 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.22252439288422465, "aux_distill/mean_u": 0.07546641405015214, "aux_distill/n_active_tok": 527.53125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5107060755336617, "calib/avg_num_step_conf": 8.24609375, "calib/ece": 0.1567344, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.036, "calib/gap": 0.007666469622331862, "calib/mean_conf": 0.5260655999999999, "calib/mu_c": 0.5292855172413794, "calib/mu_w": 0.5216190476190475, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05140000000000002, "calib/std_conf": 0.18500178468501324, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2847.0, "completions/max_terminated_length": 2847.0, "completions/mean_length": 577.44140625, "completions/mean_terminated_length": 577.44140625, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.18453333333333333, "grad_norm": 0.007290961220860481, "learning_rate": 7.5e-07, "loss": 0.0186, "num_tokens": 36372560.0, "reward": 1.1172091960906982, "reward_std": 0.21927499771118164, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.699262261390686, "rewards/format_reward_step": 0.96875, "step": 173 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.26341936527751386, "aux_distill/mean_u": 0.08537082247943154, "aux_distill/n_active_tok": 507.21875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.48029110444777606, "calib/avg_num_step_conf": 7.9453125, "calib/ece": 0.1985464566929134, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.047244094488188976, "calib/gap": -0.003500662168915536, "calib/mean_conf": 0.5084614173228347, "calib/mu_c": 0.5065594827586207, "calib/mu_w": 0.5100601449275363, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12515748031496068, "calib/std_conf": 0.20398919361035334, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1966.0, "completions/max_terminated_length": 1966.0, "completions/mean_length": 533.25, "completions/mean_terminated_length": 535.3411865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.1856, "grad_norm": 0.0074493614956736565, "learning_rate": 7.222222222222222e-07, "loss": 0.0166, "num_tokens": 36613304.0, "reward": 1.0728223323822021, "reward_std": 0.22512751817703247, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.7003322839736938, "rewards/format_reward_step": 0.9921875, "step": 174 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2397211652714759, "aux_distill/mean_u": 0.07551316888469718, "aux_distill/n_active_tok": 517.5, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5412217692357472, "calib/avg_num_step_conf": 8.15234375, "calib/ece": 0.1779350393700788, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.07480314960629922, "calib/gap": 0.02992464246424653, "calib/mean_conf": 0.5439547244094488, "calib/mu_c": 0.561980198019802, "calib/mu_w": 0.5320555555555555, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16212598425196853, "calib/std_conf": 0.20303705617070827, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1543.0, "completions/max_terminated_length": 1543.0, "completions/mean_length": 500.96484375, "completions/mean_terminated_length": 504.9094543457031, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.18666666666666668, "grad_norm": 0.007952606305480003, "learning_rate": 6.944444444444446e-07, "loss": 0.0231, "num_tokens": 36847375.0, "reward": 1.0466676950454712, "reward_std": 0.202549010515213, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.7066166400909424, "rewards/format_reward_step": 0.9921875, "step": 175 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.27991568110883236, "aux_distill/mean_u": 0.10163999840663654, "aux_distill/n_active_tok": 489.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5015845398620518, "calib/avg_num_step_conf": 7.6640625, "calib/ece": 0.14553464566929142, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.027559055118110236, "calib/gap": 0.004988616168520532, "calib/mean_conf": 0.5305283464566929, "calib/mu_c": 0.5331404958677686, "calib/mu_w": 0.528151879699248, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09984251968503945, "calib/std_conf": 0.17561454229133455, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1290.0, "completions/max_terminated_length": 1290.0, "completions/mean_length": 499.60546875, "completions/mean_terminated_length": 501.5647277832031, "completions/min_length": 0.0, "completions/min_terminated_length": 206.0, "epoch": 0.18773333333333334, "grad_norm": 0.006957260426133871, "learning_rate": 6.666666666666667e-07, "loss": 0.0035, "num_tokens": 37079338.0, "reward": 1.0912022590637207, "reward_std": 0.2196333408355713, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.713654637336731, "rewards/format_reward_step": 0.9921875, "step": 176 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.22919973416719586, "aux_distill/mean_u": 0.0725726520841592, "aux_distill/n_active_tok": 533.5625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5201538461538462, "calib/avg_num_step_conf": 8.33984375, "calib/ece": 0.14137254901960786, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.050980392156862744, "calib/gap": 0.017147692307692197, "calib/mean_conf": 0.5409019607843137, "calib/mu_c": 0.5493076923076923, "calib/mu_w": 0.5321600000000001, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08623529411764708, "calib/std_conf": 0.1974850954814864, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2626.0, "completions/max_terminated_length": 2626.0, "completions/mean_length": 527.80078125, "completions/mean_terminated_length": 527.80078125, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 0.1888, "grad_norm": 0.007587118539959192, "learning_rate": 6.388888888888889e-07, "loss": 0.0014, "num_tokens": 37318287.0, "reward": 1.1098990440368652, "reward_std": 0.18522927165031433, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.7158917784690857, "rewards/format_reward_step": 0.99609375, "step": 177 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2627130839973688, "aux_distill/mean_u": 0.09110170433559824, "aux_distill/n_active_tok": 548.5625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5410313192027839, "calib/avg_num_step_conf": 8.53125, "calib/ece": 0.11646338582677156, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.031496062992125984, "calib/gap": 0.023774419487503917, "calib/mean_conf": 0.5405444881889764, "calib/mu_c": 0.550746896551724, "calib/mu_w": 0.5269724770642201, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.043070866141732264, "calib/std_conf": 0.17757437819813585, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2580.0, "completions/max_terminated_length": 2580.0, "completions/mean_length": 527.83984375, "completions/mean_terminated_length": 527.83984375, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 0.18986666666666666, "grad_norm": 0.0069207060150802135, "learning_rate": 6.111111111111112e-07, "loss": 0.0427, "num_tokens": 37559486.0, "reward": 1.143537998199463, "reward_std": 0.20619063079357147, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.7284823060035706, "rewards/format_reward_step": 0.9921875, "step": 178 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.26747484086081386, "aux_distill/mean_u": 0.08128448746610327, "aux_distill/n_active_tok": 508.625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5576569477278697, "calib/avg_num_step_conf": 7.9453125, "calib/ece": 0.12248915662650602, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.05220883534136546, "calib/gap": 0.04837458628841618, "calib/mean_conf": 0.5564650602409639, "calib/mu_c": 0.5774468085106385, "calib/mu_w": 0.5290722222222223, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.056344578313253, "calib/std_conf": 0.18966318239214708, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2277.0, "completions/max_terminated_length": 2277.0, "completions/mean_length": 523.2265625, "completions/mean_terminated_length": 525.2784423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.19093333333333334, "grad_norm": 0.008067484013736248, "learning_rate": 5.833333333333334e-07, "loss": 0.0513, "num_tokens": 37799696.0, "reward": 1.1226156949996948, "reward_std": 0.21316027641296387, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.7217940092086792, "rewards/format_reward_step": 0.97265625, "step": 179 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2592920111492276, "aux_distill/mean_u": 0.10416253591956139, "aux_distill/n_active_tok": 583.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5327018633540372, "calib/avg_num_step_conf": 9.11328125, "calib/ece": 0.12081411764705885, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.07450980392156863, "calib/gap": 0.027076149068323008, "calib/mean_conf": 0.5397349019607842, "calib/mu_c": 0.5519457142857144, "calib/mu_w": 0.5248695652173914, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05576470588235296, "calib/std_conf": 0.18514426637108808, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2586.0, "completions/max_terminated_length": 2586.0, "completions/mean_length": 607.265625, "completions/mean_terminated_length": 607.265625, "completions/min_length": 203.0, "completions/min_terminated_length": 203.0, "epoch": 0.192, "grad_norm": 0.007254335097968578, "learning_rate": 5.555555555555555e-07, "loss": 0.0433, "num_tokens": 38059012.0, "reward": 1.1357789039611816, "reward_std": 0.22384758293628693, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.7285890579223633, "rewards/format_reward_step": 0.99609375, "step": 180 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24254727293737233, "aux_distill/mean_u": 0.06503256739429049, "aux_distill/n_active_tok": 493.53125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4927691146881288, "calib/avg_num_step_conf": 7.78125, "calib/ece": 0.17722047244094488, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.031496062992125984, "calib/gap": -0.013612927565392341, "calib/mean_conf": 0.5388425196850394, "calib/mu_c": 0.5312321428571428, "calib/mu_w": 0.5448450704225352, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13755905511811026, "calib/std_conf": 0.18144008035438455, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2533.0, "completions/max_terminated_length": 2533.0, "completions/mean_length": 504.9296875, "completions/mean_terminated_length": 506.9098205566406, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.19306666666666666, "grad_norm": 0.00835175346583128, "learning_rate": 5.277777777777779e-07, "loss": 0.0239, "num_tokens": 38294538.0, "reward": 1.0642285346984863, "reward_std": 0.19430889189243317, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6987694501876831, "rewards/format_reward_step": 0.9921875, "step": 181 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.23280840320512652, "aux_distill/mean_u": 0.07703698157631368, "aux_distill/n_active_tok": 552.96875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.541740957717779, "calib/avg_num_step_conf": 8.62890625, "calib/ece": 0.0938780392156863, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.047058823529411764, "calib/gap": 0.024451980387162564, "calib/mean_conf": 0.5652592156862745, "calib/mu_c": 0.5752317880794703, "calib/mu_w": 0.5507798076923077, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03349019607843139, "calib/std_conf": 0.17475515730102623, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2817.0, "completions/max_terminated_length": 2817.0, "completions/mean_length": 543.6015625, "completions/mean_terminated_length": 543.6015625, "completions/min_length": 233.0, "completions/min_terminated_length": 233.0, "epoch": 0.19413333333333332, "grad_norm": 0.007248417474329472, "learning_rate": 5.000000000000001e-07, "loss": 0.0393, "num_tokens": 38539860.0, "reward": 1.157365083694458, "reward_std": 0.20332470536231995, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.7326991558074951, "rewards/format_reward_step": 0.9921875, "step": 182 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2438644040375948, "aux_distill/mean_u": 0.0821571597139334, "aux_distill/n_active_tok": 574.96875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5762962962962963, "calib/avg_num_step_conf": 9.0390625, "calib/ece": 0.07639215686274511, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.03529411764705882, "calib/gap": 0.04816666666666669, "calib/mean_conf": 0.5279999999999999, "calib/mu_c": 0.5535, "calib/mu_w": 0.5053333333333333, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06690196078431374, "calib/std_conf": 0.1661098148461893, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1648.0, "completions/max_terminated_length": 1648.0, "completions/mean_length": 585.2578125, "completions/mean_terminated_length": 587.552978515625, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.1952, "grad_norm": 0.007068790029734373, "learning_rate": 4.7222222222222226e-07, "loss": 0.0288, "num_tokens": 38796366.0, "reward": 1.102957010269165, "reward_std": 0.2160937488079071, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.7410702705383301, "rewards/format_reward_step": 0.99609375, "step": 183 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2564887339249253, "aux_distill/mean_u": 0.10162648412902427, "aux_distill/n_active_tok": 555.75, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5537652531517562, "calib/avg_num_step_conf": 8.6875, "calib/ece": 0.18566771653543307, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.04330708661417323, "calib/gap": 0.025047960628328703, "calib/mean_conf": 0.5240960629921261, "calib/mu_c": 0.5330699386503067, "calib/mu_w": 0.508021978021978, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03401574803149607, "calib/std_conf": 0.19249284019881638, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2560.0, "completions/max_terminated_length": 2560.0, "completions/mean_length": 568.65625, "completions/mean_terminated_length": 568.65625, "completions/min_length": 214.0, "completions/min_terminated_length": 214.0, "epoch": 0.19626666666666667, "grad_norm": 0.0074840690940618515, "learning_rate": 4.444444444444445e-07, "loss": 0.0811, "num_tokens": 39047222.0, "reward": 1.1769556999206543, "reward_std": 0.19394519925117493, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.7250052094459534, "rewards/format_reward_step": 0.9921875, "step": 184 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24720469187013805, "aux_distill/mean_u": 0.09512097785666002, "aux_distill/n_active_tok": 571.0625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4885932780669623, "calib/avg_num_step_conf": 8.9609375, "calib/ece": 0.15493400000000002, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.064, "calib/gap": -0.01469857335646807, "calib/mean_conf": 0.536506, "calib/mu_c": 0.5296270676691729, "calib/mu_w": 0.544325641025641, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.07972, "calib/std_conf": 0.20492050518188756, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2768.0, "completions/max_terminated_length": 2768.0, "completions/mean_length": 580.8984375, "completions/mean_terminated_length": 583.176513671875, "completions/min_length": 0.0, "completions/min_terminated_length": 163.0, "epoch": 0.19733333333333333, "grad_norm": 0.008756212890148163, "learning_rate": 4.1666666666666667e-07, "loss": 0.0359, "num_tokens": 39302852.0, "reward": 1.0872716903686523, "reward_std": 0.23970657587051392, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.6823559999465942, "rewards/format_reward_step": 0.97265625, "step": 185 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2736734275240451, "aux_distill/mean_u": 0.09042671780625551, "aux_distill/n_active_tok": 544.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5295651107158192, "calib/avg_num_step_conf": 8.546875, "calib/ece": 0.13554687500000007, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.03515625, "calib/gap": 0.025905661534686875, "calib/mean_conf": 0.5333593750000001, "calib/mu_c": 0.5454014598540146, "calib/mu_w": 0.5194957983193277, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06687500000000006, "calib/std_conf": 0.18393391027651582, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1582.0, "completions/max_terminated_length": 1582.0, "completions/mean_length": 558.234375, "completions/mean_terminated_length": 560.423583984375, "completions/min_length": 0.0, "completions/min_terminated_length": 216.0, "epoch": 0.1984, "grad_norm": 0.007027934771031141, "learning_rate": 3.8888888888888895e-07, "loss": 0.0447, "num_tokens": 39550800.0, "reward": 1.132723093032837, "reward_std": 0.186387836933136, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.7302898168563843, "rewards/format_reward_step": 1.0, "step": 186 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2511372393928468, "aux_distill/mean_u": 0.07868845063013544, "aux_distill/n_active_tok": 574.75, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5694851535618708, "calib/avg_num_step_conf": 8.96484375, "calib/ece": 0.08982310756972113, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.03187250996015936, "calib/gap": 0.03919008538294899, "calib/mean_conf": 0.5342406374501992, "calib/mu_c": 0.5526646616541354, "calib/mu_w": 0.5134745762711864, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.047091633466135464, "calib/std_conf": 0.1741905540078731, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2665.0, "completions/max_terminated_length": 2665.0, "completions/mean_length": 596.44140625, "completions/mean_terminated_length": 598.7804565429688, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.19946666666666665, "grad_norm": 0.006233526859432459, "learning_rate": 3.611111111111111e-07, "loss": 0.047, "num_tokens": 39805033.0, "reward": 1.112801194190979, "reward_std": 0.21134722232818604, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.7256022691726685, "rewards/format_reward_step": 0.98046875, "step": 187 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2566721090115607, "aux_distill/mean_u": 0.09184461434823142, "aux_distill/n_active_tok": 534.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5928427799421893, "calib/avg_num_step_conf": 8.421875, "calib/ece": 0.11929529411764701, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.050980392156862744, "calib/gap": 0.057999704662561324, "calib/mean_conf": 0.5308223529411765, "calib/mu_c": 0.5556143835616438, "calib/mu_w": 0.4976146788990825, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03878431372549018, "calib/std_conf": 0.17984997716057607, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1905.0, "completions/max_terminated_length": 1905.0, "completions/mean_length": 575.453125, "completions/mean_terminated_length": 577.7098388671875, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.20053333333333334, "grad_norm": 0.006328468210995197, "learning_rate": 3.3333333333333335e-07, "loss": 0.0486, "num_tokens": 40056421.0, "reward": 1.1526392698287964, "reward_std": 0.18582162261009216, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.7427785992622375, "rewards/format_reward_step": 0.9921875, "step": 188 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2664337196620181, "aux_distill/mean_u": 0.10317610606850601, "aux_distill/n_active_tok": 523.9375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5631231306081755, "calib/avg_num_step_conf": 8.2890625, "calib/ece": 0.09929133858267714, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.05511811023622047, "calib/gap": 0.03753738783649063, "calib/mean_conf": 0.5631496062992126, "calib/mu_c": 0.5805882352941177, "calib/mu_w": 0.5430508474576271, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.063503937007874, "calib/std_conf": 0.17280775713214339, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1308.0, "completions/max_terminated_length": 1308.0, "completions/mean_length": 524.1328125, "completions/mean_terminated_length": 528.2598266601562, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.2016, "grad_norm": 0.007905436679720879, "learning_rate": 3.055555555555556e-07, "loss": 0.0023, "num_tokens": 40298367.0, "reward": 1.1284804344177246, "reward_std": 0.1881653368473053, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.733523428440094, "rewards/format_reward_step": 0.9921875, "step": 189 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24586508085485548, "aux_distill/mean_u": 0.08787149806346584, "aux_distill/n_active_tok": 583.28125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5551407782347358, "calib/avg_num_step_conf": 9.10546875, "calib/ece": 0.13894999999999996, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.03543307086614173, "calib/gap": 0.02305158494147419, "calib/mean_conf": 0.5204988188976377, "calib/mu_c": 0.5303910344827586, "calib/mu_w": 0.5073394495412844, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.044291338582677184, "calib/std_conf": 0.19267354330562245, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2638.0, "completions/max_terminated_length": 2638.0, "completions/mean_length": 602.95703125, "completions/mean_terminated_length": 602.95703125, "completions/min_length": 206.0, "completions/min_terminated_length": 206.0, "epoch": 0.20266666666666666, "grad_norm": 0.008090218529105186, "learning_rate": 2.7777777777777776e-07, "loss": 0.0204, "num_tokens": 40558332.0, "reward": 1.1397864818572998, "reward_std": 0.20814228057861328, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.7209792733192444, "rewards/format_reward_step": 0.9921875, "step": 190 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.21869845094624907, "aux_distill/mean_u": 0.06494175114544838, "aux_distill/n_active_tok": 559.21875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5373224251648909, "calib/avg_num_step_conf": 8.71875, "calib/ece": 0.16432480314960635, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.047244094488188976, "calib/gap": 0.011381253170979266, "calib/mean_conf": 0.5445728346456692, "calib/mu_c": 0.5511148148148148, "calib/mu_w": 0.5397335616438356, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14185039370078745, "calib/std_conf": 0.18207366362671173, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2965.0, "completions/max_terminated_length": 2965.0, "completions/mean_length": 541.48046875, "completions/mean_terminated_length": 541.48046875, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 0.20373333333333332, "grad_norm": 0.007324114441871643, "learning_rate": 2.5000000000000004e-07, "loss": 0.0204, "num_tokens": 40801119.0, "reward": 1.0630748271942139, "reward_std": 0.20367825031280518, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.7081811428070068, "rewards/format_reward_step": 0.9921875, "step": 191 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2494658336509019, "aux_distill/mean_u": 0.08475135246744968, "aux_distill/n_active_tok": 539.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5712225705329153, "calib/avg_num_step_conf": 8.48046875, "calib/ece": 0.12419607843137251, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.054901960784313725, "calib/gap": 0.052937304075235114, "calib/mean_conf": 0.5196470588235294, "calib/mu_c": 0.5424827586206896, "calib/mu_w": 0.4895454545454545, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03760784313725488, "calib/std_conf": 0.19056436548659947, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2553.0, "completions/max_terminated_length": 2553.0, "completions/mean_length": 568.83203125, "completions/mean_terminated_length": 571.0628051757812, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.2048, "grad_norm": 0.007168214302510023, "learning_rate": 2.2222222222222224e-07, "loss": 0.0243, "num_tokens": 41051716.0, "reward": 1.1472058296203613, "reward_std": 0.20757043361663818, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.7358179688453674, "rewards/format_reward_step": 0.9921875, "step": 192 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24612186197191477, "aux_distill/mean_u": 0.06994783112146444, "aux_distill/n_active_tok": 584.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6323058939338009, "calib/avg_num_step_conf": 9.125, "calib/ece": 0.07956862745098042, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0392156862745098, "calib/gap": 0.08549095607235152, "calib/mean_conf": 0.5361960784313725, "calib/mu_c": 0.5794444444444445, "calib/mu_w": 0.493953488372093, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.06082352941176473, "calib/std_conf": 0.1779558239528534, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2645.0, "completions/max_terminated_length": 2645.0, "completions/mean_length": 565.8125, "completions/mean_terminated_length": 565.8125, "completions/min_length": 230.0, "completions/min_terminated_length": 230.0, "epoch": 0.20586666666666667, "grad_norm": 0.006805724464356899, "learning_rate": 1.9444444444444447e-07, "loss": 0.0322, "num_tokens": 41302276.0, "reward": 1.118457317352295, "reward_std": 0.22730928659439087, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.7525394558906555, "rewards/format_reward_step": 0.9921875, "step": 193 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2551674894057214, "aux_distill/mean_u": 0.09176961205337196, "aux_distill/n_active_tok": 491.65625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5459585459585459, "calib/avg_num_step_conf": 7.7578125, "calib/ece": 0.1108271653543307, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.03937007874015748, "calib/gap": 0.02570521640521639, "calib/mean_conf": 0.5436610236220473, "calib/mu_c": 0.5548944055944055, "calib/mu_w": 0.5291891891891891, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.04574803149606298, "calib/std_conf": 0.19019859310579726, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2413.0, "completions/max_terminated_length": 2413.0, "completions/mean_length": 510.03125, "completions/mean_terminated_length": 512.0314331054688, "completions/min_length": 0.0, "completions/min_terminated_length": 200.0, "epoch": 0.20693333333333333, "grad_norm": 0.00830480270087719, "learning_rate": 1.6666666666666668e-07, "loss": 0.0502, "num_tokens": 41538788.0, "reward": 1.1304701566696167, "reward_std": 0.2282831370830536, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.7179715633392334, "rewards/format_reward_step": 0.984375, "step": 194 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.26188449282199144, "aux_distill/mean_u": 0.09851403474716852, "aux_distill/n_active_tok": 497.90625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5571828358208955, "calib/avg_num_step_conf": 7.89453125, "calib/ece": 0.12019566929133857, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.051181102362204724, "calib/gap": 0.04633953980099503, "calib/mean_conf": 0.5530326771653543, "calib/mu_c": 0.5749253731343283, "calib/mu_w": 0.5285858333333333, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07283464566929132, "calib/std_conf": 0.17800778558107633, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1612.0, "completions/max_terminated_length": 1612.0, "completions/mean_length": 502.91796875, "completions/mean_terminated_length": 504.8902282714844, "completions/min_length": 0.0, "completions/min_terminated_length": 208.0, "epoch": 0.208, "grad_norm": 0.007816116325557232, "learning_rate": 1.3888888888888888e-07, "loss": 0.0065, "num_tokens": 41773519.0, "reward": 1.1256775856018066, "reward_std": 0.17670682072639465, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.7357300519943237, "rewards/format_reward_step": 0.9921875, "step": 195 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.25544829899445176, "aux_distill/mean_u": 0.0779318698241067, "aux_distill/n_active_tok": 490.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5593128505864355, "calib/avg_num_step_conf": 7.67578125, "calib/ece": 0.09490984251968505, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.031496062992125984, "calib/gap": 0.028060135135135167, "calib/mean_conf": 0.55635, "calib/mu_c": 0.5680601351351352, "calib/mu_w": 0.54, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03429133858267718, "calib/std_conf": 0.16964918057127165, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 932.0, "completions/max_terminated_length": 932.0, "completions/mean_length": 454.61328125, "completions/mean_terminated_length": 456.3961181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.20906666666666668, "grad_norm": 0.008982392027974129, "learning_rate": 1.1111111111111112e-07, "loss": 0.0266, "num_tokens": 41992444.0, "reward": 1.1527657508850098, "reward_std": 0.17497286200523376, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.73521888256073, "rewards/format_reward_step": 0.9921875, "step": 196 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.30042441678233445, "aux_distill/mean_u": 0.12130369822225158, "aux_distill/n_active_tok": 494.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6012697049622768, "calib/avg_num_step_conf": 7.76171875, "calib/ece": 0.07515624999999998, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.05859375, "calib/gap": 0.06255903821382569, "calib/mean_conf": 0.572890625, "calib/mu_c": 0.6019708029197081, "calib/mu_w": 0.5394117647058824, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05644531249999998, "calib/std_conf": 0.1681013103372766, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 511.00390625, "completions/mean_terminated_length": 513.0078735351562, "completions/min_length": 0.0, "completions/min_terminated_length": 233.0, "epoch": 0.21013333333333334, "grad_norm": 0.007946827448904514, "learning_rate": 8.333333333333334e-08, "loss": 0.0417, "num_tokens": 42228317.0, "reward": 1.1439175605773926, "reward_std": 0.18342596292495728, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.7526788711547852, "rewards/format_reward_step": 1.0, "step": 197 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24310559080913663, "aux_distill/mean_u": 0.06648958651023255, "aux_distill/n_active_tok": 526.03125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.486159169550173, "calib/avg_num_step_conf": 8.21875, "calib/ece": 0.1758301960784314, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0392156862745098, "calib/gap": -0.008017647058823574, "calib/mean_conf": 0.5491109803921569, "calib/mu_c": 0.5459039215686274, "calib/mu_w": 0.553921568627451, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06247058823529414, "calib/std_conf": 0.1798272084503311, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2580.0, "completions/max_terminated_length": 2580.0, "completions/mean_length": 543.18359375, "completions/mean_terminated_length": 543.18359375, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 0.2112, "grad_norm": 0.007474220357835293, "learning_rate": 5.555555555555556e-08, "loss": 0.0647, "num_tokens": 42472756.0, "reward": 1.1560783386230469, "reward_std": 0.19086578488349915, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.7184067368507385, "rewards/format_reward_step": 0.99609375, "step": 198 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2779740021796897, "aux_distill/mean_u": 0.11147548417562779, "aux_distill/n_active_tok": 554.5, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5123243933588761, "calib/avg_num_step_conf": 8.8203125, "calib/ece": 0.13972766798418973, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.05138339920948617, "calib/gap": 0.014945051085568206, "calib/mean_conf": 0.552999604743083, "calib/mu_c": 0.5593793103448275, "calib/mu_w": 0.5444342592592593, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.059802371541502, "calib/std_conf": 0.16990868054094213, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2556.0, "completions/max_terminated_length": 2556.0, "completions/mean_length": 573.48828125, "completions/mean_terminated_length": 575.7373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 201.0, "epoch": 0.21226666666666666, "grad_norm": 0.006655022501945496, "learning_rate": 2.777777777777778e-08, "loss": 0.0542, "num_tokens": 42723769.0, "reward": 1.1397393941879272, "reward_std": 0.22068241238594055, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.7247913479804993, "rewards/format_reward_step": 0.98828125, "step": 199 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.24724427005276084, "aux_distill/mean_u": 0.07388270953856758, "aux_distill/n_active_tok": 515.75, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.561402942694889, "calib/avg_num_step_conf": 8.0625, "calib/ece": 0.10376403162055337, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.023715415019762844, "calib/gap": 0.033141281621063334, "calib/mean_conf": 0.5397324110671937, "calib/mu_c": 0.5533557046979865, "calib/mu_w": 0.5202144230769231, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.027281818181818195, "calib/std_conf": 0.16138108452849692, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2466.0, "completions/max_terminated_length": 2466.0, "completions/mean_length": 568.984375, "completions/mean_terminated_length": 568.984375, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.21333333333333335, "grad_norm": 0.007749881129711866, "learning_rate": 0.0, "loss": 0.0315, "num_tokens": 42977477.0, "reward": 1.1535335779190063, "reward_std": 0.178613543510437, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7367547154426575, "rewards/format_reward_step": 0.98828125, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.050044363581109794, "train_runtime": 17251.8426, "train_samples_per_second": 2.968, "train_steps_per_second": 0.012 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 42977477, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }