{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.49919376007799904, "eval_steps": 50, "global_step": 208, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.5072314549050652, "calibration/batch_distribution_entropy": 0.25912438679992567, "calibration/confidence_entropy": 0.21267152309193102, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4754453653490055, "calibration/mean_confidence": 0.9199576586928551, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.019357638888888883, "completions/max_length": 4044.2, "completions/max_terminated_length": 4044.2, "completions/mean_length": 522.4719482421875, "completions/mean_terminated_length": 532.7948364257812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011999850001874977, "grad_norm": 0.0038337723817676306, "learning_rate": 5.952380952380953e-07, "loss": 0.006, "num_tokens": 9133085.0, "reward": 0.4843825876712799, "reward_std": 0.4449997007846832, "rewards/accuracy_reward": 0.2596354126930237, "rewards/batch_coverage_0": 0.006870156154036522, "rewards/batch_coverage_1": 0.006870156154036522, "rewards/batch_coverage_10": 0.01887576384469867, "rewards/batch_coverage_15": 0.034924595057964324, "rewards/batch_coverage_20": 0.060147954523563384, "rewards/batch_coverage_25": 0.07294031232595444, "rewards/batch_coverage_5": 0.00987731795758009, "rewards/brier_reward": 0.31120150685310366, "rewards/confidence_uniqueness_reward": 0.2898163080215454, "rewards/format_reward": 0.5999999880790711, "rewards/frontier_aurc_reward": 0.27386353611946107, "rewards/frontier_ece_reward": 0.27386353611946107, "rewards/frontier_entropy_batch_reward": -0.5739716529846192, "signal/accuracy_reward/centered_abs_mean": 0.308056640625, "signal/accuracy_reward/group_std_mean": 0.3664651155471802, "signal/accuracy_reward/group_zero_std_frac": 0.09722222313284874, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1540283203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1540283203125, "signal/advantage_abs_mean": 0.3868393898010254, "signal/advantage_pre_scale_abs_mean": 0.3868393898010254, "signal/advantage_pre_scale_std": 0.4560303032398224, "signal/advantage_std": 0.4560303032398224, "signal/batch_coverage_0/centered_abs_mean": 0.015426980517804622, "signal/batch_coverage_0/group_std_mean": 0.03147497624158859, "signal/batch_coverage_0/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0015426980331540107, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0015426980331540107, "signal/batch_coverage_1/centered_abs_mean": 0.015426980517804622, "signal/batch_coverage_1/group_std_mean": 0.03147497624158859, "signal/batch_coverage_1/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0015426980331540107, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0015426980331540107, "signal/batch_coverage_10/centered_abs_mean": 0.027048196457326413, "signal/batch_coverage_10/group_std_mean": 0.04408616162836552, "signal/batch_coverage_10/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.002704819617792964, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002704819617792964, "signal/batch_coverage_15/centered_abs_mean": 0.045237339287996295, "signal/batch_coverage_15/group_std_mean": 0.06460195034742355, "signal/batch_coverage_15/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.004523734096437693, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.004523734096437693, "signal/batch_coverage_20/centered_abs_mean": 0.081603392213583, "signal/batch_coverage_20/group_std_mean": 0.10613198131322861, "signal/batch_coverage_20/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.008160339388996362, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.008160339388996362, "signal/batch_coverage_25/centered_abs_mean": 0.10508095026016236, "signal/batch_coverage_25/group_std_mean": 0.13301836997270583, "signal/batch_coverage_25/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.010508095007389784, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.010508095007389784, "signal/batch_coverage_5/centered_abs_mean": 0.018093755841255187, "signal/batch_coverage_5/group_std_mean": 0.03443591110408306, "signal/batch_coverage_5/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.001809375500306487, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.001809375500306487, "signal/brier_reward/centered_abs_mean": 0.31866692304611205, "signal/brier_reward/group_std_mean": 0.3707259178161621, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03186669200658798, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03186669200658798, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.23531839847564698, "signal/confidence_uniqueness_reward/group_std_mean": 0.28704230189323426, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023531839624047278, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023531839624047278, "signal/format_reward/centered_abs_mean": 0.43978949785232546, "signal/format_reward/group_std_mean": 0.47480629086494447, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.21989474892616273, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.21989474892616273, "signal/frontier_aurc_reward/centered_abs_mean": 0.30898670554161073, "signal/frontier_aurc_reward/group_std_mean": 0.3652248322963715, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0038623338099569083, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0038623338099569083, "signal/frontier_ece_reward/centered_abs_mean": 0.30898670554161073, "signal/frontier_ece_reward/group_std_mean": 0.3652248322963715, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030898670479655267, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030898670479655267, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4491934359073639, "signal/frontier_entropy_batch_reward/group_std_mean": 0.48202226758003236, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0449193462729454, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0449193462729454, "step": 5 }, { "calibration/aurc": 0.4854229790205542, "calibration/batch_distribution_entropy": 0.20949222832148623, "calibration/confidence_entropy": 0.20656387188253925, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.44620888545316256, "calibration/mean_confidence": 0.9278155657407705, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01649305555555556, "completions/max_length": 3907.8, "completions/max_terminated_length": 3907.8, "completions/mean_length": 477.81553955078124, "completions/mean_terminated_length": 486.00384521484375, "completions/min_length": 0.0, "completions/min_terminated_length": 11.2, "epoch": 0.023999700003749954, "grad_norm": 0.004502618685364723, "learning_rate": 1.1904761904761906e-06, "loss": 0.0054, "num_tokens": 17720240.0, "reward": 0.547285407781601, "reward_std": 0.4114623963832855, "rewards/accuracy_reward": 0.2848090291023254, "rewards/batch_coverage_0": 0.005022120475769043, "rewards/batch_coverage_1": 0.005022120475769043, "rewards/batch_coverage_10": 0.018738916981965302, "rewards/batch_coverage_15": 0.033264560624957085, "rewards/batch_coverage_20": 0.044974899291992186, "rewards/batch_coverage_25": 0.05766131952404976, "rewards/batch_coverage_5": 0.009930487908422946, "rewards/brier_reward": 0.3467004060745239, "rewards/confidence_uniqueness_reward": 0.341348922252655, "rewards/format_reward": 0.704600703716278, "rewards/frontier_aurc_reward": 0.2998257100582123, "rewards/frontier_ece_reward": 0.2998257100582123, "rewards/frontier_entropy_batch_reward": -0.6741620063781738, "signal/accuracy_reward/centered_abs_mean": 0.3191785991191864, "signal/accuracy_reward/group_std_mean": 0.37992130517959594, "signal/accuracy_reward/group_zero_std_frac": 0.07222222462296486, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1595892995595932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1595892995595932, "signal/advantage_abs_mean": 0.34345640540122985, "signal/advantage_pre_scale_abs_mean": 0.34345640540122985, "signal/advantage_pre_scale_std": 0.4176114320755005, "signal/advantage_std": 0.4176114320755005, "signal/batch_coverage_0/centered_abs_mean": 0.013590708374977112, "signal/batch_coverage_0/group_std_mean": 0.028382838889956474, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0013590708374977111, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0013590708374977111, "signal/batch_coverage_1/centered_abs_mean": 0.013590708374977112, "signal/batch_coverage_1/group_std_mean": 0.028382838889956474, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0013590708374977111, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0013590708374977111, "signal/batch_coverage_10/centered_abs_mean": 0.019663126021623612, "signal/batch_coverage_10/group_std_mean": 0.03664802610874176, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.0019663126906380056, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0019663126906380056, "signal/batch_coverage_15/centered_abs_mean": 0.03258531279861927, "signal/batch_coverage_15/group_std_mean": 0.052305429428815844, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.0032585313078016044, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0032585313078016044, "signal/batch_coverage_20/centered_abs_mean": 0.04690381959080696, "signal/batch_coverage_20/group_std_mean": 0.06873543262481689, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.004690382117405534, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.004690382117405534, "signal/batch_coverage_25/centered_abs_mean": 0.06806138753890992, "signal/batch_coverage_25/group_std_mean": 0.09285385459661484, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.00680613899603486, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.00680613899603486, "signal/batch_coverage_5/centered_abs_mean": 0.014666478522121907, "signal/batch_coverage_5/group_std_mean": 0.030367295444011688, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0014666478615254163, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0014666478615254163, "signal/brier_reward/centered_abs_mean": 0.3143719911575317, "signal/brier_reward/group_std_mean": 0.3694416105747223, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03143719844520092, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03143719844520092, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.22439135909080504, "signal/confidence_uniqueness_reward/group_std_mean": 0.2801817238330841, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02243913747370243, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02243913747370243, "signal/format_reward/centered_abs_mean": 0.36346028447151185, "signal/format_reward/group_std_mean": 0.4252608478069305, "signal/format_reward/group_zero_std_frac": 0.002777777798473835, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.18173014223575593, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.18173014223575593, "signal/frontier_aurc_reward/centered_abs_mean": 0.3132124185562134, "signal/frontier_aurc_reward/group_std_mean": 0.3720038115978241, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003915155259892345, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003915155259892345, "signal/frontier_ece_reward/centered_abs_mean": 0.3132124185562134, "signal/frontier_ece_reward/group_std_mean": 0.3720038115978241, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03132124207913876, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03132124207913876, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38731067180633544, "signal/frontier_entropy_batch_reward/group_std_mean": 0.444005960226059, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.038731067627668384, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038731067627668384, "step": 10 }, { "calibration/aurc": 0.5478506539210682, "calibration/batch_distribution_entropy": 0.2786014424283696, "calibration/confidence_entropy": 0.22692472615850523, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5215523162541775, "calibration/mean_confidence": 0.9158861678111789, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01293402777777779, "completions/max_length": 3933.8, "completions/max_terminated_length": 3933.8, "completions/mean_length": 427.5373291015625, "completions/mean_terminated_length": 433.16363525390625, "completions/min_length": 0.0, "completions/min_terminated_length": 29.4, "epoch": 0.03599955000562493, "grad_norm": 0.0019597464706748724, "learning_rate": 1.7857142857142859e-06, "loss": -0.009, "num_tokens": 25747454.0, "reward": 0.6696977734565734, "reward_std": 0.3273072600364685, "rewards/accuracy_reward": 0.30598958134651183, "rewards/batch_coverage_0": 0.012201336584985257, "rewards/batch_coverage_1": 0.012201336584985257, "rewards/batch_coverage_10": 0.028961936943233013, "rewards/batch_coverage_15": 0.0324846439063549, "rewards/batch_coverage_20": 0.03875870779156685, "rewards/batch_coverage_25": 0.049946078658103944, "rewards/batch_coverage_5": 0.01885376647114754, "rewards/brier_reward": 0.40501958727836607, "rewards/confidence_uniqueness_reward": 0.48818616271018983, "rewards/format_reward": 0.9152777671813965, "rewards/frontier_aurc_reward": 0.3329282164573669, "rewards/frontier_ece_reward": 0.3329282164573669, "rewards/frontier_entropy_batch_reward": -0.8705167293548584, "signal/accuracy_reward/centered_abs_mean": 0.31670464277267457, "signal/accuracy_reward/group_std_mean": 0.3770868182182312, "signal/accuracy_reward/group_zero_std_frac": 0.07777777910232545, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15835232138633729, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15835232138633729, "signal/advantage_abs_mean": 0.2627263814210892, "signal/advantage_pre_scale_abs_mean": 0.2627263814210892, "signal/advantage_pre_scale_std": 0.3356821537017822, "signal/advantage_std": 0.3356821537017822, "signal/batch_coverage_0/centered_abs_mean": 0.019925065338611603, "signal/batch_coverage_0/group_std_mean": 0.03897041603922844, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.001992506510578096, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.001992506510578096, "signal/batch_coverage_1/centered_abs_mean": 0.019925065338611603, "signal/batch_coverage_1/group_std_mean": 0.03897041603922844, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.001992506510578096, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.001992506510578096, "signal/batch_coverage_10/centered_abs_mean": 0.026121413335204124, "signal/batch_coverage_10/group_std_mean": 0.04870801717042923, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.002612141496501863, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002612141496501863, "signal/batch_coverage_15/centered_abs_mean": 0.028343011811375617, "signal/batch_coverage_15/group_std_mean": 0.0515684649348259, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.002834301325492561, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002834301325492561, "signal/batch_coverage_20/centered_abs_mean": 0.0341463316231966, "signal/batch_coverage_20/group_std_mean": 0.05852316170930862, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.0034146332647651432, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0034146332647651432, "signal/batch_coverage_25/centered_abs_mean": 0.04928958341479302, "signal/batch_coverage_25/group_std_mean": 0.07621403560042381, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.004928958602249622, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.004928958602249622, "signal/batch_coverage_5/centered_abs_mean": 0.021870536357164384, "signal/batch_coverage_5/group_std_mean": 0.04211917594075203, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0021870536962524056, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0021870536962524056, "signal/brier_reward/centered_abs_mean": 0.3000001132488251, "signal/brier_reward/group_std_mean": 0.35329429507255555, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03000001087784767, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03000001087784767, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.18271521627902984, "signal/confidence_uniqueness_reward/group_std_mean": 0.23359984457492827, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.018271520733833313, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.018271520733833313, "signal/format_reward/centered_abs_mean": 0.13900824636220932, "signal/format_reward/group_std_mean": 0.22593195140361785, "signal/format_reward/group_zero_std_frac": 0.20833333656191827, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06950412318110466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.06950412318110466, "signal/frontier_aurc_reward/centered_abs_mean": 0.3091962695121765, "signal/frontier_aurc_reward/group_std_mean": 0.36607717275619506, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0038649535737931727, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0038649535737931727, "signal/frontier_ece_reward/centered_abs_mean": 0.3091962695121765, "signal/frontier_ece_reward/group_std_mean": 0.36607717275619506, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03091962859034538, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03091962859034538, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.20600322484970093, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3077341616153717, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06666666939854622, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02060032319277525, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02060032319277525, "step": 15 }, { "calibration/aurc": 0.4821319044702136, "calibration/batch_distribution_entropy": 0.3488917045405391, "calibration/buffer_distribution_entropy": 0.2759978235953544, "calibration/confidence_entropy": 0.2768365393170709, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.042105263157894736, "calibration/coverage@5%": 0.0, "calibration/ece": 0.42374968063460816, "calibration/mean_confidence": 0.897871143815777, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010329861111111116, "completions/max_length": 4012.0, "completions/max_terminated_length": 4012.0, "completions/mean_length": 444.3298645019531, "completions/mean_terminated_length": 448.9945007324219, "completions/min_length": 0.0, "completions/min_terminated_length": 77.6, "epoch": 0.04799940000749991, "grad_norm": 0.001022929442115128, "learning_rate": 2.380952380952381e-06, "loss": -0.0083, "num_tokens": 33979830.0, "reward": 0.7796724319458008, "reward_std": 0.287617164850235, "rewards/accuracy_reward": 0.4071180522441864, "rewards/batch_coverage_0": 0.021281986311078072, "rewards/batch_coverage_1": 0.021281986311078072, "rewards/batch_coverage_10": 0.07034937888383866, "rewards/batch_coverage_15": 0.07969716563820839, "rewards/batch_coverage_20": 0.10704035162925721, "rewards/batch_coverage_25": 0.1295667678117752, "rewards/batch_coverage_5": 0.03042309693992138, "rewards/brier_reward": 0.5185995876789093, "rewards/confidence_uniqueness_reward": 0.5843602895736695, "rewards/format_reward": 0.98203125, "rewards/frontier_aurc_reward": 0.2070214475505054, "rewards/frontier_ece_reward": 0.19309240709990264, "rewards/frontier_entropy_batch_reward": -0.9305931091308594, "signal/accuracy_reward/centered_abs_mean": 0.30668402314186094, "signal/accuracy_reward/group_std_mean": 0.37011672258377076, "signal/accuracy_reward/group_zero_std_frac": 0.08611111342906952, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15334201157093047, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15334201157093047, "signal/advantage_abs_mean": 0.22955691814422607, "signal/advantage_pre_scale_abs_mean": 0.22955691814422607, "signal/advantage_pre_scale_std": 0.2956406891345978, "signal/advantage_std": 0.2956406891345978, "signal/batch_coverage_0/centered_abs_mean": 0.027153197303414346, "signal/batch_coverage_0/group_std_mean": 0.0489169105887413, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0027153198141604664, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0027153198141604664, "signal/batch_coverage_1/centered_abs_mean": 0.027153197303414346, "signal/batch_coverage_1/group_std_mean": 0.0489169105887413, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0027153198141604664, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0027153198141604664, "signal/batch_coverage_10/centered_abs_mean": 0.044522304087877274, "signal/batch_coverage_10/group_std_mean": 0.07788674235343933, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.004452230548486114, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.004452230548486114, "signal/batch_coverage_15/centered_abs_mean": 0.04999452456831932, "signal/batch_coverage_15/group_std_mean": 0.08566224724054336, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.004999452503398061, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.004999452503398061, "signal/batch_coverage_20/centered_abs_mean": 0.07234487235546112, "signal/batch_coverage_20/group_std_mean": 0.11505262106657028, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.007234487310051918, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.007234487310051918, "signal/batch_coverage_25/centered_abs_mean": 0.10189752876758576, "signal/batch_coverage_25/group_std_mean": 0.15013812482357025, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.010189752839505673, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.010189752839505673, "signal/batch_coverage_5/centered_abs_mean": 0.02882133685052395, "signal/batch_coverage_5/group_std_mean": 0.05224255993962288, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0028821338899433615, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0028821338899433615, "signal/brier_reward/centered_abs_mean": 0.276527601480484, "signal/brier_reward/group_std_mean": 0.3314927339553833, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027652759104967117, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.027652759104967117, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.15654111206531524, "signal/confidence_uniqueness_reward/group_std_mean": 0.19497571289539337, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015654110908508302, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015654110908508302, "signal/format_reward/centered_abs_mean": 0.03294813297688961, "signal/format_reward/group_std_mean": 0.07249769493937493, "signal/format_reward/group_zero_std_frac": 0.6666666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.016474066488444804, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.016474066488444804, "signal/frontier_aurc_reward/centered_abs_mean": 0.150981783028692, "signal/frontier_aurc_reward/group_std_mean": 0.18202955992892383, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0018872723849199247, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0018872723849199247, "signal/frontier_ece_reward/centered_abs_mean": 0.2331768661737442, "signal/frontier_ece_reward/group_std_mean": 0.2786339819431305, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.023317687213420868, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.023317687213420868, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.12025448828935623, "signal/frontier_entropy_batch_reward/group_std_mean": 0.22101308405399323, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.18888889104127884, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.012025448866188527, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.012025448866188527, "step": 20 }, { "calibration/aurc": 0.3805529542630866, "calibration/batch_distribution_entropy": 0.5245517949798162, "calibration/buffer_distribution_entropy": 0.32109023757926547, "calibration/confidence_entropy": 0.3423178730715135, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.02198952879581152, "calibration/coverage@20%": 0.10837696335078535, "calibration/coverage@25%": 0.13298429319371727, "calibration/coverage@30%": 0.21904761904761907, "calibration/coverage@5%": 0.0, "calibration/ece": 0.2970157676613897, "calibration/mean_confidence": 0.8597930621346853, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010937499999999978, "completions/max_length": 3883.6, "completions/max_terminated_length": 3883.6, "completions/mean_length": 481.8978332519531, "completions/mean_terminated_length": 487.2155456542969, "completions/min_length": 0.0, "completions/min_terminated_length": 94.8, "epoch": 0.05999925000937488, "grad_norm": 0.0010147301945835352, "learning_rate": 2.9761904761904763e-06, "loss": -0.0063, "num_tokens": 42655741.0, "reward": 0.8809885144233703, "reward_std": 0.2663578689098358, "rewards/accuracy_reward": 0.5109375, "rewards/batch_coverage_0": 0.05274242460727692, "rewards/batch_coverage_1": 0.05274242460727692, "rewards/batch_coverage_10": 0.156163290143013, "rewards/batch_coverage_15": 0.17960385233163834, "rewards/batch_coverage_20": 0.21484352350234986, "rewards/batch_coverage_25": 0.2300809234380722, "rewards/batch_coverage_5": 0.07413579896092415, "rewards/brier_reward": 0.6292544364929199, "rewards/confidence_uniqueness_reward": 0.6723939895629882, "rewards/format_reward": 0.9863715410232544, "rewards/frontier_aurc_reward": -0.004603662062436342, "rewards/frontier_ece_reward": -0.0024937400594353676, "rewards/frontier_entropy_batch_reward": -0.9355515599250793, "signal/accuracy_reward/centered_abs_mean": 0.27855902910232544, "signal/accuracy_reward/group_std_mean": 0.3460928499698639, "signal/accuracy_reward/group_zero_std_frac": 0.1111111119389534, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.13927951455116272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.13927951455116272, "signal/advantage_abs_mean": 0.2031567931175232, "signal/advantage_pre_scale_abs_mean": 0.2031567931175232, "signal/advantage_pre_scale_std": 0.2749268710613251, "signal/advantage_std": 0.2749268710613251, "signal/batch_coverage_0/centered_abs_mean": 0.04045618698000908, "signal/batch_coverage_0/group_std_mean": 0.06610891073942185, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.004045618698000908, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.004045618698000908, "signal/batch_coverage_1/centered_abs_mean": 0.04045618698000908, "signal/batch_coverage_1/group_std_mean": 0.06610891073942185, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.004045618698000908, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.004045618698000908, "signal/batch_coverage_10/centered_abs_mean": 0.07423198148608208, "signal/batch_coverage_10/group_std_mean": 0.12178197354078293, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.007423198502510786, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.007423198502510786, "signal/batch_coverage_15/centered_abs_mean": 0.08689813762903213, "signal/batch_coverage_15/group_std_mean": 0.14011010825634002, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.008689813409000635, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.008689813409000635, "signal/batch_coverage_20/centered_abs_mean": 0.11602874398231507, "signal/batch_coverage_20/group_std_mean": 0.1792426437139511, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.011602874659001828, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.011602874659001828, "signal/batch_coverage_25/centered_abs_mean": 0.13573874533176422, "signal/batch_coverage_25/group_std_mean": 0.20239817202091218, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013573875278234481, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.013573875278234481, "signal/batch_coverage_5/centered_abs_mean": 0.04396994709968567, "signal/batch_coverage_5/group_std_mean": 0.07224964424967766, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.004396994784474373, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.004396994784474373, "signal/brier_reward/centered_abs_mean": 0.2282465249300003, "signal/brier_reward/group_std_mean": 0.282819801568985, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022824652120471002, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.022824652120471002, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1486896753311157, "signal/confidence_uniqueness_reward/group_std_mean": 0.178310364484787, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.014868967980146409, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.014868967980146409, "signal/format_reward/centered_abs_mean": 0.02458224818110466, "signal/format_reward/group_std_mean": 0.050415501743555066, "signal/format_reward/group_zero_std_frac": 0.7805555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01229112409055233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01229112409055233, "signal/frontier_aurc_reward/centered_abs_mean": 0.0032616199925541876, "signal/frontier_aurc_reward/group_std_mean": 0.004778617806732654, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0770251507638024e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0770251507638024e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.1397606164216995, "signal/frontier_ece_reward/group_std_mean": 0.16592053472995758, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.013976060971617698, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.013976060971617698, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1111811563372612, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2114289492368698, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.26111111640930174, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011118116043508052, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011118116043508052, "step": 25 }, { "calibration/aurc": 0.3004850488737162, "calibration/batch_distribution_entropy": 0.6425691893665822, "calibration/buffer_distribution_entropy": 0.39893373230297585, "calibration/confidence_entropy": 0.3918151368055475, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.07807486631016043, "calibration/coverage@25%": 0.23711550510717042, "calibration/coverage@30%": 0.5197976414978907, "calibration/coverage@5%": 0.0, "calibration/ece": 0.17719548548718614, "calibration/mean_confidence": 0.8239461402890431, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015104166666666674, "completions/max_length": 3855.4, "completions/max_terminated_length": 3855.4, "completions/mean_length": 558.49384765625, "completions/mean_terminated_length": 567.1655395507812, "completions/min_length": 0.0, "completions/min_terminated_length": 117.8, "epoch": 0.07199910001124986, "grad_norm": 0.0006667505949735641, "learning_rate": 3.5714285714285718e-06, "loss": -0.0094, "num_tokens": 52199510.0, "reward": 0.9673492670059204, "reward_std": 0.25074209868907926, "rewards/accuracy_reward": 0.5760416626930237, "rewards/batch_coverage_0": 0.1101593405008316, "rewards/batch_coverage_1": 0.1101593405008316, "rewards/batch_coverage_10": 0.2120990425348282, "rewards/batch_coverage_15": 0.23272857069969177, "rewards/batch_coverage_20": 0.2680616557598114, "rewards/batch_coverage_25": 0.2810434937477112, "rewards/batch_coverage_5": 0.1666581004858017, "rewards/brier_reward": 0.6929940223693848, "rewards/confidence_uniqueness_reward": 0.7037853598594666, "rewards/format_reward": 0.9827257037162781, "rewards/frontier_aurc_reward": -0.0037181817460805178, "rewards/frontier_ece_reward": 0.01464775917120278, "rewards/frontier_entropy_batch_reward": -0.9122162461280823, "signal/accuracy_reward/centered_abs_mean": 0.24763454794883727, "signal/accuracy_reward/group_std_mean": 0.3104135751724243, "signal/accuracy_reward/group_zero_std_frac": 0.1777777835726738, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.12381727397441863, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.12381727397441863, "signal/advantage_abs_mean": 0.18919424712657928, "signal/advantage_pre_scale_abs_mean": 0.18919424712657928, "signal/advantage_pre_scale_std": 0.2605606704950333, "signal/advantage_std": 0.2605606704950333, "signal/batch_coverage_0/centered_abs_mean": 0.06281921863555909, "signal/batch_coverage_0/group_std_mean": 0.09205281436443329, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0062819220125675205, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0062819220125675205, "signal/batch_coverage_1/centered_abs_mean": 0.06281921863555909, "signal/batch_coverage_1/group_std_mean": 0.09205281436443329, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0062819220125675205, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0062819220125675205, "signal/batch_coverage_10/centered_abs_mean": 0.0929559051990509, "signal/batch_coverage_10/group_std_mean": 0.13886004090309143, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.009295590780675411, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.009295590780675411, "signal/batch_coverage_15/centered_abs_mean": 0.10421416014432908, "signal/batch_coverage_15/group_std_mean": 0.15459922552108765, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.010421415977180003, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.010421415977180003, "signal/batch_coverage_20/centered_abs_mean": 0.13094892650842666, "signal/batch_coverage_20/group_std_mean": 0.19057590663433074, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013094892725348473, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.013094892725348473, "signal/batch_coverage_25/centered_abs_mean": 0.1444714769721031, "signal/batch_coverage_25/group_std_mean": 0.20690469741821288, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.014447147585451603, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.014447147585451603, "signal/batch_coverage_5/centered_abs_mean": 0.0769407331943512, "signal/batch_coverage_5/group_std_mean": 0.11383293271064758, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.007694073114544153, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.007694073114544153, "signal/brier_reward/centered_abs_mean": 0.19581978023052216, "signal/brier_reward/group_std_mean": 0.2459787905216217, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019581978768110277, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019581978768110277, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.15202451646327972, "signal/confidence_uniqueness_reward/group_std_mean": 0.18180184066295624, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01520245186984539, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01520245186984539, "signal/format_reward/centered_abs_mean": 0.02896592915058136, "signal/format_reward/group_std_mean": 0.05586763843894005, "signal/format_reward/group_zero_std_frac": 0.7666666626930236, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01448296457529068, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01448296457529068, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027071074582636356, "signal/frontier_aurc_reward/group_std_mean": 0.003949257265776396, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3838843592093326e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3838843592093326e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.09545410424470901, "signal/frontier_ece_reward/group_std_mean": 0.1201583355665207, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009545410610735416, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009545410610735416, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.15066551864147187, "signal/frontier_entropy_batch_reward/group_std_mean": 0.27135526239871977, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.13888889029622078, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015066551603376865, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015066551603376865, "step": 30 }, { "calibration/aurc": 0.23015040121185343, "calibration/batch_distribution_entropy": 0.5256467913204419, "calibration/buffer_distribution_entropy": 0.44834076164911146, "calibration/confidence_entropy": 0.26321980631678177, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.14310274655102243, "calibration/coverage@20%": 0.4793298240583419, "calibration/coverage@25%": 0.7428456379652152, "calibration/coverage@30%": 0.8, "calibration/coverage@5%": 0.0, "calibration/ece": 0.18346354831539727, "calibration/mean_confidence": 0.8748998868807243, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020486111111111094, "completions/max_length": 3840.6, "completions/max_terminated_length": 3840.6, "completions/mean_length": 615.801318359375, "completions/mean_terminated_length": 628.7870483398438, "completions/min_length": 0.0, "completions/min_terminated_length": 162.4, "epoch": 0.08399895001312484, "grad_norm": 0.0009780606487765908, "learning_rate": 4.166666666666667e-06, "loss": -0.0154, "num_tokens": 62370981.0, "reward": 1.074914562702179, "reward_std": 0.24743359982967378, "rewards/accuracy_reward": 0.6279513835906982, "rewards/batch_coverage_0": 0.1995459720492363, "rewards/batch_coverage_1": 0.1995459720492363, "rewards/batch_coverage_10": 0.3318263113498688, "rewards/batch_coverage_15": 0.35595046579837797, "rewards/batch_coverage_20": 0.37790881991386416, "rewards/batch_coverage_25": 0.3956703066825867, "rewards/batch_coverage_5": 0.2661326140165329, "rewards/brier_reward": 0.7158191680908204, "rewards/confidence_uniqueness_reward": 0.6305435240268707, "rewards/format_reward": 0.978124988079071, "rewards/frontier_aurc_reward": -0.003513122210279107, "rewards/frontier_ece_reward": 0.036505474848672746, "rewards/frontier_entropy_batch_reward": -0.7902460932731629, "signal/accuracy_reward/centered_abs_mean": 0.21014539897441864, "signal/accuracy_reward/group_std_mean": 0.26572045087814333, "signal/accuracy_reward/group_zero_std_frac": 0.2888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10507269948720932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10507269948720932, "signal/advantage_abs_mean": 0.18241074681282043, "signal/advantage_pre_scale_abs_mean": 0.18241074681282043, "signal/advantage_pre_scale_std": 0.26696314215660094, "signal/advantage_std": 0.26696314215660094, "signal/batch_coverage_0/centered_abs_mean": 0.06104315221309662, "signal/batch_coverage_0/group_std_mean": 0.09295270442962647, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.00610431581735611, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.00610431581735611, "signal/batch_coverage_1/centered_abs_mean": 0.06104315221309662, "signal/batch_coverage_1/group_std_mean": 0.09295270442962647, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.00610431581735611, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.00610431581735611, "signal/batch_coverage_10/centered_abs_mean": 0.100413478910923, "signal/batch_coverage_10/group_std_mean": 0.15636947602033616, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.0100413478910923, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0100413478910923, "signal/batch_coverage_15/centered_abs_mean": 0.11477141976356506, "signal/batch_coverage_15/group_std_mean": 0.1760800987482071, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01147714201360941, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01147714201360941, "signal/batch_coverage_20/centered_abs_mean": 0.13171382546424865, "signal/batch_coverage_20/group_std_mean": 0.19760640561580659, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013171382062137128, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.013171382062137128, "signal/batch_coverage_25/centered_abs_mean": 0.1536375403404236, "signal/batch_coverage_25/group_std_mean": 0.22362993359565736, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.015363754145801068, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.015363754145801068, "signal/batch_coverage_5/centered_abs_mean": 0.07415727078914643, "signal/batch_coverage_5/group_std_mean": 0.1162964329123497, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.007415727060288191, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.007415727060288191, "signal/brier_reward/centered_abs_mean": 0.19005568623542785, "signal/brier_reward/group_std_mean": 0.2425243467092514, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01900556981563568, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01900556981563568, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.23612670004367828, "signal/confidence_uniqueness_reward/group_std_mean": 0.27312268912792204, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023612670600414276, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023612670600414276, "signal/format_reward/centered_abs_mean": 0.03466796837747097, "signal/format_reward/group_std_mean": 0.060341247171163556, "signal/format_reward/group_zero_std_frac": 0.7666666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.017333984188735486, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.017333984188735486, "signal/frontier_aurc_reward/centered_abs_mean": 0.004159660637378692, "signal/frontier_aurc_reward/group_std_mean": 0.005809722188860178, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1995759713463484e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1995759713463484e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.09585424959659576, "signal/frontier_ece_reward/group_std_mean": 0.12318123281002044, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009585425443947315, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009585425443947315, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2937092065811157, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4011690020561218, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.027777778543531896, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02937092147767544, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02937092147767544, "step": 35 }, { "calibration/aurc": 0.24487568965136558, "calibration/batch_distribution_entropy": 0.6500097565212745, "calibration/buffer_distribution_entropy": 0.4660041553477049, "calibration/confidence_entropy": 0.2837820801954671, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.4201621979024496, "calibration/coverage@25%": 0.6752763614382925, "calibration/coverage@30%": 0.849405582808501, "calibration/coverage@5%": 0.0, "calibration/ece": 0.17441238108133758, "calibration/mean_confidence": 0.8161932812282213, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017100694444444443, "completions/max_length": 3569.8, "completions/max_terminated_length": 3569.8, "completions/mean_length": 670.5376953125, "completions/mean_terminated_length": 682.1771606445312, "completions/min_length": 0.0, "completions/min_terminated_length": 191.8, "epoch": 0.09599880001499982, "grad_norm": 0.0007898859912529588, "learning_rate": 4.761904761904762e-06, "loss": -0.017, "num_tokens": 73215095.0, "reward": 1.1504925727844237, "reward_std": 0.256816166639328, "rewards/accuracy_reward": 0.6532986164093018, "rewards/batch_coverage_0": 0.22187545001506806, "rewards/batch_coverage_1": 0.22187545001506806, "rewards/batch_coverage_10": 0.3847365379333496, "rewards/batch_coverage_15": 0.41597190499305725, "rewards/batch_coverage_20": 0.437878692150116, "rewards/batch_coverage_25": 0.45056607723236086, "rewards/batch_coverage_5": 0.31669565439224245, "rewards/brier_reward": 0.7430066347122193, "rewards/confidence_uniqueness_reward": 0.6594228565692901, "rewards/format_reward": 0.9816840291023254, "rewards/frontier_aurc_reward": -0.0030309411231428385, "rewards/frontier_ece_reward": 0.03790239319205284, "rewards/frontier_entropy_batch_reward": -0.5595400869846344, "signal/accuracy_reward/centered_abs_mean": 0.18483072519302368, "signal/accuracy_reward/group_std_mean": 0.24741473495960237, "signal/accuracy_reward/group_zero_std_frac": 0.28888889253139494, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09241536259651184, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09241536259651184, "signal/advantage_abs_mean": 0.18649652004241943, "signal/advantage_pre_scale_abs_mean": 0.18649652004241943, "signal/advantage_pre_scale_std": 0.27345457673072815, "signal/advantage_std": 0.27345457673072815, "signal/batch_coverage_0/centered_abs_mean": 0.11488340944051742, "signal/batch_coverage_0/group_std_mean": 0.16767307221889496, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.011488340701907874, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.011488340701907874, "signal/batch_coverage_1/centered_abs_mean": 0.11488340944051742, "signal/batch_coverage_1/group_std_mean": 0.16767307221889496, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.011488340701907874, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.011488340701907874, "signal/batch_coverage_10/centered_abs_mean": 0.16001660525798797, "signal/batch_coverage_10/group_std_mean": 0.23101125061511993, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.016001661494374275, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.016001661494374275, "signal/batch_coverage_15/centered_abs_mean": 0.17803999781608582, "signal/batch_coverage_15/group_std_mean": 0.254090678691864, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01780400052666664, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01780400052666664, "signal/batch_coverage_20/centered_abs_mean": 0.1952661693096161, "signal/batch_coverage_20/group_std_mean": 0.2745431065559387, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.019526617601513863, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.019526617601513863, "signal/batch_coverage_25/centered_abs_mean": 0.2111252725124359, "signal/batch_coverage_25/group_std_mean": 0.29278976321220396, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.0211125273257494, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0211125273257494, "signal/batch_coverage_5/centered_abs_mean": 0.13654999434947968, "signal/batch_coverage_5/group_std_mean": 0.19951672554016114, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013654999434947968, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.013654999434947968, "signal/brier_reward/centered_abs_mean": 0.19350166916847228, "signal/brier_reward/group_std_mean": 0.2490565538406372, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019350168108940125, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019350168108940125, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.24396952986717224, "signal/confidence_uniqueness_reward/group_std_mean": 0.2847494125366211, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02439695280045271, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02439695280045271, "signal/format_reward/centered_abs_mean": 0.0307020403444767, "signal/format_reward/group_std_mean": 0.05410040691494942, "signal/format_reward/group_zero_std_frac": 0.7888888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01535102017223835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01535102017223835, "signal/frontier_aurc_reward/centered_abs_mean": 0.0033766867127269506, "signal/frontier_aurc_reward/group_std_mean": 0.004707662016153335, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2208583545289e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2208583545289e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.07757208049297333, "signal/frontier_ece_reward/group_std_mean": 0.10421034693717957, "signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0077572080306708814, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0077572080306708814, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3927412450313568, "signal/frontier_entropy_batch_reward/group_std_mean": 0.46981320977211, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.01111111119389534, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03927412405610085, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03927412405610085, "step": 40 }, { "calibration/aurc": 0.16850152989275416, "calibration/batch_distribution_entropy": 0.7358482569441548, "calibration/buffer_distribution_entropy": 0.5184809949070356, "calibration/confidence_entropy": 0.31368130358296725, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.09847723025533593, "calibration/coverage@15%": 0.4264761977408342, "calibration/coverage@20%": 0.7963623568945273, "calibration/coverage@25%": 0.9617862772303235, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.021465968586387434, "calibration/ece": 0.13108366387334294, "calibration/mean_confidence": 0.7715716669142532, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015538194444444441, "completions/max_length": 3910.0, "completions/max_terminated_length": 3910.0, "completions/mean_length": 737.1761474609375, "completions/mean_terminated_length": 748.7971313476562, "completions/min_length": 0.0, "completions/min_terminated_length": 203.4, "epoch": 0.1079986500168748, "grad_norm": 0.000921767670661211, "learning_rate": 4.909638554216868e-06, "loss": -0.0162, "num_tokens": 84842628.0, "reward": 1.1769782543182372, "reward_std": 0.2623761177062988, "rewards/accuracy_reward": 0.6508680582046509, "rewards/batch_coverage_0": 0.23462156355381011, "rewards/batch_coverage_1": 0.23462156355381011, "rewards/batch_coverage_10": 0.37513543367385865, "rewards/batch_coverage_15": 0.40046226382255556, "rewards/batch_coverage_20": 0.42590277194976806, "rewards/batch_coverage_25": 0.4409227430820465, "rewards/batch_coverage_5": 0.3107755959033966, "rewards/brier_reward": 0.7588403224945068, "rewards/confidence_uniqueness_reward": 0.8319248676300048, "rewards/format_reward": 0.9840277671813965, "rewards/frontier_aurc_reward": -0.0025125455809757113, "rewards/frontier_ece_reward": 0.026418356224894524, "rewards/frontier_entropy_batch_reward": -0.4440083742141724, "signal/accuracy_reward/centered_abs_mean": 0.19178602397441863, "signal/accuracy_reward/group_std_mean": 0.2533793181180954, "signal/accuracy_reward/group_zero_std_frac": 0.2833333402872086, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09589301198720931, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09589301198720931, "signal/advantage_abs_mean": 0.19616546034812926, "signal/advantage_pre_scale_abs_mean": 0.19616546034812926, "signal/advantage_pre_scale_std": 0.27432178854942324, "signal/advantage_std": 0.27432178854942324, "signal/batch_coverage_0/centered_abs_mean": 0.1771412819623947, "signal/batch_coverage_0/group_std_mean": 0.24457992613315582, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.017714128270745276, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.017714128270745276, "signal/batch_coverage_1/centered_abs_mean": 0.1771412819623947, "signal/batch_coverage_1/group_std_mean": 0.24457992613315582, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.017714128270745276, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.017714128270745276, "signal/batch_coverage_10/centered_abs_mean": 0.21691608130931855, "signal/batch_coverage_10/group_std_mean": 0.29502947330474855, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.021691609546542166, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.021691609546542166, "signal/batch_coverage_15/centered_abs_mean": 0.22483428716659545, "signal/batch_coverage_15/group_std_mean": 0.30313605070114136, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.022483428567647935, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.022483428567647935, "signal/batch_coverage_20/centered_abs_mean": 0.2426200658082962, "signal/batch_coverage_20/group_std_mean": 0.3239041268825531, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.02426200732588768, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.02426200732588768, "signal/batch_coverage_25/centered_abs_mean": 0.25766364932060243, "signal/batch_coverage_25/group_std_mean": 0.3407094895839691, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.025766366347670554, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.025766366347670554, "signal/batch_coverage_5/centered_abs_mean": 0.19479849338531494, "signal/batch_coverage_5/group_std_mean": 0.2667528986930847, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01947984956204891, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.01947984956204891, "signal/brier_reward/centered_abs_mean": 0.19988948702812195, "signal/brier_reward/group_std_mean": 0.25549029409885404, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019988948851823805, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019988948851823805, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1287672832608223, "signal/confidence_uniqueness_reward/group_std_mean": 0.16340579688549042, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012876728549599648, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012876728549599648, "signal/format_reward/centered_abs_mean": 0.02685546912252903, "signal/format_reward/group_std_mean": 0.052517061680555345, "signal/format_reward/group_zero_std_frac": 0.775, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013427734561264514, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013427734561264514, "signal/frontier_aurc_reward/centered_abs_mean": 0.002215990168042481, "signal/frontier_aurc_reward/group_std_mean": 0.0032423562835901974, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.76998780464055e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.76998780464055e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.05805379226803779, "signal/frontier_ece_reward/group_std_mean": 0.07890773713588714, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005805379338562488, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005805379338562488, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.40570557713508604, "signal/frontier_entropy_batch_reward/group_std_mean": 0.48176873922348024, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04057055860757828, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04057055860757828, "step": 45 }, { "calibration/aurc": 0.3657922181935306, "calibration/batch_distribution_entropy": 0.7370499011455982, "calibration/buffer_distribution_entropy": 0.5546174449056582, "calibration/confidence_entropy": 0.2774853426801891, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.01078167115902965, "calibration/coverage@15%": 0.011320754716981133, "calibration/coverage@20%": 0.06305855243974426, "calibration/coverage@25%": 0.23091601402664813, "calibration/coverage@30%": 0.4311086853697116, "calibration/coverage@5%": 0.0, "calibration/ece": 0.25106672354368215, "calibration/mean_confidence": 0.6794906292963364, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013194444444444463, "completions/max_length": 3656.0, "completions/max_terminated_length": 3656.0, "completions/mean_length": 794.4783081054687, "completions/mean_terminated_length": 805.1396240234375, "completions/min_length": 0.0, "completions/min_terminated_length": 239.0, "epoch": 0.11999850001874976, "grad_norm": 0.0008140106801874936, "learning_rate": 4.759036144578314e-06, "loss": -0.0145, "num_tokens": 97092618.0, "reward": 1.1897046327590943, "reward_std": 0.268722853064537, "rewards/accuracy_reward": 0.6429687380790711, "rewards/batch_coverage_0": 0.2885128676891327, "rewards/batch_coverage_1": 0.2885128676891327, "rewards/batch_coverage_10": 0.41407533288002013, "rewards/batch_coverage_15": 0.4346598982810974, "rewards/batch_coverage_20": 0.4520686209201813, "rewards/batch_coverage_25": 0.46169546246528625, "rewards/batch_coverage_5": 0.3437255859375, "rewards/brier_reward": 0.7455047130584717, "rewards/confidence_uniqueness_reward": 0.7925597071647644, "rewards/format_reward": 0.9861111283302307, "rewards/frontier_aurc_reward": -0.00259375493042171, "rewards/frontier_ece_reward": 0.03039606139063835, "rewards/frontier_entropy_batch_reward": -0.49973989129066465, "signal/accuracy_reward/centered_abs_mean": 0.19058702290058135, "signal/accuracy_reward/group_std_mean": 0.24706077873706817, "signal/accuracy_reward/group_zero_std_frac": 0.3222222298383713, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09529351145029068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09529351145029068, "signal/advantage_abs_mean": 0.20483049750328064, "signal/advantage_pre_scale_abs_mean": 0.20483049750328064, "signal/advantage_pre_scale_std": 0.2846250832080841, "signal/advantage_std": 0.2846250832080841, "signal/batch_coverage_0/centered_abs_mean": 0.20671135187149048, "signal/batch_coverage_0/group_std_mean": 0.2817295253276825, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.02067113555967808, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.02067113555967808, "signal/batch_coverage_1/centered_abs_mean": 0.20671135187149048, "signal/batch_coverage_1/group_std_mean": 0.2817295253276825, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.02067113555967808, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.02067113555967808, "signal/batch_coverage_10/centered_abs_mean": 0.24098095595836638, "signal/batch_coverage_10/group_std_mean": 0.3243285596370697, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.024098095670342445, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.024098095670342445, "signal/batch_coverage_15/centered_abs_mean": 0.24538744986057281, "signal/batch_coverage_15/group_std_mean": 0.3284584999084473, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.02453874610364437, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.02453874610364437, "signal/batch_coverage_20/centered_abs_mean": 0.25401687026023867, "signal/batch_coverage_20/group_std_mean": 0.3383590757846832, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.02540168762207031, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.02540168762207031, "signal/batch_coverage_25/centered_abs_mean": 0.26198176145553587, "signal/batch_coverage_25/group_std_mean": 0.3468038856983185, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.02619817741215229, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.02619817741215229, "signal/batch_coverage_5/centered_abs_mean": 0.21985240578651427, "signal/batch_coverage_5/group_std_mean": 0.29721832275390625, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.021985240653157233, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.021985240653157233, "signal/brier_reward/centered_abs_mean": 0.22155551016330718, "signal/brier_reward/group_std_mean": 0.2792714238166809, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022155551612377165, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.022155551612377165, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1554034858942032, "signal/confidence_uniqueness_reward/group_std_mean": 0.19029141664505006, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015540349110960961, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015540349110960961, "signal/format_reward/centered_abs_mean": 0.02329644076526165, "signal/format_reward/group_std_mean": 0.0442409373819828, "signal/format_reward/group_zero_std_frac": 0.8138888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011648220382630825, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011648220382630825, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022925134282559155, "signal/frontier_aurc_reward/group_std_mean": 0.0032842617481946947, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.865641945390962e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.865641945390962e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.05770089849829674, "signal/frontier_ece_reward/group_std_mean": 0.07514400482177734, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005770089849829674, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005770089849829674, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.41969223618507384, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4983623504638672, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04196922481060028, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04196922481060028, "step": 50 }, { "epoch": 0.11999850001874976, "eval_calibration/aurc": 0.18383896034708969, "eval_calibration/batch_distribution_entropy": 0.6563989942090224, "eval_calibration/buffer_distribution_entropy": 0.5797339776512932, "eval_calibration/confidence_entropy": 0.25943922478959713, "eval_calibration/coverage@0%": 0.17271505376344085, "eval_calibration/coverage@1%": 0.17271505376344085, "eval_calibration/coverage@10%": 0.3370295698924732, "eval_calibration/coverage@15%": 0.478494623655914, "eval_calibration/coverage@20%": 0.6041666666666666, "eval_calibration/coverage@25%": 0.7447916666666666, "eval_calibration/coverage@30%": 0.9635416666666666, "eval_calibration/coverage@5%": 0.17271505376344085, "eval_calibration/ece": 0.2373774714833392, "eval_calibration/mean_confidence": 0.6960221763524769, "eval_completions/clipped_ratio": 0.009548611111111105, "eval_completions/max_length": 3419.6666666666665, "eval_completions/max_terminated_length": 3419.6666666666665, "eval_completions/mean_length": 820.9784952799479, "eval_completions/mean_terminated_length": 828.7581787109375, "eval_completions/min_length": 108.83333333333333, "eval_completions/min_terminated_length": 306.3333333333333, "eval_loss": 0.0, "eval_num_tokens": 97092618.0, "eval_reward": 0.9636127551396688, "eval_reward_std": 0.3094961891571681, "eval_rewards/accuracy_reward": 0.6571180522441864, "eval_rewards/batch_coverage_0": 0.060765147441998124, "eval_rewards/batch_coverage_1": 0.060765147441998124, "eval_rewards/batch_coverage_10": 0.09114414701859157, "eval_rewards/batch_coverage_15": 0.12853367378314337, "eval_rewards/batch_coverage_20": 0.16657670897742113, "eval_rewards/batch_coverage_25": 0.2249002829194069, "eval_rewards/batch_coverage_5": 0.07248803181573749, "eval_rewards/brier_reward": 0.7720614771048228, "eval_rewards/confidence_uniqueness_reward": 0.7892127931118011, "eval_rewards/format_reward": 0.9869791666666666, "eval_rewards/frontier_aurc_reward": -0.0020255156753895185, "eval_rewards/frontier_ece_reward": 0.03642605741818746, "eval_rewards/frontier_entropy_batch_reward": -0.9869791666666666, "eval_runtime": 197.803, "eval_samples_per_second": 5.056, "eval_signal/accuracy_reward/centered_abs_mean": 0.4374457498391469, "eval_signal/accuracy_reward/group_std_mean": 0.4739072273174922, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21872287491957346, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21872287491957346, "eval_signal/advantage_abs_mean": 0.2603100687265396, "eval_signal/advantage_pre_scale_abs_mean": 0.2603100687265396, "eval_signal/advantage_pre_scale_std": 0.30781379838784534, "eval_signal/advantage_std": 0.30781379838784534, "eval_signal/batch_coverage_0/centered_abs_mean": 0.2784964566429456, "eval_signal/batch_coverage_0/group_std_mean": 0.42198194066683453, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.02784964597473542, "eval_signal/batch_coverage_0/weight": 0.10000000149011612, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.02784964597473542, "eval_signal/batch_coverage_1/centered_abs_mean": 0.2784964566429456, "eval_signal/batch_coverage_1/group_std_mean": 0.42198194066683453, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.02784964597473542, "eval_signal/batch_coverage_1/weight": 0.10000000149011612, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.02784964597473542, "eval_signal/batch_coverage_10/centered_abs_mean": 0.26380258798599243, "eval_signal/batch_coverage_10/group_std_mean": 0.38416793445746106, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.02638025985409816, "eval_signal/batch_coverage_10/weight": 0.10000000149011612, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.02638025985409816, "eval_signal/batch_coverage_15/centered_abs_mean": 0.26254650205373764, "eval_signal/batch_coverage_15/group_std_mean": 0.3666886240243912, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.026254651757578056, "eval_signal/batch_coverage_15/weight": 0.10000000149011612, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.026254651757578056, "eval_signal/batch_coverage_20/centered_abs_mean": 0.2590629483262698, "eval_signal/batch_coverage_20/group_std_mean": 0.3404406060775121, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.025906294273833435, "eval_signal/batch_coverage_20/weight": 0.10000000149011612, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.025906294273833435, "eval_signal/batch_coverage_25/centered_abs_mean": 0.32382526497046155, "eval_signal/batch_coverage_25/group_std_mean": 0.39501046637694043, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.032382527055839695, "eval_signal/batch_coverage_25/weight": 0.10000000149011612, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.032382527055839695, "eval_signal/batch_coverage_5/centered_abs_mean": 0.2789640575647354, "eval_signal/batch_coverage_5/group_std_mean": 0.41690531373023987, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.02789640674988429, "eval_signal/batch_coverage_5/weight": 0.10000000149011612, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.02789640674988429, "eval_signal/brier_reward/centered_abs_mean": 0.2922615110874176, "eval_signal/brier_reward/group_std_mean": 0.3561810503403346, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029226152536769707, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.029226152536769707, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.13250999401013056, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.1647503450512886, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01325099915266037, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01325099915266037, "eval_signal/format_reward/centered_abs_mean": 0.02501085043574373, "eval_signal/format_reward/group_std_mean": 0.06767813830326001, "eval_signal/format_reward/group_zero_std_frac": 0.6388889104127884, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.012505425217871865, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.012505425217871865, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002747349014195303, "eval_signal/frontier_aurc_reward/group_std_mean": 0.004716326482594013, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.434186419326579e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.434186419326579e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.0681057870388031, "eval_signal/frontier_ece_reward/group_std_mean": 0.08917686839898427, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006810579060887297, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006810579060887297, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.02501085043574373, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.06767813830326001, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.6388889104127884, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.00250108518715327, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.00250108518715327, "eval_steps_per_second": 0.03, "step": 50 }, { "calibration/aurc": 0.23900662951911134, "calibration/batch_distribution_entropy": 0.6892578373116545, "calibration/buffer_distribution_entropy": 0.5916157765780743, "calibration/confidence_entropy": 0.2553431192007102, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.1387434554973822, "calibration/coverage@15%": 0.16125654450261778, "calibration/coverage@20%": 0.5184954532929181, "calibration/coverage@25%": 0.677514619883041, "calibration/coverage@30%": 0.8519459409679054, "calibration/coverage@5%": 0.0, "calibration/ece": 0.19219343225644983, "calibration/mean_confidence": 0.7162487190539071, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017013888888888905, "completions/max_length": 3640.6, "completions/max_terminated_length": 3640.6, "completions/mean_length": 801.1100708007813, "completions/mean_terminated_length": 815.0879760742188, "completions/min_length": 0.0, "completions/min_terminated_length": 194.2, "epoch": 0.13199835002062474, "grad_norm": 0.0007020493503659964, "learning_rate": 4.60843373493976e-06, "loss": -0.0171, "num_tokens": 109401982.0, "reward": 1.2057682991027832, "reward_std": 0.2689946472644806, "rewards/accuracy_reward": 0.6462673544883728, "rewards/batch_coverage_0": 0.28498372435569763, "rewards/batch_coverage_1": 0.28498372435569763, "rewards/batch_coverage_10": 0.43428103923797606, "rewards/batch_coverage_15": 0.44705964922904967, "rewards/batch_coverage_20": 0.4741877973079681, "rewards/batch_coverage_25": 0.4844040036201477, "rewards/batch_coverage_5": 0.38492555618286134, "rewards/brier_reward": 0.760375726222992, "rewards/confidence_uniqueness_reward": 0.8418051481246949, "rewards/format_reward": 0.9821180582046509, "rewards/frontier_aurc_reward": -0.002279853168874979, "rewards/frontier_ece_reward": 0.03188092932105065, "rewards/frontier_entropy_batch_reward": -0.5128462076187134, "signal/accuracy_reward/centered_abs_mean": 0.18359917402267456, "signal/accuracy_reward/group_std_mean": 0.24245889782905578, "signal/accuracy_reward/group_zero_std_frac": 0.30555555820465086, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09179958701133728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09179958701133728, "signal/advantage_abs_mean": 0.20495306551456452, "signal/advantage_pre_scale_abs_mean": 0.20495306551456452, "signal/advantage_pre_scale_std": 0.2879967331886292, "signal/advantage_std": 0.2879967331886292, "signal/batch_coverage_0/centered_abs_mean": 0.21537235081195832, "signal/batch_coverage_0/group_std_mean": 0.2897315204143524, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.021537235751748086, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.021537235751748086, "signal/batch_coverage_1/centered_abs_mean": 0.21537235081195832, "signal/batch_coverage_1/group_std_mean": 0.2897315204143524, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.021537235751748086, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.021537235751748086, "signal/batch_coverage_10/centered_abs_mean": 0.2431473821401596, "signal/batch_coverage_10/group_std_mean": 0.3251115679740906, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.02431473843753338, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.02431473843753338, "signal/batch_coverage_15/centered_abs_mean": 0.2471532106399536, "signal/batch_coverage_15/group_std_mean": 0.3280351758003235, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.024715321511030196, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.024715321511030196, "signal/batch_coverage_20/centered_abs_mean": 0.26155039072036745, "signal/batch_coverage_20/group_std_mean": 0.3446295440196991, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.02615503966808319, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.02615503966808319, "signal/batch_coverage_25/centered_abs_mean": 0.26704921424388883, "signal/batch_coverage_25/group_std_mean": 0.35001180768013, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.026704922690987586, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.026704922690987586, "signal/batch_coverage_5/centered_abs_mean": 0.23431369364261628, "signal/batch_coverage_5/group_std_mean": 0.31417744755744936, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.02343137003481388, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.02343137003481388, "signal/brier_reward/centered_abs_mean": 0.21910977661609649, "signal/brier_reward/group_std_mean": 0.2725955665111542, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021910977363586426, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.021910977363586426, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.11237560659646988, "signal/confidence_uniqueness_reward/group_std_mean": 0.14760001599788666, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011237560398876667, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011237560398876667, "signal/format_reward/centered_abs_mean": 0.02948133647441864, "signal/format_reward/group_std_mean": 0.055926169455051425, "signal/format_reward/group_zero_std_frac": 0.7694444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01474066823720932, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01474066823720932, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023909094743430614, "signal/frontier_aurc_reward/group_std_mean": 0.0037250214256346224, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.988636879308615e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.988636879308615e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.055272321403026584, "signal/frontier_ece_reward/group_std_mean": 0.0705582395195961, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005527231935411691, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005527231935411691, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4059657871723175, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4828205406665802, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.008333333395421505, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04059657901525497, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04059657901525497, "step": 55 }, { "calibration/aurc": 0.27999538708938243, "calibration/batch_distribution_entropy": 0.6776643316316128, "calibration/buffer_distribution_entropy": 0.6025841857243535, "calibration/confidence_entropy": 0.256365245569243, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.08167539267015707, "calibration/coverage@15%": 0.34397905759162306, "calibration/coverage@20%": 0.4451284034867176, "calibration/coverage@25%": 0.49979416762215256, "calibration/coverage@30%": 0.5467018469656992, "calibration/coverage@5%": 0.0, "calibration/ece": 0.19908587966332278, "calibration/mean_confidence": 0.7527650724916545, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012586805555555558, "completions/max_length": 3794.0, "completions/max_terminated_length": 3794.0, "completions/mean_length": 789.0218017578125, "completions/mean_terminated_length": 799.2080444335937, "completions/min_length": 0.0, "completions/min_terminated_length": 218.6, "epoch": 0.14399820002249972, "grad_norm": 0.0007612982881255448, "learning_rate": 4.457831325301205e-06, "loss": -0.0164, "num_tokens": 121588089.0, "reward": 1.2023053884506225, "reward_std": 0.24645790457725525, "rewards/accuracy_reward": 0.6163194298744201, "rewards/batch_coverage_0": 0.30756841897964476, "rewards/batch_coverage_1": 0.30756841897964476, "rewards/batch_coverage_10": 0.4529237329959869, "rewards/batch_coverage_15": 0.4701856553554535, "rewards/batch_coverage_20": 0.49019448161125184, "rewards/batch_coverage_25": 0.5011613070964813, "rewards/batch_coverage_5": 0.3910254955291748, "rewards/brier_reward": 0.7737071990966797, "rewards/confidence_uniqueness_reward": 0.8230122208595276, "rewards/format_reward": 0.9872395873069764, "rewards/frontier_aurc_reward": -0.0027119277510792017, "rewards/frontier_ece_reward": 0.03474101237952709, "rewards/frontier_entropy_batch_reward": -0.5464899241924286, "signal/accuracy_reward/centered_abs_mean": 0.1891059011220932, "signal/accuracy_reward/group_std_mean": 0.2480522572994232, "signal/accuracy_reward/group_zero_std_frac": 0.3027777820825577, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0945529505610466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0945529505610466, "signal/advantage_abs_mean": 0.18332133889198304, "signal/advantage_pre_scale_abs_mean": 0.18332133889198304, "signal/advantage_pre_scale_std": 0.2718550503253937, "signal/advantage_std": 0.2718550503253937, "signal/batch_coverage_0/centered_abs_mean": 0.17082740664482116, "signal/batch_coverage_0/group_std_mean": 0.2339889734983444, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.017082741484045982, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.017082741484045982, "signal/batch_coverage_1/centered_abs_mean": 0.17082740664482116, "signal/batch_coverage_1/group_std_mean": 0.2339889734983444, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.017082741484045982, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.017082741484045982, "signal/batch_coverage_10/centered_abs_mean": 0.20480410158634185, "signal/batch_coverage_10/group_std_mean": 0.2818973779678345, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.020480410754680635, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.020480410754680635, "signal/batch_coverage_15/centered_abs_mean": 0.21194995045661927, "signal/batch_coverage_15/group_std_mean": 0.29027042388916013, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.02119499444961548, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.02119499444961548, "signal/batch_coverage_20/centered_abs_mean": 0.2243577092885971, "signal/batch_coverage_20/group_std_mean": 0.30434967279434205, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.02243577167391777, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.02243577167391777, "signal/batch_coverage_25/centered_abs_mean": 0.2355395793914795, "signal/batch_coverage_25/group_std_mean": 0.3164583444595337, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.023553960025310516, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.023553960025310516, "signal/batch_coverage_5/centered_abs_mean": 0.18540128767490388, "signal/batch_coverage_5/group_std_mean": 0.25556024312973025, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01854012943804264, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.01854012943804264, "signal/brier_reward/centered_abs_mean": 0.18852369487285614, "signal/brier_reward/group_std_mean": 0.24252268970012664, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018852369859814644, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018852369859814644, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1297829270362854, "signal/confidence_uniqueness_reward/group_std_mean": 0.16556560397148132, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012978293374180794, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012978293374180794, "signal/format_reward/centered_abs_mean": 0.022477213479578494, "signal/format_reward/group_std_mean": 0.04559006839990616, "signal/format_reward/group_zero_std_frac": 0.800000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011238606739789247, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011238606739789247, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027325002010911703, "signal/frontier_aurc_reward/group_std_mean": 0.004154342599213124, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.415625178604387e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.415625178604387e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.047476866841316225, "signal/frontier_ece_reward/group_std_mean": 0.05966043248772621, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004747686814516783, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004747686814516783, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.37659157514572145, "signal/frontier_entropy_batch_reward/group_std_mean": 0.45627657175064085, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.016666666977107523, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03765915706753731, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03765915706753731, "step": 60 }, { "calibration/aurc": 0.19163294910122897, "calibration/batch_distribution_entropy": 0.7544690242665435, "calibration/buffer_distribution_entropy": 0.6190086662203866, "calibration/confidence_entropy": 0.27602314148975093, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.05639686684073107, "calibration/coverage@10%": 0.2681954260249067, "calibration/coverage@15%": 0.5397189208834419, "calibration/coverage@20%": 0.6283629626508924, "calibration/coverage@25%": 0.6912652276881658, "calibration/coverage@30%": 0.8117131383051183, "calibration/coverage@5%": 0.1404699738903394, "calibration/ece": 0.117494904221392, "calibration/mean_confidence": 0.6757113075851048, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013107638888888884, "completions/max_length": 3759.8, "completions/max_terminated_length": 3759.8, "completions/mean_length": 784.5288208007812, "completions/mean_terminated_length": 794.96884765625, "completions/min_length": 0.0, "completions/min_terminated_length": 216.0, "epoch": 0.1559980500243747, "grad_norm": 0.0006685956032015383, "learning_rate": 4.307228915662651e-06, "loss": -0.0141, "num_tokens": 133719909.0, "reward": 1.2486091375350952, "reward_std": 0.2386137217283249, "rewards/accuracy_reward": 0.6405381917953491, "rewards/batch_coverage_0": 0.35705705881118777, "rewards/batch_coverage_1": 0.35705705881118777, "rewards/batch_coverage_10": 0.47446699142456056, "rewards/batch_coverage_15": 0.49599921703338623, "rewards/batch_coverage_20": 0.5140182256698609, "rewards/batch_coverage_25": 0.5188945710659028, "rewards/batch_coverage_5": 0.41983569860458375, "rewards/brier_reward": 0.8078495979309082, "rewards/confidence_uniqueness_reward": 0.8686538100242615, "rewards/format_reward": 0.9868923544883728, "rewards/frontier_aurc_reward": -0.0016949245939031244, "rewards/frontier_ece_reward": 0.03309335187077522, "rewards/frontier_entropy_batch_reward": -0.4977757096290588, "signal/accuracy_reward/centered_abs_mean": 0.172509765625, "signal/accuracy_reward/group_std_mean": 0.23307301700115204, "signal/accuracy_reward/group_zero_std_frac": 0.3222222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0862548828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0862548828125, "signal/advantage_abs_mean": 0.17538017630577088, "signal/advantage_pre_scale_abs_mean": 0.17538017630577088, "signal/advantage_pre_scale_std": 0.2610613316297531, "signal/advantage_std": 0.2610613316297531, "signal/batch_coverage_0/centered_abs_mean": 0.17853089570999145, "signal/batch_coverage_0/group_std_mean": 0.2395469069480896, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.017853090167045595, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.017853090167045595, "signal/batch_coverage_1/centered_abs_mean": 0.17853089570999145, "signal/batch_coverage_1/group_std_mean": 0.2395469069480896, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.017853090167045595, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.017853090167045595, "signal/batch_coverage_10/centered_abs_mean": 0.20888802111148835, "signal/batch_coverage_10/group_std_mean": 0.27947876453399656, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.020888802036643027, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.020888802036643027, "signal/batch_coverage_15/centered_abs_mean": 0.21613354980945587, "signal/batch_coverage_15/group_std_mean": 0.28842523097991946, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.021613356098532675, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.021613356098532675, "signal/batch_coverage_20/centered_abs_mean": 0.22640545964241027, "signal/batch_coverage_20/group_std_mean": 0.3013183057308197, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.022640545666217805, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.022640545666217805, "signal/batch_coverage_25/centered_abs_mean": 0.22826351821422577, "signal/batch_coverage_25/group_std_mean": 0.30373467206954957, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.022826352342963218, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.022826352342963218, "signal/batch_coverage_5/centered_abs_mean": 0.19246315062046052, "signal/batch_coverage_5/group_std_mean": 0.25800671279430387, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.019246315211057664, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.019246315211057664, "signal/brier_reward/centered_abs_mean": 0.17271728515625, "signal/brier_reward/group_std_mean": 0.22596350610256194, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017271729186177254, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017271729186177254, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.10451890975236892, "signal/confidence_uniqueness_reward/group_std_mean": 0.1398794487118721, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010451891273260117, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010451891273260117, "signal/format_reward/centered_abs_mean": 0.02364908903837204, "signal/format_reward/group_std_mean": 0.04641622006893158, "signal/format_reward/group_zero_std_frac": 0.8083333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01182454451918602, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01182454451918602, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019812505692243577, "signal/frontier_aurc_reward/group_std_mean": 0.0031359643675386907, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4765633133938537e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4765633133938537e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.039264075458049774, "signal/frontier_ece_reward/group_std_mean": 0.05073797106742859, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0039264077320694925, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0039264077320694925, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.36650044918060304, "signal/frontier_entropy_batch_reward/group_std_mean": 0.44260573387145996, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.013888888992369175, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.036650046706199646, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.036650046706199646, "step": 65 }, { "calibration/aurc": 0.23999176408337725, "calibration/batch_distribution_entropy": 0.6836312918585741, "calibration/buffer_distribution_entropy": 0.6338147504036498, "calibration/confidence_entropy": 0.24961262377196078, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.12295514511873351, "calibration/coverage@15%": 0.2390515169772644, "calibration/coverage@20%": 0.31742437347393787, "calibration/coverage@25%": 0.44004092461640465, "calibration/coverage@30%": 0.793044473258377, "calibration/coverage@5%": 0.0891820580474934, "calibration/ece": 0.17061860165493908, "calibration/mean_confidence": 0.7117827796613386, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01675347222222221, "completions/max_length": 3543.4, "completions/max_terminated_length": 3543.4, "completions/mean_length": 777.2517456054687, "completions/mean_terminated_length": 790.4873779296875, "completions/min_length": 0.0, "completions/min_terminated_length": 194.4, "epoch": 0.16799790002624967, "grad_norm": 0.0006346319569274783, "learning_rate": 4.156626506024097e-06, "loss": -0.0198, "num_tokens": 145751993.0, "reward": 1.2204406261444092, "reward_std": 0.23513408601284028, "rewards/accuracy_reward": 0.6188367962837219, "rewards/batch_coverage_0": 0.34071238040924073, "rewards/batch_coverage_1": 0.34071238040924073, "rewards/batch_coverage_10": 0.4735415160655975, "rewards/batch_coverage_15": 0.4864673376083374, "rewards/batch_coverage_20": 0.4995663225650787, "rewards/batch_coverage_25": 0.5093587577342987, "rewards/batch_coverage_5": 0.41144366264343263, "rewards/brier_reward": 0.7896717667579651, "rewards/confidence_uniqueness_reward": 0.8611388564109802, "rewards/format_reward": 0.9831597208976746, "rewards/frontier_aurc_reward": -0.0018688955111429094, "rewards/frontier_ece_reward": 0.030099016055464743, "rewards/frontier_entropy_batch_reward": -0.5480550169944763, "signal/accuracy_reward/centered_abs_mean": 0.176611328125, "signal/accuracy_reward/group_std_mean": 0.23280532658100128, "signal/accuracy_reward/group_zero_std_frac": 0.3388888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0883056640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0883056640625, "signal/advantage_abs_mean": 0.17352637350559236, "signal/advantage_pre_scale_abs_mean": 0.17352637350559236, "signal/advantage_pre_scale_std": 0.26246364414691925, "signal/advantage_std": 0.26246364414691925, "signal/batch_coverage_0/centered_abs_mean": 0.17251457870006562, "signal/batch_coverage_0/group_std_mean": 0.2311745971441269, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.017251458019018173, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.017251458019018173, "signal/batch_coverage_1/centered_abs_mean": 0.17251457870006562, "signal/batch_coverage_1/group_std_mean": 0.2311745971441269, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.017251458019018173, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.017251458019018173, "signal/batch_coverage_10/centered_abs_mean": 0.204291170835495, "signal/batch_coverage_10/group_std_mean": 0.27622554302215574, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.02042911797761917, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.02042911797761917, "signal/batch_coverage_15/centered_abs_mean": 0.2088209420442581, "signal/batch_coverage_15/group_std_mean": 0.28075531125068665, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.020882094651460646, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.020882094651460646, "signal/batch_coverage_20/centered_abs_mean": 0.2121003121137619, "signal/batch_coverage_20/group_std_mean": 0.2855076462030411, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.02121003195643425, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.02121003195643425, "signal/batch_coverage_25/centered_abs_mean": 0.21743012368679046, "signal/batch_coverage_25/group_std_mean": 0.2922160685062408, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.021743013337254525, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.021743013337254525, "signal/batch_coverage_5/centered_abs_mean": 0.18528588414192199, "signal/batch_coverage_5/group_std_mean": 0.24809181988239287, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.018528588488698004, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.018528588488698004, "signal/brier_reward/centered_abs_mean": 0.17164082825183868, "signal/brier_reward/group_std_mean": 0.2222065269947052, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017164083570241927, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017164083570241927, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1031325027346611, "signal/confidence_uniqueness_reward/group_std_mean": 0.13971158862113953, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010313250403851271, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010313250403851271, "signal/format_reward/centered_abs_mean": 0.02896050326526165, "signal/format_reward/group_std_mean": 0.05549793913960457, "signal/format_reward/group_zero_std_frac": 0.7694444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.014480251632630826, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014480251632630826, "signal/frontier_aurc_reward/centered_abs_mean": 0.001962455874308944, "signal/frontier_aurc_reward/group_std_mean": 0.0030693566892296075, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4530698647140524e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4530698647140524e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.035511156916618346, "signal/frontier_ece_reward/group_std_mean": 0.04602086395025253, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035511157941073178, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035511157941073178, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3593166649341583, "signal/frontier_entropy_batch_reward/group_std_mean": 0.433760666847229, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.038888888992369176, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035931666195392606, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035931666195392606, "step": 70 }, { "calibration/aurc": 0.17997203334443818, "calibration/batch_distribution_entropy": 0.5623722645304415, "calibration/buffer_distribution_entropy": 0.6359015524760088, "calibration/confidence_entropy": 0.2136270373288652, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.3683673011495868, "calibration/coverage@15%": 0.5345634641198306, "calibration/coverage@20%": 0.6111288934411654, "calibration/coverage@25%": 0.6967545723451235, "calibration/coverage@30%": 0.7372661750614506, "calibration/coverage@5%": 0.0, "calibration/ece": 0.15312949408298948, "calibration/mean_confidence": 0.7853518189875418, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015451388888888862, "completions/max_length": 3386.4, "completions/max_terminated_length": 3386.4, "completions/mean_length": 780.0697998046875, "completions/mean_terminated_length": 792.3125244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 246.4, "epoch": 0.17999775002812465, "grad_norm": 0.0005761557258665562, "learning_rate": 4.006024096385543e-06, "loss": -0.0137, "num_tokens": 157803293.0, "reward": 1.274224543571472, "reward_std": 0.2257396847009659, "rewards/accuracy_reward": 0.6721354246139526, "rewards/batch_coverage_0": 0.3722359657287598, "rewards/batch_coverage_1": 0.3722359657287598, "rewards/batch_coverage_10": 0.5174551129341125, "rewards/batch_coverage_15": 0.5403218030929565, "rewards/batch_coverage_20": 0.5625471234321594, "rewards/batch_coverage_25": 0.5715397596359253, "rewards/batch_coverage_5": 0.4612420558929443, "rewards/brier_reward": 0.8187686681747437, "rewards/confidence_uniqueness_reward": 0.8240198254585266, "rewards/format_reward": 0.9844618082046509, "rewards/frontier_aurc_reward": -0.0016686252551153303, "rewards/frontier_ece_reward": 0.03394242897629738, "rewards/frontier_entropy_batch_reward": -0.6148412227630615, "signal/accuracy_reward/centered_abs_mean": 0.17437608540058136, "signal/accuracy_reward/group_std_mean": 0.23193702697753907, "signal/accuracy_reward/group_zero_std_frac": 0.33333333730697634, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08718804270029068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08718804270029068, "signal/advantage_abs_mean": 0.16320714354515076, "signal/advantage_pre_scale_abs_mean": 0.16320714354515076, "signal/advantage_pre_scale_std": 0.2634482055902481, "signal/advantage_std": 0.2634482055902481, "signal/batch_coverage_0/centered_abs_mean": 0.13958706557750702, "signal/batch_coverage_0/group_std_mean": 0.19350030422210693, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013958707079291343, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.013958707079291343, "signal/batch_coverage_1/centered_abs_mean": 0.13958706557750702, "signal/batch_coverage_1/group_std_mean": 0.19350030422210693, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013958707079291343, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.013958707079291343, "signal/batch_coverage_10/centered_abs_mean": 0.17092832624912263, "signal/batch_coverage_10/group_std_mean": 0.24247340261936187, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.017092833295464516, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.017092833295464516, "signal/batch_coverage_15/centered_abs_mean": 0.18111636042594909, "signal/batch_coverage_15/group_std_mean": 0.25312560200691225, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.018111636117100715, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.018111636117100715, "signal/batch_coverage_20/centered_abs_mean": 0.1944339394569397, "signal/batch_coverage_20/group_std_mean": 0.26790787279605865, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.019443394616246223, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.019443394616246223, "signal/batch_coverage_25/centered_abs_mean": 0.203565114736557, "signal/batch_coverage_25/group_std_mean": 0.27769856452941893, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.02035651244223118, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.02035651244223118, "signal/batch_coverage_5/centered_abs_mean": 0.1548121303319931, "signal/batch_coverage_5/group_std_mean": 0.2177237719297409, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01548121329396963, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.01548121329396963, "signal/brier_reward/centered_abs_mean": 0.1549970895051956, "signal/brier_reward/group_std_mean": 0.20645565688610076, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01549970917403698, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01549970917403698, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.10375804156064987, "signal/confidence_uniqueness_reward/group_std_mean": 0.13831095546483993, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01037580445408821, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01037580445408821, "signal/format_reward/centered_abs_mean": 0.02744683139026165, "signal/format_reward/group_std_mean": 0.0541893906891346, "signal/format_reward/group_zero_std_frac": 0.7694444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013723415695130825, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013723415695130825, "signal/frontier_aurc_reward/centered_abs_mean": 0.001943652448244393, "signal/frontier_aurc_reward/group_std_mean": 0.0031304992735385895, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4295656112371943e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4295656112371943e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03213078454136849, "signal/frontier_ece_reward/group_std_mean": 0.042379957437515256, "signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0032130784820765257, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0032130784820765257, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29642507433891296, "signal/frontier_entropy_batch_reward/group_std_mean": 0.37213156223297117, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.13055555820465087, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029642507433891296, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029642507433891296, "step": 75 }, { "calibration/aurc": 0.18531783499401064, "calibration/batch_distribution_entropy": 0.5264748528796649, "calibration/buffer_distribution_entropy": 0.6301669153476362, "calibration/confidence_entropy": 0.20656610665663813, "calibration/coverage@0%": 0.018947368421052633, "calibration/coverage@1%": 0.018947368421052633, "calibration/coverage@10%": 0.30307596167198997, "calibration/coverage@15%": 0.5106729483104138, "calibration/coverage@20%": 0.5678850159700042, "calibration/coverage@25%": 0.7134553775743708, "calibration/coverage@30%": 0.9037159420289855, "calibration/coverage@5%": 0.2714622969032079, "calibration/ece": 0.1599405047177906, "calibration/mean_confidence": 0.7885415614674296, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014670138888888906, "completions/max_length": 3726.8, "completions/max_terminated_length": 3726.8, "completions/mean_length": 823.2955688476562, "completions/mean_terminated_length": 835.6906127929688, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.19199760002999963, "grad_norm": 0.0006295799976214767, "learning_rate": 3.855421686746989e-06, "loss": -0.0148, "num_tokens": 170340938.0, "reward": 1.2308602809906006, "reward_std": 0.23434819877147675, "rewards/accuracy_reward": 0.6280381858348847, "rewards/batch_coverage_0": 0.35562955141067504, "rewards/batch_coverage_1": 0.35562955141067504, "rewards/batch_coverage_10": 0.4737295091152191, "rewards/batch_coverage_15": 0.4910382151603699, "rewards/batch_coverage_20": 0.5097373247146606, "rewards/batch_coverage_25": 0.5154181003570557, "rewards/batch_coverage_5": 0.42273095846176145, "rewards/brier_reward": 0.8010688662528992, "rewards/confidence_uniqueness_reward": 0.860322093963623, "rewards/format_reward": 0.98515625, "rewards/frontier_aurc_reward": -0.001912536984309554, "rewards/frontier_ece_reward": 0.027306798845529556, "rewards/frontier_entropy_batch_reward": -0.569741952419281, "signal/accuracy_reward/centered_abs_mean": 0.18927408754825592, "signal/accuracy_reward/group_std_mean": 0.24705808460712433, "signal/accuracy_reward/group_zero_std_frac": 0.30555555820465086, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09463704377412796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09463704377412796, "signal/advantage_abs_mean": 0.17279953360557557, "signal/advantage_pre_scale_abs_mean": 0.17279953360557557, "signal/advantage_pre_scale_std": 0.2633059620857239, "signal/advantage_std": 0.2633059620857239, "signal/batch_coverage_0/centered_abs_mean": 0.16080776751041412, "signal/batch_coverage_0/group_std_mean": 0.21778478026390075, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.016080777533352374, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.016080777533352374, "signal/batch_coverage_1/centered_abs_mean": 0.16080776751041412, "signal/batch_coverage_1/group_std_mean": 0.21778478026390075, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.016080777533352374, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.016080777533352374, "signal/batch_coverage_10/centered_abs_mean": 0.1845082998275757, "signal/batch_coverage_10/group_std_mean": 0.2537638247013092, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01845082901418209, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.01845082901418209, "signal/batch_coverage_15/centered_abs_mean": 0.19259609282016754, "signal/batch_coverage_15/group_std_mean": 0.2654243648052216, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.019259610027074815, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.019259610027074815, "signal/batch_coverage_20/centered_abs_mean": 0.2040830820798874, "signal/batch_coverage_20/group_std_mean": 0.2802187502384186, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.020408308133482933, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.020408308133482933, "signal/batch_coverage_25/centered_abs_mean": 0.21111299395561217, "signal/batch_coverage_25/group_std_mean": 0.28788386583328246, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.021111299842596055, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.021111299842596055, "signal/batch_coverage_5/centered_abs_mean": 0.17277827262878417, "signal/batch_coverage_5/group_std_mean": 0.2342999130487442, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.017277827486395836, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.017277827486395836, "signal/brier_reward/centered_abs_mean": 0.16930352449417113, "signal/brier_reward/group_std_mean": 0.22312399744987488, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016930353082716464, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016930353082716464, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08183257579803467, "signal/confidence_uniqueness_reward/group_std_mean": 0.1107841432094574, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008183257654309273, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008183257654309273, "signal/format_reward/centered_abs_mean": 0.023681640066206454, "signal/format_reward/group_std_mean": 0.04482636339962483, "signal/format_reward/group_zero_std_frac": 0.8138888835906982, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011840820033103227, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011840820033103227, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017274004174396395, "signal/frontier_aurc_reward/group_std_mean": 0.002619408327154815, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.159250379918376e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.159250379918376e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.030476462468504904, "signal/frontier_ece_reward/group_std_mean": 0.040614823997020724, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030476462095975878, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030476462095975878, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3353477716445923, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4120087206363678, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.09444444589316844, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03353477939963341, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03353477939963341, "step": 80 }, { "calibration/aurc": 0.15409807022690386, "calibration/batch_distribution_entropy": 0.7416203857286623, "calibration/buffer_distribution_entropy": 0.6372012889686948, "calibration/confidence_entropy": 0.27422233398342266, "calibration/coverage@0%": 0.0109375, "calibration/coverage@1%": 0.0109375, "calibration/coverage@10%": 0.4266453998978691, "calibration/coverage@15%": 0.6234678790038453, "calibration/coverage@20%": 0.6907730186501071, "calibration/coverage@25%": 0.7665074893636747, "calibration/coverage@30%": 0.8581187629948495, "calibration/coverage@5%": 0.24278833868530353, "calibration/ece": 0.11338426350473374, "calibration/mean_confidence": 0.6267695244515951, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012760416666666653, "completions/max_length": 3565.4, "completions/max_terminated_length": 3565.4, "completions/mean_length": 823.7012084960937, "completions/mean_terminated_length": 834.3744995117188, "completions/min_length": 0.0, "completions/min_terminated_length": 272.2, "epoch": 0.2039974500318746, "grad_norm": 0.0005630561499856412, "learning_rate": 3.7048192771084342e-06, "loss": -0.012, "num_tokens": 182917176.0, "reward": 1.276980185508728, "reward_std": 0.22686235904693602, "rewards/accuracy_reward": 0.6433159708976746, "rewards/batch_coverage_0": 0.38253883719444276, "rewards/batch_coverage_1": 0.38253883719444276, "rewards/batch_coverage_10": 0.5324840545654297, "rewards/batch_coverage_15": 0.548856544494629, "rewards/batch_coverage_20": 0.5570691585540771, "rewards/batch_coverage_25": 0.5611120223999023, "rewards/batch_coverage_5": 0.4730106830596924, "rewards/brier_reward": 0.8340127110481262, "rewards/confidence_uniqueness_reward": 0.8759592175483704, "rewards/format_reward": 0.9869791746139527, "rewards/frontier_aurc_reward": -0.0015283518703654408, "rewards/frontier_ece_reward": 0.027334221825003623, "rewards/frontier_entropy_batch_reward": -0.556399130821228, "signal/accuracy_reward/centered_abs_mean": 0.17159830927848815, "signal/accuracy_reward/group_std_mean": 0.22915640473365784, "signal/accuracy_reward/group_zero_std_frac": 0.3388888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08579915463924408, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08579915463924408, "signal/advantage_abs_mean": 0.1645752727985382, "signal/advantage_pre_scale_abs_mean": 0.1645752727985382, "signal/advantage_pre_scale_std": 0.2576048791408539, "signal/advantage_std": 0.2576048791408539, "signal/batch_coverage_0/centered_abs_mean": 0.16249439418315886, "signal/batch_coverage_0/group_std_mean": 0.21981814205646516, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.016249440237879755, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.016249440237879755, "signal/batch_coverage_1/centered_abs_mean": 0.16249439418315886, "signal/batch_coverage_1/group_std_mean": 0.21981814205646516, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.016249440237879755, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.016249440237879755, "signal/batch_coverage_10/centered_abs_mean": 0.1962430626153946, "signal/batch_coverage_10/group_std_mean": 0.2668556869029999, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01962430663406849, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.01962430663406849, "signal/batch_coverage_15/centered_abs_mean": 0.20319909751415252, "signal/batch_coverage_15/group_std_mean": 0.2750951647758484, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.02031990997493267, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.02031990997493267, "signal/batch_coverage_20/centered_abs_mean": 0.20662610232830048, "signal/batch_coverage_20/group_std_mean": 0.27981886863708494, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.02066261097788811, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.02066261097788811, "signal/batch_coverage_25/centered_abs_mean": 0.20946857929229737, "signal/batch_coverage_25/group_std_mean": 0.2836865186691284, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.020946857705712318, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.020946857705712318, "signal/batch_coverage_5/centered_abs_mean": 0.18017423450946807, "signal/batch_coverage_5/group_std_mean": 0.24363900423049928, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.018017425015568734, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.018017425015568734, "signal/brier_reward/centered_abs_mean": 0.1504698157310486, "signal/brier_reward/group_std_mean": 0.20289478003978728, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01504698134958744, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01504698134958744, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07075797319412232, "signal/confidence_uniqueness_reward/group_std_mean": 0.10061978846788407, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007075797766447067, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007075797766447067, "signal/format_reward/centered_abs_mean": 0.02328558973968029, "signal/format_reward/group_std_mean": 0.047786331921815875, "signal/format_reward/group_zero_std_frac": 0.7888889074325561, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011642794869840146, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011642794869840146, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015381898963823914, "signal/frontier_aurc_reward/group_std_mean": 0.002280404232442379, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.922737301356392e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.922737301356392e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.028247393295168878, "signal/frontier_ece_reward/group_std_mean": 0.03832782506942749, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0028247392736375334, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0028247392736375334, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.330600780248642, "signal/frontier_entropy_batch_reward/group_std_mean": 0.41186041831970216, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06388889066874981, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03306007869541645, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03306007869541645, "step": 85 }, { "calibration/aurc": 0.1409074712236842, "calibration/batch_distribution_entropy": 0.676989785966493, "calibration/buffer_distribution_entropy": 0.6450539204074888, "calibration/confidence_entropy": 0.24289489628080269, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.4740362776214056, "calibration/coverage@15%": 0.6774752795708263, "calibration/coverage@20%": 0.7559175357069725, "calibration/coverage@25%": 0.8221792826805896, "calibration/coverage@30%": 0.8984341667054151, "calibration/coverage@5%": 0.3210218467549785, "calibration/ece": 0.11206175796901677, "calibration/mean_confidence": 0.703699617406267, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00980902777777779, "completions/max_length": 3131.6, "completions/max_terminated_length": 3131.6, "completions/mean_length": 805.4808349609375, "completions/mean_terminated_length": 813.4924682617187, "completions/min_length": 0.0, "completions/min_terminated_length": 232.4, "epoch": 0.2159973000337496, "grad_norm": 0.0006051440723240376, "learning_rate": 3.5542168674698798e-06, "loss": -0.0106, "num_tokens": 195264987.0, "reward": 1.2629518032073974, "reward_std": 0.21908413171768187, "rewards/accuracy_reward": 0.6404513835906982, "rewards/batch_coverage_0": 0.37480852007865906, "rewards/batch_coverage_1": 0.37480852007865906, "rewards/batch_coverage_10": 0.5070814311504364, "rewards/batch_coverage_15": 0.5261907279491425, "rewards/batch_coverage_20": 0.5442143380641937, "rewards/batch_coverage_25": 0.5515429675579071, "rewards/batch_coverage_5": 0.45254968404769896, "rewards/brier_reward": 0.8282647132873535, "rewards/confidence_uniqueness_reward": 0.8380447387695312, "rewards/format_reward": 0.9901041746139526, "rewards/frontier_aurc_reward": -0.0015266900416463613, "rewards/frontier_ece_reward": 0.027132243290543555, "rewards/frontier_entropy_batch_reward": -0.547706913948059, "signal/accuracy_reward/centered_abs_mean": 0.17931857705116272, "signal/accuracy_reward/group_std_mean": 0.2344184249639511, "signal/accuracy_reward/group_zero_std_frac": 0.33611112236976626, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08965928852558136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08965928852558136, "signal/advantage_abs_mean": 0.15935106277465821, "signal/advantage_pre_scale_abs_mean": 0.15935106277465821, "signal/advantage_pre_scale_std": 0.2506345182657242, "signal/advantage_std": 0.2506345182657242, "signal/batch_coverage_0/centered_abs_mean": 0.15178050100803375, "signal/batch_coverage_0/group_std_mean": 0.20413313806056976, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.015178050845861435, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.015178050845861435, "signal/batch_coverage_1/centered_abs_mean": 0.15178050100803375, "signal/batch_coverage_1/group_std_mean": 0.20413313806056976, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.015178050845861435, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.015178050845861435, "signal/batch_coverage_10/centered_abs_mean": 0.1780288428068161, "signal/batch_coverage_10/group_std_mean": 0.24395250976085664, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.017802884429693223, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.017802884429693223, "signal/batch_coverage_15/centered_abs_mean": 0.1861760824918747, "signal/batch_coverage_15/group_std_mean": 0.2549364984035492, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01861760877072811, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01861760877072811, "signal/batch_coverage_20/centered_abs_mean": 0.1985442578792572, "signal/batch_coverage_20/group_std_mean": 0.26970677971839907, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.019854425638914108, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.019854425638914108, "signal/batch_coverage_25/centered_abs_mean": 0.20626583695411682, "signal/batch_coverage_25/group_std_mean": 0.2787212669849396, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.020626583695411684, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.020626583695411684, "signal/batch_coverage_5/centered_abs_mean": 0.16467028558254243, "signal/batch_coverage_5/group_std_mean": 0.22229004502296448, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.016467029228806496, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.016467029228806496, "signal/brier_reward/centered_abs_mean": 0.1509499132633209, "signal/brier_reward/group_std_mean": 0.20125450193881989, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015094991773366928, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015094991773366928, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09308070093393325, "signal/confidence_uniqueness_reward/group_std_mean": 0.12461533695459366, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009308070316910744, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009308070316910744, "signal/format_reward/centered_abs_mean": 0.018164062313735484, "signal/format_reward/group_std_mean": 0.04039783589541912, "signal/format_reward/group_zero_std_frac": 0.8111111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009082031156867742, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009082031156867742, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018176564015448093, "signal/frontier_aurc_reward/group_std_mean": 0.0029703597305342556, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2720704691892024e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2720704691892024e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.026793276891112328, "signal/frontier_ece_reward/group_std_mean": 0.0369697593152523, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0026793277356773615, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0026793277356773615, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31506091356277466, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39477880001068116, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.08055555745959282, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031506090238690374, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031506090238690374, "step": 90 }, { "calibration/aurc": 0.18347185980137212, "calibration/batch_distribution_entropy": 0.6614250966982602, "calibration/buffer_distribution_entropy": 0.6505126033972186, "calibration/confidence_entropy": 0.21783652138555804, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.5036943404007126, "calibration/coverage@15%": 0.5772734599629088, "calibration/coverage@20%": 0.6441130233893346, "calibration/coverage@25%": 0.6854127650267248, "calibration/coverage@30%": 0.7084193402867962, "calibration/coverage@5%": 0.30512077426763085, "calibration/ece": 0.14312376310403513, "calibration/mean_confidence": 0.6829416717300276, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006336805555555558, "completions/max_length": 3176.8, "completions/max_terminated_length": 3176.8, "completions/mean_length": 819.3296997070313, "completions/mean_terminated_length": 824.5814819335938, "completions/min_length": 0.0, "completions/min_terminated_length": 235.8, "epoch": 0.22799715003562457, "grad_norm": 0.0005972707294858992, "learning_rate": 3.4036144578313257e-06, "loss": -0.006, "num_tokens": 207795345.0, "reward": 1.2796026945114136, "reward_std": 0.21020272970199586, "rewards/accuracy_reward": 0.6392361164093018, "rewards/batch_coverage_0": 0.4010293483734131, "rewards/batch_coverage_1": 0.4010293483734131, "rewards/batch_coverage_10": 0.5425745368003845, "rewards/batch_coverage_15": 0.5669065713882446, "rewards/batch_coverage_20": 0.575879442691803, "rewards/batch_coverage_25": 0.5806684374809266, "rewards/batch_coverage_5": 0.47908039689064025, "rewards/brier_reward": 0.8297713875770569, "rewards/confidence_uniqueness_reward": 0.7985679268836975, "rewards/format_reward": 0.9936631917953491, "rewards/frontier_aurc_reward": -0.001917445962317288, "rewards/frontier_ece_reward": 0.026556022092700006, "rewards/frontier_entropy_batch_reward": -0.5702940583229065, "signal/accuracy_reward/centered_abs_mean": 0.16852213442325592, "signal/accuracy_reward/group_std_mean": 0.21984477639198302, "signal/accuracy_reward/group_zero_std_frac": 0.3833333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08426106721162796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08426106721162796, "signal/advantage_abs_mean": 0.15105001330375672, "signal/advantage_pre_scale_abs_mean": 0.15105001330375672, "signal/advantage_pre_scale_std": 0.2421530544757843, "signal/advantage_std": 0.2421530544757843, "signal/batch_coverage_0/centered_abs_mean": 0.1473347067832947, "signal/batch_coverage_0/group_std_mean": 0.2022853225469589, "signal/batch_coverage_0/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.014733470790088177, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.014733470790088177, "signal/batch_coverage_1/centered_abs_mean": 0.1473347067832947, "signal/batch_coverage_1/group_std_mean": 0.2022853225469589, "signal/batch_coverage_1/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.014733470790088177, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.014733470790088177, "signal/batch_coverage_10/centered_abs_mean": 0.17430627048015596, "signal/batch_coverage_10/group_std_mean": 0.24450170993804932, "signal/batch_coverage_10/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01743062734603882, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.01743062734603882, "signal/batch_coverage_15/centered_abs_mean": 0.18629721999168397, "signal/batch_coverage_15/group_std_mean": 0.2586580038070679, "signal/batch_coverage_15/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01862972229719162, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01862972229719162, "signal/batch_coverage_20/centered_abs_mean": 0.19263581037521363, "signal/batch_coverage_20/group_std_mean": 0.26611111760139466, "signal/batch_coverage_20/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.019263581931591035, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.019263581931591035, "signal/batch_coverage_25/centered_abs_mean": 0.19736669659614564, "signal/batch_coverage_25/group_std_mean": 0.2709603726863861, "signal/batch_coverage_25/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.019736670702695847, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.019736670702695847, "signal/batch_coverage_5/centered_abs_mean": 0.15937765538692475, "signal/batch_coverage_5/group_std_mean": 0.2213836669921875, "signal/batch_coverage_5/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.015937766060233116, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.015937766060233116, "signal/brier_reward/centered_abs_mean": 0.14500512927770615, "signal/brier_reward/group_std_mean": 0.1952369064092636, "signal/brier_reward/group_zero_std_frac": 0.002777777798473835, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014500514045357705, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014500514045357705, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1249560296535492, "signal/confidence_uniqueness_reward/group_std_mean": 0.15818934738636017, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.002777777798473835, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012495603226125241, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012495603226125241, "signal/format_reward/centered_abs_mean": 0.01147460942156613, "signal/format_reward/group_std_mean": 0.027114569582045077, "signal/format_reward/group_zero_std_frac": 0.8666666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005737304710783065, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005737304710783065, "signal/frontier_aurc_reward/centered_abs_mean": 0.002263281703926623, "signal/frontier_aurc_reward/group_std_mean": 0.0033607793506234885, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8291022317716852e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8291022317716852e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.025421417877078058, "signal/frontier_ece_reward/group_std_mean": 0.03576734662055969, "signal/frontier_ece_reward/group_zero_std_frac": 0.00555555559694767, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002542141871526837, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002542141871526837, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3209464192390442, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40138447284698486, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06944444514811039, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032094644755125044, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032094644755125044, "step": 95 }, { "calibration/aurc": 0.14871540129140545, "calibration/batch_distribution_entropy": 0.7226482654847846, "calibration/buffer_distribution_entropy": 0.6533304306820955, "calibration/confidence_entropy": 0.2790756192218355, "calibration/coverage@0%": 0.022273176238182587, "calibration/coverage@1%": 0.022273176238182587, "calibration/coverage@10%": 0.384944260136185, "calibration/coverage@15%": 0.550066165769163, "calibration/coverage@20%": 0.7874869814759887, "calibration/coverage@25%": 0.8813192236663167, "calibration/coverage@30%": 0.9200326500742367, "calibration/coverage@5%": 0.22022361653824696, "calibration/ece": 0.10801010513597523, "calibration/mean_confidence": 0.6640190515941942, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00902777777777779, "completions/max_length": 3604.8, "completions/max_terminated_length": 3604.8, "completions/mean_length": 849.1434204101563, "completions/mean_terminated_length": 856.893017578125, "completions/min_length": 0.0, "completions/min_terminated_length": 251.8, "epoch": 0.23999700003749952, "grad_norm": 0.0005327390390448272, "learning_rate": 3.2530120481927713e-06, "loss": -0.0083, "num_tokens": 220676549.0, "reward": 1.2861318826675414, "reward_std": 0.2074308067560196, "rewards/accuracy_reward": 0.6497395873069763, "rewards/batch_coverage_0": 0.406613689661026, "rewards/batch_coverage_1": 0.406613689661026, "rewards/batch_coverage_10": 0.5238588929176331, "rewards/batch_coverage_15": 0.5399280548095703, "rewards/batch_coverage_20": 0.5521279692649841, "rewards/batch_coverage_25": 0.5576201200485229, "rewards/batch_coverage_5": 0.48190149664878845, "rewards/brier_reward": 0.8477397084236145, "rewards/confidence_uniqueness_reward": 0.8880740642547608, "rewards/format_reward": 0.990711796283722, "rewards/frontier_aurc_reward": -0.0011223536217585205, "rewards/frontier_ece_reward": 0.0243696141988039, "rewards/frontier_entropy_batch_reward": -0.5696449875831604, "signal/accuracy_reward/centered_abs_mean": 0.17262912094593047, "signal/accuracy_reward/group_std_mean": 0.22270674109458924, "signal/accuracy_reward/group_zero_std_frac": 0.38611111640930174, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08631456047296523, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08631456047296523, "signal/advantage_abs_mean": 0.14957348108291627, "signal/advantage_pre_scale_abs_mean": 0.14957348108291627, "signal/advantage_pre_scale_std": 0.23985751271247863, "signal/advantage_std": 0.23985751271247863, "signal/batch_coverage_0/centered_abs_mean": 0.14382635056972504, "signal/batch_coverage_0/group_std_mean": 0.19585117995738982, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.014382634684443474, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.014382634684443474, "signal/batch_coverage_1/centered_abs_mean": 0.14382635056972504, "signal/batch_coverage_1/group_std_mean": 0.19585117995738982, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.014382634684443474, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.014382634684443474, "signal/batch_coverage_10/centered_abs_mean": 0.1680520087480545, "signal/batch_coverage_10/group_std_mean": 0.23160885274410248, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.016805201396346094, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.016805201396346094, "signal/batch_coverage_15/centered_abs_mean": 0.17198814153671266, "signal/batch_coverage_15/group_std_mean": 0.23695962727069855, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01719881482422352, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01719881482422352, "signal/batch_coverage_20/centered_abs_mean": 0.17785200774669646, "signal/batch_coverage_20/group_std_mean": 0.2448043555021286, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.017785200849175453, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.017785200849175453, "signal/batch_coverage_25/centered_abs_mean": 0.18249513506889342, "signal/batch_coverage_25/group_std_mean": 0.24947845041751862, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01824951358139515, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01824951358139515, "signal/batch_coverage_5/centered_abs_mean": 0.15562867522239685, "signal/batch_coverage_5/group_std_mean": 0.21346194446086883, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.015562868677079678, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.015562868677079678, "signal/brier_reward/centered_abs_mean": 0.1374327450990677, "signal/brier_reward/group_std_mean": 0.1854398012161255, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013743274100124835, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013743274100124835, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06414824798703193, "signal/confidence_uniqueness_reward/group_std_mean": 0.09137060940265655, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006414824724197387, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006414824724197387, "signal/format_reward/centered_abs_mean": 0.016845703311264514, "signal/format_reward/group_std_mean": 0.0385689489543438, "signal/format_reward/group_zero_std_frac": 0.8166666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008422851655632257, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008422851655632257, "signal/frontier_aurc_reward/centered_abs_mean": 0.0011418449808843433, "signal/frontier_aurc_reward/group_std_mean": 0.0017946111736819148, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.4273062515712808e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.4273062515712808e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.025530267134308816, "signal/frontier_ece_reward/group_std_mean": 0.037098944932222364, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002553026657551527, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002553026657551527, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3181028127670288, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3966158747673035, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06944444645196199, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031810281053185466, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031810281053185466, "step": 100 }, { "epoch": 0.23999700003749952, "eval_calibration/aurc": 0.15069789295964053, "eval_calibration/batch_distribution_entropy": 0.6999939081758847, "eval_calibration/buffer_distribution_entropy": 0.6590732287086882, "eval_calibration/confidence_entropy": 0.2877143448048169, "eval_calibration/coverage@0%": 0.2820900537634408, "eval_calibration/coverage@1%": 0.2820900537634408, "eval_calibration/coverage@10%": 0.3706317204301075, "eval_calibration/coverage@15%": 0.5504928315412186, "eval_calibration/coverage@20%": 0.7736447132616489, "eval_calibration/coverage@25%": 0.8738015232974911, "eval_calibration/coverage@30%": 0.9583333333333334, "eval_calibration/coverage@5%": 0.3185483870967742, "eval_calibration/ece": 0.17312003647113128, "eval_calibration/mean_confidence": 0.6685240156767175, "eval_completions/clipped_ratio": 0.00868055555555558, "eval_completions/max_length": 2403.3333333333335, "eval_completions/max_terminated_length": 2403.3333333333335, "eval_completions/mean_length": 861.4699910481771, "eval_completions/mean_terminated_length": 868.889882405599, "eval_completions/min_length": 115.66666666666667, "eval_completions/min_terminated_length": 322.8333333333333, "eval_loss": 0.0, "eval_num_tokens": 220676549.0, "eval_reward": 1.0047101080417633, "eval_reward_std": 0.29811317722002667, "eval_rewards/accuracy_reward": 0.6388888955116272, "eval_rewards/batch_coverage_0": 0.12254629905025165, "eval_rewards/batch_coverage_1": 0.12254629905025165, "eval_rewards/batch_coverage_10": 0.12550141910711923, "eval_rewards/batch_coverage_15": 0.16085403288404146, "eval_rewards/batch_coverage_20": 0.22410336136817932, "eval_rewards/batch_coverage_25": 0.29980478684107464, "eval_rewards/batch_coverage_5": 0.12447089081009229, "eval_rewards/brier_reward": 0.8346987068653107, "eval_rewards/confidence_uniqueness_reward": 0.8586996992429098, "eval_rewards/format_reward": 0.9895833432674408, "eval_rewards/frontier_aurc_reward": -0.001299240723407517, "eval_rewards/frontier_ece_reward": 0.021259518650670845, "eval_rewards/frontier_entropy_batch_reward": -0.9895833432674408, "eval_runtime": 196.7066, "eval_samples_per_second": 5.084, "eval_signal/accuracy_reward/centered_abs_mean": 0.4511718700329463, "eval_signal/accuracy_reward/group_std_mean": 0.48224946359793347, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22558593501647314, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22558593501647314, "eval_signal/advantage_abs_mean": 0.24452855934699377, "eval_signal/advantage_pre_scale_abs_mean": 0.24452855934699377, "eval_signal/advantage_pre_scale_std": 0.29801690578460693, "eval_signal/advantage_std": 0.29801690578460693, "eval_signal/batch_coverage_0/centered_abs_mean": 0.2687097614010175, "eval_signal/batch_coverage_0/group_std_mean": 0.372615580757459, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.026870975581308205, "eval_signal/batch_coverage_0/weight": 0.10000000149011612, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.026870975581308205, "eval_signal/batch_coverage_1/centered_abs_mean": 0.2687097614010175, "eval_signal/batch_coverage_1/group_std_mean": 0.372615580757459, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.026870975581308205, "eval_signal/batch_coverage_1/weight": 0.10000000149011612, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.026870975581308205, "eval_signal/batch_coverage_10/centered_abs_mean": 0.2330656349658966, "eval_signal/batch_coverage_10/group_std_mean": 0.32191962500413257, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.023306564427912235, "eval_signal/batch_coverage_10/weight": 0.10000000149011612, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.023306564427912235, "eval_signal/batch_coverage_15/centered_abs_mean": 0.22498319546381632, "eval_signal/batch_coverage_15/group_std_mean": 0.29725244144598645, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.022498319546381634, "eval_signal/batch_coverage_15/weight": 0.10000000149011612, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.022498319546381634, "eval_signal/batch_coverage_20/centered_abs_mean": 0.273581658800443, "eval_signal/batch_coverage_20/group_std_mean": 0.33339178065458935, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.027358165321250755, "eval_signal/batch_coverage_20/weight": 0.10000000149011612, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.027358165321250755, "eval_signal/batch_coverage_25/centered_abs_mean": 0.35976608097553253, "eval_signal/batch_coverage_25/group_std_mean": 0.4202146033445994, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.035976607662936054, "eval_signal/batch_coverage_25/weight": 0.10000000149011612, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.035976607662936054, "eval_signal/batch_coverage_5/centered_abs_mean": 0.26241545875867206, "eval_signal/batch_coverage_5/group_std_mean": 0.3641966183980306, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.026241544944544632, "eval_signal/batch_coverage_5/weight": 0.10000000149011612, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.026241544944544632, "eval_signal/brier_reward/centered_abs_mean": 0.2232651188969612, "eval_signal/brier_reward/group_std_mean": 0.2941201577583949, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02232651226222515, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02232651226222515, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07532106898725033, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.10399662268658479, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007532107022901376, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007532107022901376, "eval_signal/format_reward/centered_abs_mean": 0.019856771143774193, "eval_signal/format_reward/group_std_mean": 0.049957338720560074, "eval_signal/format_reward/group_zero_std_frac": 0.750000019868215, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.009928385571887096, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.009928385571887096, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0018068959082787235, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0034212637498664358, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2586199823611725e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2586199823611725e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.029543430544435978, "eval_signal/frontier_ece_reward/group_std_mean": 0.0442526334275802, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0029543431010097265, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0029543431010097265, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.019856771143774193, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.049957338720560074, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.750000019868215, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.001985677246314784, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.001985677246314784, "eval_steps_per_second": 0.031, "step": 100 }, { "calibration/aurc": 0.27275814168966306, "calibration/batch_distribution_entropy": 0.6649876038103061, "calibration/buffer_distribution_entropy": 0.6616258775307035, "calibration/confidence_entropy": 0.2404086982419873, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.18453443148021048, "calibration/coverage@15%": 0.21867993594143215, "calibration/coverage@20%": 0.42895934173473194, "calibration/coverage@25%": 0.4990613706952415, "calibration/coverage@30%": 0.5436143262171786, "calibration/coverage@5%": 0.1546328071379547, "calibration/ece": 0.16556052440644764, "calibration/mean_confidence": 0.6860977484739768, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00972222222222221, "completions/max_length": 3273.8, "completions/max_terminated_length": 3273.8, "completions/mean_length": 859.6019165039063, "completions/mean_terminated_length": 868.0409423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 260.0, "epoch": 0.2519968500393745, "grad_norm": 0.0006035579717718065, "learning_rate": 3.1024096385542172e-06, "loss": -0.0095, "num_tokens": 233656027.0, "reward": 1.2749878406524657, "reward_std": 0.20778575241565705, "rewards/accuracy_reward": 0.6319444417953491, "rewards/batch_coverage_0": 0.4026776671409607, "rewards/batch_coverage_1": 0.4026776671409607, "rewards/batch_coverage_10": 0.5386986613273621, "rewards/batch_coverage_15": 0.5527087450027466, "rewards/batch_coverage_20": 0.5678210139274598, "rewards/batch_coverage_25": 0.5734583020210267, "rewards/batch_coverage_5": 0.47398912310600283, "rewards/brier_reward": 0.8429219245910644, "rewards/confidence_uniqueness_reward": 0.8753986597061157, "rewards/format_reward": 0.9901041626930237, "rewards/frontier_aurc_reward": -0.0013104168232530356, "rewards/frontier_ece_reward": 0.02432958744466305, "rewards/frontier_entropy_batch_reward": -0.6148821473121643, "signal/accuracy_reward/centered_abs_mean": 0.16476779580116271, "signal/accuracy_reward/group_std_mean": 0.21900846362113952, "signal/accuracy_reward/group_zero_std_frac": 0.38055555820465087, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08238389790058136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08238389790058136, "signal/advantage_abs_mean": 0.15073422193527222, "signal/advantage_pre_scale_abs_mean": 0.15073422193527222, "signal/advantage_pre_scale_std": 0.2431449383497238, "signal/advantage_std": 0.2431449383497238, "signal/batch_coverage_0/centered_abs_mean": 0.13495715856552123, "signal/batch_coverage_0/group_std_mean": 0.18429154455661773, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013495716080069543, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.013495716080069543, "signal/batch_coverage_1/centered_abs_mean": 0.13495715856552123, "signal/batch_coverage_1/group_std_mean": 0.18429154455661773, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013495716080069543, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.013495716080069543, "signal/batch_coverage_10/centered_abs_mean": 0.1662070333957672, "signal/batch_coverage_10/group_std_mean": 0.2270430624485016, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01662070322781801, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.01662070322781801, "signal/batch_coverage_15/centered_abs_mean": 0.17070157825946808, "signal/batch_coverage_15/group_std_mean": 0.23345741331577302, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.017070158571004867, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.017070158571004867, "signal/batch_coverage_20/centered_abs_mean": 0.18138117492198944, "signal/batch_coverage_20/group_std_mean": 0.24707195162773132, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.018138118088245392, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.018138118088245392, "signal/batch_coverage_25/centered_abs_mean": 0.18659622073173524, "signal/batch_coverage_25/group_std_mean": 0.2526919931173325, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.018659623339772224, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.018659623339772224, "signal/batch_coverage_5/centered_abs_mean": 0.1455353856086731, "signal/batch_coverage_5/group_std_mean": 0.19852421581745147, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.014553537964820862, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.014553537964820862, "signal/brier_reward/centered_abs_mean": 0.1374508783221245, "signal/brier_reward/group_std_mean": 0.1849253445863724, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013745087757706642, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013745087757706642, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0681469388306141, "signal/confidence_uniqueness_reward/group_std_mean": 0.0911123812198639, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006814693752676249, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006814693752676249, "signal/format_reward/centered_abs_mean": 0.01706814244389534, "signal/format_reward/group_std_mean": 0.033369756489992144, "signal/format_reward/group_zero_std_frac": 0.8555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00853407122194767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00853407122194767, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012912088888697327, "signal/frontier_aurc_reward/group_std_mean": 0.002038556197658181, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.614011162018869e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.614011162018869e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.025292347371578216, "signal/frontier_ece_reward/group_std_mean": 0.03649614751338959, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0025292347185313702, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0025292347185313702, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32601881623268125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4004940211772919, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.08055555745959282, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03260188177227974, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03260188177227974, "step": 105 }, { "calibration/aurc": 0.13600304996634355, "calibration/batch_distribution_entropy": 0.5387418942015992, "calibration/buffer_distribution_entropy": 0.6603980686405777, "calibration/confidence_entropy": 0.1700767895993173, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.06649214659685863, "calibration/coverage@10%": 0.4305625432228937, "calibration/coverage@15%": 0.6513986521999992, "calibration/coverage@20%": 0.7441999017551388, "calibration/coverage@25%": 0.8238441682474911, "calibration/coverage@30%": 0.8981990649806256, "calibration/coverage@5%": 0.23545211605584643, "calibration/ece": 0.14491282588435714, "calibration/mean_confidence": 0.7337704927321872, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007899305555555559, "completions/max_length": 3590.2, "completions/max_terminated_length": 3590.2, "completions/mean_length": 890.5359497070312, "completions/mean_terminated_length": 897.662841796875, "completions/min_length": 0.0, "completions/min_terminated_length": 279.6, "epoch": 0.2639967000412495, "grad_norm": 0.0005900281248614192, "learning_rate": 2.9518072289156627e-06, "loss": -0.0087, "num_tokens": 247023449.0, "reward": 1.3070582389831542, "reward_std": 0.20293028652668, "rewards/accuracy_reward": 0.6640624880790711, "rewards/batch_coverage_0": 0.4146144509315491, "rewards/batch_coverage_1": 0.4146144509315491, "rewards/batch_coverage_10": 0.5749109148979187, "rewards/batch_coverage_15": 0.5923280835151672, "rewards/batch_coverage_20": 0.6081398606300354, "rewards/batch_coverage_25": 0.6159737706184387, "rewards/batch_coverage_5": 0.5078445851802826, "rewards/brier_reward": 0.8550400853157043, "rewards/confidence_uniqueness_reward": 0.8103942751884461, "rewards/format_reward": 0.9920138835906982, "rewards/frontier_aurc_reward": -0.0012431937037035822, "rewards/frontier_ece_reward": 0.0255121573805809, "rewards/frontier_entropy_batch_reward": -0.6290174722671509, "signal/accuracy_reward/centered_abs_mean": 0.1595486134290695, "signal/accuracy_reward/group_std_mean": 0.21389709711074828, "signal/accuracy_reward/group_zero_std_frac": 0.375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07977430671453475, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07977430671453475, "signal/advantage_abs_mean": 0.1432841181755066, "signal/advantage_pre_scale_abs_mean": 0.1432841181755066, "signal/advantage_pre_scale_std": 0.23976631164550782, "signal/advantage_std": 0.23976631164550782, "signal/batch_coverage_0/centered_abs_mean": 0.1273946210741997, "signal/batch_coverage_0/group_std_mean": 0.17782883048057557, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012739462591707707, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012739462591707707, "signal/batch_coverage_1/centered_abs_mean": 0.1273946210741997, "signal/batch_coverage_1/group_std_mean": 0.17782883048057557, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012739462591707707, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012739462591707707, "signal/batch_coverage_10/centered_abs_mean": 0.1554081290960312, "signal/batch_coverage_10/group_std_mean": 0.22439009249210357, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.015540812350809573, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.015540812350809573, "signal/batch_coverage_15/centered_abs_mean": 0.16385475397109986, "signal/batch_coverage_15/group_std_mean": 0.23465842604637147, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.016385475546121596, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.016385475546121596, "signal/batch_coverage_20/centered_abs_mean": 0.1737336367368698, "signal/batch_coverage_20/group_std_mean": 0.24739258289337157, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.017373364046216012, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.017373364046216012, "signal/batch_coverage_25/centered_abs_mean": 0.18111605048179627, "signal/batch_coverage_25/group_std_mean": 0.2562800019979477, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01811160519719124, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01811160519719124, "signal/batch_coverage_5/centered_abs_mean": 0.13753320872783661, "signal/batch_coverage_5/group_std_mean": 0.19644954800605774, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013753321021795273, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.013753321021795273, "signal/brier_reward/centered_abs_mean": 0.13349896520376206, "signal/brier_reward/group_std_mean": 0.18303124904632567, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013349897041916848, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013349897041916848, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09162542074918748, "signal/confidence_uniqueness_reward/group_std_mean": 0.1198175922036171, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009162542037665844, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009162542037665844, "signal/format_reward/centered_abs_mean": 0.01412760429084301, "signal/format_reward/group_std_mean": 0.0280547920614481, "signal/format_reward/group_zero_std_frac": 0.8805555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007063802145421505, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007063802145421505, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014921376714482903, "signal/frontier_aurc_reward/group_std_mean": 0.0024413310457021, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.865172198449727e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.865172198449727e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.021755291894078255, "signal/frontier_ece_reward/group_std_mean": 0.03146950043737888, "signal/frontier_ece_reward/group_zero_std_frac": 0.01111111119389534, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021755292546004057, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021755292546004057, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29078406691551206, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36565481424331664, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.14166666716337203, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02907840646803379, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02907840646803379, "step": 110 }, { "calibration/aurc": 0.24142886549835296, "calibration/batch_distribution_entropy": 0.6974169468707274, "calibration/buffer_distribution_entropy": 0.6602675731548552, "calibration/confidence_entropy": 0.25641283231726597, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.1538266635798299, "calibration/coverage@15%": 0.45309955988777517, "calibration/coverage@20%": 0.5999502038930988, "calibration/coverage@25%": 0.6443263312855432, "calibration/coverage@30%": 0.6900952333916923, "calibration/coverage@5%": 0.04128686327077748, "calibration/ece": 0.16795997870097873, "calibration/mean_confidence": 0.665753816688968, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008420138888888883, "completions/max_length": 3502.0, "completions/max_terminated_length": 3502.0, "completions/mean_length": 925.295068359375, "completions/mean_terminated_length": 933.1673828125, "completions/min_length": 0.0, "completions/min_terminated_length": 290.6, "epoch": 0.27599655004312446, "grad_norm": 0.0004857274179812521, "learning_rate": 2.8012048192771087e-06, "loss": -0.0094, "num_tokens": 260762048.0, "reward": 1.2888626337051392, "reward_std": 0.1940772384405136, "rewards/accuracy_reward": 0.63046875, "rewards/batch_coverage_0": 0.42994843125343324, "rewards/batch_coverage_1": 0.42994843125343324, "rewards/batch_coverage_10": 0.5508076071739196, "rewards/batch_coverage_15": 0.5608129739761353, "rewards/batch_coverage_20": 0.5743337988853454, "rewards/batch_coverage_25": 0.5797275543212891, "rewards/batch_coverage_5": 0.5175644040107727, "rewards/brier_reward": 0.8383445382118225, "rewards/confidence_uniqueness_reward": 0.8627377271652221, "rewards/format_reward": 0.9909722208976746, "rewards/frontier_aurc_reward": -0.0014342849142849445, "rewards/frontier_ece_reward": 0.021663056686520576, "rewards/frontier_entropy_batch_reward": -0.5842878341674804, "signal/accuracy_reward/centered_abs_mean": 0.15357530415058135, "signal/accuracy_reward/group_std_mean": 0.20156333446502686, "signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07678765207529067, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07678765207529067, "signal/advantage_abs_mean": 0.13954857736825943, "signal/advantage_pre_scale_abs_mean": 0.13954857736825943, "signal/advantage_pre_scale_std": 0.2291879892349243, "signal/advantage_std": 0.2291879892349243, "signal/batch_coverage_0/centered_abs_mean": 0.13936588019132615, "signal/batch_coverage_0/group_std_mean": 0.18808189630508423, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013936588354408742, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.013936588354408742, "signal/batch_coverage_1/centered_abs_mean": 0.13936588019132615, "signal/batch_coverage_1/group_std_mean": 0.18808189630508423, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013936588354408742, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.013936588354408742, "signal/batch_coverage_10/centered_abs_mean": 0.16270072162151336, "signal/batch_coverage_10/group_std_mean": 0.2255598098039627, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.016270072013139725, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.016270072013139725, "signal/batch_coverage_15/centered_abs_mean": 0.1643664598464966, "signal/batch_coverage_15/group_std_mean": 0.228114452958107, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01643664576113224, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01643664576113224, "signal/batch_coverage_20/centered_abs_mean": 0.1735037475824356, "signal/batch_coverage_20/group_std_mean": 0.23918266594409943, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.017350374720990658, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.017350374720990658, "signal/batch_coverage_25/centered_abs_mean": 0.17976273596286774, "signal/batch_coverage_25/group_std_mean": 0.24600136280059814, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.017976274341344835, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.017976274341344835, "signal/batch_coverage_5/centered_abs_mean": 0.15355750322341918, "signal/batch_coverage_5/group_std_mean": 0.21199324131011962, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.015355750359594822, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.015355750359594822, "signal/brier_reward/centered_abs_mean": 0.13386404365301133, "signal/brier_reward/group_std_mean": 0.17856364250183104, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013386405259370803, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013386405259370803, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06989814937114716, "signal/confidence_uniqueness_reward/group_std_mean": 0.0943081557750702, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006989815179258585, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006989815179258585, "signal/format_reward/centered_abs_mean": 0.015766059048473835, "signal/format_reward/group_std_mean": 0.03223867490887642, "signal/format_reward/group_zero_std_frac": 0.8583333373069764, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007883029524236917, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007883029524236917, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012373300269246101, "signal/frontier_aurc_reward/group_std_mean": 0.0018391921184957027, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5466626064153387e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5466626064153387e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.022429964691400527, "signal/frontier_ece_reward/group_std_mean": 0.03295571245253086, "signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002242996543645859, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002242996543645859, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3123690664768219, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3877885460853577, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0916666679084301, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031236908584833144, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031236908584833144, "step": 115 }, { "calibration/aurc": 0.23224894048684197, "calibration/batch_distribution_entropy": 0.6577461754018263, "calibration/buffer_distribution_entropy": 0.6644530368239384, "calibration/confidence_entropy": 0.21956154023502678, "calibration/coverage@0%": 0.030366492146596858, "calibration/coverage@1%": 0.1, "calibration/coverage@10%": 0.36432955586237065, "calibration/coverage@15%": 0.4144833431301519, "calibration/coverage@20%": 0.4844914084179733, "calibration/coverage@25%": 0.5257761130780694, "calibration/coverage@30%": 0.5780309761732261, "calibration/coverage@5%": 0.2661244241521195, "calibration/ece": 0.17240632915410462, "calibration/mean_confidence": 0.6587200250674344, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0059895833333333485, "completions/max_length": 3442.2, "completions/max_terminated_length": 3442.2, "completions/mean_length": 931.5639038085938, "completions/mean_terminated_length": 937.10859375, "completions/min_length": 0.0, "completions/min_terminated_length": 316.8, "epoch": 0.28799640004499943, "grad_norm": 0.0004797873261850327, "learning_rate": 2.6506024096385547e-06, "loss": -0.0053, "num_tokens": 274575520.0, "reward": 1.3031680583953857, "reward_std": 0.18711221516132354, "rewards/accuracy_reward": 0.6480902671813965, "rewards/batch_coverage_0": 0.4340886354446411, "rewards/batch_coverage_1": 0.4340886354446411, "rewards/batch_coverage_10": 0.5644845604896546, "rewards/batch_coverage_15": 0.5737747311592102, "rewards/batch_coverage_20": 0.5859205961227417, "rewards/batch_coverage_25": 0.588034474849701, "rewards/batch_coverage_5": 0.5137127637863159, "rewards/brier_reward": 0.8417695760726929, "rewards/confidence_uniqueness_reward": 0.8399509191513062, "rewards/format_reward": 0.9940104246139526, "rewards/frontier_aurc_reward": -0.001311807334423065, "rewards/frontier_ece_reward": 0.021232662722468376, "rewards/frontier_entropy_batch_reward": -0.575716894865036, "signal/accuracy_reward/centered_abs_mean": 0.1477213516831398, "signal/accuracy_reward/group_std_mean": 0.19991495907306672, "signal/accuracy_reward/group_zero_std_frac": 0.4138888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0738606758415699, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0738606758415699, "signal/advantage_abs_mean": 0.13333643972873688, "signal/advantage_pre_scale_abs_mean": 0.13333643972873688, "signal/advantage_pre_scale_std": 0.22238788306713103, "signal/advantage_std": 0.22238788306713103, "signal/batch_coverage_0/centered_abs_mean": 0.1329650953412056, "signal/batch_coverage_0/group_std_mean": 0.18248933255672456, "signal/batch_coverage_0/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013296510092914104, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.013296510092914104, "signal/batch_coverage_1/centered_abs_mean": 0.1329650953412056, "signal/batch_coverage_1/group_std_mean": 0.18248933255672456, "signal/batch_coverage_1/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013296510092914104, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.013296510092914104, "signal/batch_coverage_10/centered_abs_mean": 0.15834161937236785, "signal/batch_coverage_10/group_std_mean": 0.22070887088775634, "signal/batch_coverage_10/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.015834162198007106, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.015834162198007106, "signal/batch_coverage_15/centered_abs_mean": 0.16037201285362243, "signal/batch_coverage_15/group_std_mean": 0.22399567663669587, "signal/batch_coverage_15/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.016037201695144176, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.016037201695144176, "signal/batch_coverage_20/centered_abs_mean": 0.1704261153936386, "signal/batch_coverage_20/group_std_mean": 0.2371358036994934, "signal/batch_coverage_20/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.017042612284421922, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.017042612284421922, "signal/batch_coverage_25/centered_abs_mean": 0.16791126430034636, "signal/batch_coverage_25/group_std_mean": 0.23394818603992462, "signal/batch_coverage_25/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.016791127249598505, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.016791127249598505, "signal/batch_coverage_5/centered_abs_mean": 0.14462463855743407, "signal/batch_coverage_5/group_std_mean": 0.20006984174251558, "signal/batch_coverage_5/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.014462463743984699, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.014462463743984699, "signal/brier_reward/centered_abs_mean": 0.12397899478673935, "signal/brier_reward/group_std_mean": 0.17036497890949248, "signal/brier_reward/group_zero_std_frac": 0.002777777798473835, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012397900223731995, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012397900223731995, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07810356169939041, "signal/confidence_uniqueness_reward/group_std_mean": 0.10339468717575073, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.002777777798473835, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007810356188565492, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007810356188565492, "signal/format_reward/centered_abs_mean": 0.01047634556889534, "signal/format_reward/group_std_mean": 0.02356223687529564, "signal/format_reward/group_zero_std_frac": 0.8861111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00523817278444767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00523817278444767, "signal/frontier_aurc_reward/centered_abs_mean": 0.0011394575121812522, "signal/frontier_aurc_reward/group_std_mean": 0.0017364894039928914, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.4243219266063533e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.4243219266063533e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.020640724897384645, "signal/frontier_ece_reward/group_std_mean": 0.030754799023270607, "signal/frontier_ece_reward/group_zero_std_frac": 0.016666666977107523, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020640727132558824, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020640727132558824, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31634058952331545, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3934920966625214, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.07222222313284873, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03163406066596508, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03163406066596508, "step": 120 }, { "calibration/aurc": 0.13443913725346507, "calibration/batch_distribution_entropy": 0.5604718237969802, "calibration/buffer_distribution_entropy": 0.6658594200820985, "calibration/confidence_entropy": 0.17788369083866884, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.10520833333333332, "calibration/coverage@10%": 0.5163322368421053, "calibration/coverage@15%": 0.5758991228070175, "calibration/coverage@20%": 0.651672149122807, "calibration/coverage@25%": 0.8831524122807017, "calibration/coverage@30%": 0.9422916666666665, "calibration/coverage@5%": 0.20729166666666665, "calibration/ece": 0.11687316169530022, "calibration/mean_confidence": 0.7484829133562686, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005381944444444442, "completions/max_length": 3468.8, "completions/max_terminated_length": 3468.8, "completions/mean_length": 957.4337890625, "completions/mean_terminated_length": 962.6183471679688, "completions/min_length": 0.0, "completions/min_terminated_length": 340.2, "epoch": 0.2999962500468744, "grad_norm": 0.0005288660177029669, "learning_rate": 2.5e-06, "loss": -0.0053, "num_tokens": 288722821.0, "reward": 1.3045886754989624, "reward_std": 0.19153454899787903, "rewards/accuracy_reward": 0.6677083253860474, "rewards/batch_coverage_0": 0.4332961618900299, "rewards/batch_coverage_1": 0.4332961618900299, "rewards/batch_coverage_10": 0.566796088218689, "rewards/batch_coverage_15": 0.5802444577217102, "rewards/batch_coverage_20": 0.5940844774246216, "rewards/batch_coverage_25": 0.5986591339111328, "rewards/batch_coverage_5": 0.5141059577465057, "rewards/brier_reward": 0.8566888213157654, "rewards/confidence_uniqueness_reward": 0.7277114033699036, "rewards/format_reward": 0.9946180582046509, "rewards/frontier_aurc_reward": -0.0010761045676190406, "rewards/frontier_ece_reward": 0.022585730999708176, "rewards/frontier_entropy_batch_reward": -0.5930793166160584, "signal/accuracy_reward/centered_abs_mean": 0.16424696147441864, "signal/accuracy_reward/group_std_mean": 0.21489786207675934, "signal/accuracy_reward/group_zero_std_frac": 0.39166666865348815, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08212348073720932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08212348073720932, "signal/advantage_abs_mean": 0.14050142765045165, "signal/advantage_pre_scale_abs_mean": 0.14050142765045165, "signal/advantage_pre_scale_std": 0.22893159091472626, "signal/advantage_std": 0.22893159091472626, "signal/batch_coverage_0/centered_abs_mean": 0.1296757608652115, "signal/batch_coverage_0/group_std_mean": 0.1786007136106491, "signal/batch_coverage_0/group_zero_std_frac": 0.027777778543531896, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012967575900256633, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012967575900256633, "signal/batch_coverage_1/centered_abs_mean": 0.1296757608652115, "signal/batch_coverage_1/group_std_mean": 0.1786007136106491, "signal/batch_coverage_1/group_zero_std_frac": 0.027777778543531896, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012967575900256633, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012967575900256633, "signal/batch_coverage_10/centered_abs_mean": 0.15659565031528472, "signal/batch_coverage_10/group_std_mean": 0.2186544954776764, "signal/batch_coverage_10/group_zero_std_frac": 0.025000000186264516, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01565956436097622, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.01565956436097622, "signal/batch_coverage_15/centered_abs_mean": 0.16400947272777558, "signal/batch_coverage_15/group_std_mean": 0.22802729308605194, "signal/batch_coverage_15/group_zero_std_frac": 0.025000000186264516, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.016400948539376257, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.016400948539376257, "signal/batch_coverage_20/centered_abs_mean": 0.17170844078063965, "signal/batch_coverage_20/group_std_mean": 0.2369672656059265, "signal/batch_coverage_20/group_zero_std_frac": 0.025000000186264516, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.017170844972133635, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.017170844972133635, "signal/batch_coverage_25/centered_abs_mean": 0.17332231402397155, "signal/batch_coverage_25/group_std_mean": 0.23855508267879486, "signal/batch_coverage_25/group_zero_std_frac": 0.025000000186264516, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.017332231998443602, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.017332231998443602, "signal/batch_coverage_5/centered_abs_mean": 0.14173758327960967, "signal/batch_coverage_5/group_std_mean": 0.19865359365940094, "signal/batch_coverage_5/group_zero_std_frac": 0.027777778543531896, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.014173758402466773, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.014173758402466773, "signal/brier_reward/centered_abs_mean": 0.13510052263736724, "signal/brier_reward/group_std_mean": 0.17892874777317047, "signal/brier_reward/group_zero_std_frac": 0.025000000186264516, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013510052487254143, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013510052487254143, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.12244658917188644, "signal/confidence_uniqueness_reward/group_std_mean": 0.15771516263484955, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.025000000186264516, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012244659289717675, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012244659289717675, "signal/format_reward/centered_abs_mean": 0.009570312406867742, "signal/format_reward/group_std_mean": 0.019770674780011176, "signal/format_reward/group_zero_std_frac": 0.9111111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004785156203433871, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004785156203433871, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017101092729717494, "signal/frontier_aurc_reward/group_std_mean": 0.0027070957468822597, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.137636729457881e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.137636729457881e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.019067096710205077, "signal/frontier_ece_reward/group_std_mean": 0.028569764271378518, "signal/frontier_ece_reward/group_zero_std_frac": 0.06388889066874981, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019067097455263138, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019067097455263138, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29008581936359407, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3632409691810608, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.15277777910232543, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02900858223438263, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02900858223438263, "step": 125 }, { "calibration/aurc": 0.19127228185204476, "calibration/batch_distribution_entropy": 0.5895829262380423, "calibration/buffer_distribution_entropy": 0.6624590738418712, "calibration/confidence_entropy": 0.19593788092214626, "calibration/coverage@0%": 0.07783524408961186, "calibration/coverage@1%": 0.13527649735331942, "calibration/coverage@10%": 0.30391945128015657, "calibration/coverage@15%": 0.39742277312595503, "calibration/coverage@20%": 0.44843057846775736, "calibration/coverage@25%": 0.7765386195340307, "calibration/coverage@30%": 0.8501852970922469, "calibration/coverage@5%": 0.21134445716307898, "calibration/ece": 0.12887487159762867, "calibration/mean_confidence": 0.6930903424604881, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00694444444444442, "completions/max_length": 3430.0, "completions/max_terminated_length": 3430.0, "completions/mean_length": 1003.2494140625, "completions/mean_terminated_length": 1010.2728271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 293.6, "epoch": 0.3119961000487494, "grad_norm": 0.0005818709614686668, "learning_rate": 2.349397590361446e-06, "loss": -0.0075, "num_tokens": 303405054.0, "reward": 1.2743345260620118, "reward_std": 0.2015679657459259, "rewards/accuracy_reward": 0.6296007037162781, "rewards/batch_coverage_0": 0.4014659643173218, "rewards/batch_coverage_1": 0.4014659643173218, "rewards/batch_coverage_10": 0.5354062855243683, "rewards/batch_coverage_15": 0.5588637709617614, "rewards/batch_coverage_20": 0.5805766224861145, "rewards/batch_coverage_25": 0.5847961187362671, "rewards/batch_coverage_5": 0.4904080927371979, "rewards/brier_reward": 0.8410807013511657, "rewards/confidence_uniqueness_reward": 0.8179298520088196, "rewards/format_reward": 0.99296875, "rewards/frontier_aurc_reward": -0.0014736841316334904, "rewards/frontier_ece_reward": 0.01973484009504318, "rewards/frontier_entropy_batch_reward": -0.6010460972785949, "signal/accuracy_reward/centered_abs_mean": 0.17263997793197633, "signal/accuracy_reward/group_std_mean": 0.22644833624362945, "signal/accuracy_reward/group_zero_std_frac": 0.3638888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08631998896598816, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08631998896598816, "signal/advantage_abs_mean": 0.14573590755462645, "signal/advantage_pre_scale_abs_mean": 0.14573590755462645, "signal/advantage_pre_scale_std": 0.23568201065063477, "signal/advantage_std": 0.23568201065063477, "signal/batch_coverage_0/centered_abs_mean": 0.13741378337144852, "signal/batch_coverage_0/group_std_mean": 0.1913052588701248, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013741378672420979, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.013741378672420979, "signal/batch_coverage_1/centered_abs_mean": 0.13741378337144852, "signal/batch_coverage_1/group_std_mean": 0.1913052588701248, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013741378672420979, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.013741378672420979, "signal/batch_coverage_10/centered_abs_mean": 0.15665342509746552, "signal/batch_coverage_10/group_std_mean": 0.2212431699037552, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.015665343031287193, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.015665343031287193, "signal/batch_coverage_15/centered_abs_mean": 0.16616102159023285, "signal/batch_coverage_15/group_std_mean": 0.23317878246307372, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.016616101562976836, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.016616101562976836, "signal/batch_coverage_20/centered_abs_mean": 0.18078482151031494, "signal/batch_coverage_20/group_std_mean": 0.25075055956840514, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.018078482896089553, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.018078482896089553, "signal/batch_coverage_25/centered_abs_mean": 0.18434585630893707, "signal/batch_coverage_25/group_std_mean": 0.254821240901947, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.018434586003422737, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.018434586003422737, "signal/batch_coverage_5/centered_abs_mean": 0.1469416558742523, "signal/batch_coverage_5/group_std_mean": 0.20604339838027955, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01469416581094265, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.01469416581094265, "signal/brier_reward/centered_abs_mean": 0.13546621352434157, "signal/brier_reward/group_std_mean": 0.18270427882671356, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013546621613204479, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013546621613204479, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09114307463169098, "signal/confidence_uniqueness_reward/group_std_mean": 0.1203562155365944, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009114306885749102, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009114306885749102, "signal/format_reward/centered_abs_mean": 0.013167317770421506, "signal/format_reward/group_std_mean": 0.030222728475928308, "signal/format_reward/group_zero_std_frac": 0.8583333373069764, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006583658885210753, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006583658885210753, "signal/frontier_aurc_reward/centered_abs_mean": 0.001271829183679074, "signal/frontier_aurc_reward/group_std_mean": 0.0019367508590221404, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5897865705483127e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5897865705483127e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.019031322747468948, "signal/frontier_ece_reward/group_std_mean": 0.029065588489174843, "signal/frontier_ece_reward/group_zero_std_frac": 0.008333333395421505, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019031323958188296, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019031323958188296, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28876765370368956, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3632201969623566, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.13888888955116271, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028876765072345732, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028876765072345732, "step": 130 }, { "calibration/aurc": 0.18872730403697074, "calibration/batch_distribution_entropy": 0.6812975794088347, "calibration/buffer_distribution_entropy": 0.6630834998201833, "calibration/confidence_entropy": 0.2641302845207065, "calibration/coverage@0%": 0.16066601049868767, "calibration/coverage@1%": 0.186186843832021, "calibration/coverage@10%": 0.3252712736565824, "calibration/coverage@15%": 0.37397193155131925, "calibration/coverage@20%": 0.5700034901472911, "calibration/coverage@25%": 0.7193106086140599, "calibration/coverage@30%": 0.7926389789838693, "calibration/coverage@5%": 0.21066601049868766, "calibration/ece": 0.1480757767000653, "calibration/mean_confidence": 0.7052663474885803, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00815972222222221, "completions/max_length": 3630.4, "completions/max_terminated_length": 3630.4, "completions/mean_length": 958.3525268554688, "completions/mean_terminated_length": 966.2191772460938, "completions/min_length": 0.0, "completions/min_terminated_length": 306.4, "epoch": 0.32399595005062437, "grad_norm": 0.0005636845016852021, "learning_rate": 2.1987951807228917e-06, "loss": -0.0079, "num_tokens": 317538299.0, "reward": 1.2903289556503297, "reward_std": 0.200746613740921, "rewards/accuracy_reward": 0.635937511920929, "rewards/batch_coverage_0": 0.4029585897922516, "rewards/batch_coverage_1": 0.4029585897922516, "rewards/batch_coverage_10": 0.5487715125083923, "rewards/batch_coverage_15": 0.560142207145691, "rewards/batch_coverage_20": 0.5713020324707031, "rewards/batch_coverage_25": 0.5746458768844604, "rewards/batch_coverage_5": 0.4942119956016541, "rewards/brier_reward": 0.8597426056861878, "rewards/confidence_uniqueness_reward": 0.8913098454475403, "rewards/format_reward": 0.9918402791023254, "rewards/frontier_aurc_reward": -0.0009640321717597544, "rewards/frontier_ece_reward": 0.01977999582886696, "rewards/frontier_entropy_batch_reward": -0.5613025665283203, "signal/accuracy_reward/centered_abs_mean": 0.15625000298023223, "signal/accuracy_reward/group_std_mean": 0.20937940776348113, "signal/accuracy_reward/group_zero_std_frac": 0.38333333730697633, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07812500149011611, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07812500149011611, "signal/advantage_abs_mean": 0.14403506219387055, "signal/advantage_pre_scale_abs_mean": 0.14403506219387055, "signal/advantage_pre_scale_std": 0.23285807073116302, "signal/advantage_std": 0.23285807073116302, "signal/batch_coverage_0/centered_abs_mean": 0.13533948957920075, "signal/batch_coverage_0/group_std_mean": 0.18287818133831024, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013533948920667171, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.013533948920667171, "signal/batch_coverage_1/centered_abs_mean": 0.13533948957920075, "signal/batch_coverage_1/group_std_mean": 0.18287818133831024, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013533948920667171, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.013533948920667171, "signal/batch_coverage_10/centered_abs_mean": 0.16238720417022706, "signal/batch_coverage_10/group_std_mean": 0.22439574897289277, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.0162387203425169, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0162387203425169, "signal/batch_coverage_15/centered_abs_mean": 0.16828042268753052, "signal/batch_coverage_15/group_std_mean": 0.23210206627845764, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.016828041709959506, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.016828041709959506, "signal/batch_coverage_20/centered_abs_mean": 0.17479702234268188, "signal/batch_coverage_20/group_std_mean": 0.24096376001834868, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.017479703202843665, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.017479703202843665, "signal/batch_coverage_25/centered_abs_mean": 0.17699779570102692, "signal/batch_coverage_25/group_std_mean": 0.24334222674369813, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01769977994263172, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01769977994263172, "signal/batch_coverage_5/centered_abs_mean": 0.14455084204673768, "signal/batch_coverage_5/group_std_mean": 0.19790201783180236, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01445508487522602, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.01445508487522602, "signal/brier_reward/centered_abs_mean": 0.12116786390542984, "signal/brier_reward/group_std_mean": 0.16832829415798187, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01211678646504879, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01211678646504879, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05779082030057907, "signal/confidence_uniqueness_reward/group_std_mean": 0.07999642044305802, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005779081955552101, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005779081955552101, "signal/format_reward/centered_abs_mean": 0.014279513992369175, "signal/format_reward/group_std_mean": 0.029366502165794374, "signal/format_reward/group_zero_std_frac": 0.8694444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007139756996184588, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007139756996184588, "signal/frontier_aurc_reward/centered_abs_mean": 0.0006708215922117233, "signal/frontier_aurc_reward/group_std_mean": 0.0010480164433829486, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.385269848076859e-06, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.385269848076859e-06, "signal/frontier_ece_reward/centered_abs_mean": 0.021684225276112558, "signal/frontier_ece_reward/group_std_mean": 0.03304144144058228, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00216842251829803, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00216842251829803, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.311614853143692, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3836499214172363, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0861111119389534, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031161487475037573, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031161487475037573, "step": 135 }, { "calibration/aurc": 0.13130987388401666, "calibration/batch_distribution_entropy": 0.6597482867683313, "calibration/buffer_distribution_entropy": 0.6702807202345886, "calibration/confidence_entropy": 0.25772347328549483, "calibration/coverage@0%": 0.07819131853785902, "calibration/coverage@1%": 0.18912465187119235, "calibration/coverage@10%": 0.46348298520452574, "calibration/coverage@15%": 0.5631871518711924, "calibration/coverage@20%": 0.738234268929504, "calibration/coverage@25%": 0.8244881418624891, "calibration/coverage@30%": 0.8930138054830288, "calibration/coverage@5%": 0.366953818537859, "calibration/ece": 0.07870854715898996, "calibration/mean_confidence": 0.6800026414696745, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0073784722222222324, "completions/max_length": 3265.2, "completions/max_terminated_length": 3265.2, "completions/mean_length": 938.0556518554688, "completions/mean_terminated_length": 945.0173217773438, "completions/min_length": 0.0, "completions/min_terminated_length": 318.8, "epoch": 0.33599580005249935, "grad_norm": 0.0005809567519463599, "learning_rate": 2.0481927710843377e-06, "loss": -0.0076, "num_tokens": 331448924.0, "reward": 1.2944050073623656, "reward_std": 0.19232451021671296, "rewards/accuracy_reward": 0.6342881917953491, "rewards/batch_coverage_0": 0.42060819268226624, "rewards/batch_coverage_1": 0.42060819268226624, "rewards/batch_coverage_10": 0.5566146194934845, "rewards/batch_coverage_15": 0.5694395661354065, "rewards/batch_coverage_20": 0.578026282787323, "rewards/batch_coverage_25": 0.5797327399253845, "rewards/batch_coverage_5": 0.5075171232223511, "rewards/brier_reward": 0.8636380910873414, "rewards/confidence_uniqueness_reward": 0.8904500603675842, "rewards/format_reward": 0.9926215291023255, "rewards/frontier_aurc_reward": -0.000849519798066467, "rewards/frontier_ece_reward": 0.018118308484554292, "rewards/frontier_entropy_batch_reward": -0.5951453566551208, "signal/accuracy_reward/centered_abs_mean": 0.1548122853040695, "signal/accuracy_reward/group_std_mean": 0.2058233290910721, "signal/accuracy_reward/group_zero_std_frac": 0.4083333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07740614265203476, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07740614265203476, "signal/advantage_abs_mean": 0.1391018182039261, "signal/advantage_pre_scale_abs_mean": 0.1391018182039261, "signal/advantage_pre_scale_std": 0.22803472578525544, "signal/advantage_std": 0.22803472578525544, "signal/batch_coverage_0/centered_abs_mean": 0.13350388407707214, "signal/batch_coverage_0/group_std_mean": 0.18149447143077851, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013350388407707215, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.013350388407707215, "signal/batch_coverage_1/centered_abs_mean": 0.13350388407707214, "signal/batch_coverage_1/group_std_mean": 0.18149447143077851, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013350388407707215, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.013350388407707215, "signal/batch_coverage_10/centered_abs_mean": 0.16032011508941652, "signal/batch_coverage_10/group_std_mean": 0.2208678036928177, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.016032011434435844, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.016032011434435844, "signal/batch_coverage_15/centered_abs_mean": 0.16713197529315948, "signal/batch_coverage_15/group_std_mean": 0.22929813265800475, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.016713198088109494, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.016713198088109494, "signal/batch_coverage_20/centered_abs_mean": 0.17368297278881073, "signal/batch_coverage_20/group_std_mean": 0.2377300262451172, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.017368298023939133, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.017368298023939133, "signal/batch_coverage_25/centered_abs_mean": 0.17488843202590942, "signal/batch_coverage_25/group_std_mean": 0.23914681673049926, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.017488843947649, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.017488843947649, "signal/batch_coverage_5/centered_abs_mean": 0.14723534882068634, "signal/batch_coverage_5/group_std_mean": 0.20246534347534179, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01472353506833315, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.01472353506833315, "signal/brier_reward/centered_abs_mean": 0.12155497521162033, "signal/brier_reward/group_std_mean": 0.16532252728939056, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012155497632920741, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012155497632920741, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05778259187936783, "signal/confidence_uniqueness_reward/group_std_mean": 0.07849341556429863, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0057782595045864586, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0057782595045864586, "signal/format_reward/centered_abs_mean": 0.012679036427289247, "signal/format_reward/group_std_mean": 0.026040823385119437, "signal/format_reward/group_zero_std_frac": 0.8833333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006339518213644623, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006339518213644623, "signal/frontier_aurc_reward/centered_abs_mean": 0.0005655112327076495, "signal/frontier_aurc_reward/group_std_mean": 0.000877034547738731, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.0688900450477375e-06, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.0688900450477375e-06, "signal/frontier_ece_reward/centered_abs_mean": 0.02037911266088486, "signal/frontier_ece_reward/group_std_mean": 0.030708856508135797, "signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020379112334921955, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020379112334921955, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30631880164146424, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38140068054199217, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.09444444552063942, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030631881207227707, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030631881207227707, "step": 140 }, { "calibration/aurc": 0.13242803980464785, "calibration/batch_distribution_entropy": 0.6897934814373954, "calibration/buffer_distribution_entropy": 0.6836537797572135, "calibration/confidence_entropy": 0.2472499154735496, "calibration/coverage@0%": 0.06841974896222075, "calibration/coverage@1%": 0.08460773851835651, "calibration/coverage@10%": 0.5285086137957431, "calibration/coverage@15%": 0.6229715886566511, "calibration/coverage@20%": 0.7273929512460187, "calibration/coverage@25%": 0.7921071247932414, "calibration/coverage@30%": 0.8703833987783595, "calibration/coverage@5%": 0.38859692328635415, "calibration/ece": 0.1295649301313624, "calibration/mean_confidence": 0.6769795923957921, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006423611111111116, "completions/max_length": 3228.4, "completions/max_terminated_length": 3228.4, "completions/mean_length": 909.677001953125, "completions/mean_terminated_length": 915.5734008789062, "completions/min_length": 0.0, "completions/min_terminated_length": 291.8, "epoch": 0.34799565005437433, "grad_norm": 0.0005263423081487417, "learning_rate": 1.8975903614457832e-06, "loss": -0.0073, "num_tokens": 344993011.0, "reward": 1.3420505285263062, "reward_std": 0.17107113003730773, "rewards/accuracy_reward": 0.6890625, "rewards/batch_coverage_0": 0.4715816080570221, "rewards/batch_coverage_1": 0.4715816080570221, "rewards/batch_coverage_10": 0.5932251930236816, "rewards/batch_coverage_15": 0.597273302078247, "rewards/batch_coverage_20": 0.6058772444725037, "rewards/batch_coverage_25": 0.6105739712715149, "rewards/batch_coverage_5": 0.5536993384361267, "rewards/brier_reward": 0.8571294665336608, "rewards/confidence_uniqueness_reward": 0.840834093093872, "rewards/format_reward": 0.9935763716697693, "rewards/frontier_aurc_reward": -0.000966754974797368, "rewards/frontier_ece_reward": 0.016152642853558063, "rewards/frontier_entropy_batch_reward": -0.6104970335960388, "signal/accuracy_reward/centered_abs_mean": 0.14091796725988387, "signal/accuracy_reward/group_std_mean": 0.18586435317993164, "signal/accuracy_reward/group_zero_std_frac": 0.4694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07045898362994193, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07045898362994193, "signal/advantage_abs_mean": 0.1214504137635231, "signal/advantage_pre_scale_abs_mean": 0.1214504137635231, "signal/advantage_pre_scale_std": 0.21154877543449402, "signal/advantage_std": 0.21154877543449402, "signal/batch_coverage_0/centered_abs_mean": 0.12109262049198151, "signal/batch_coverage_0/group_std_mean": 0.16618651747703553, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012109261937439442, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012109261937439442, "signal/batch_coverage_1/centered_abs_mean": 0.12109262049198151, "signal/batch_coverage_1/group_std_mean": 0.16618651747703553, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012109261937439442, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012109261937439442, "signal/batch_coverage_10/centered_abs_mean": 0.14189857691526414, "signal/batch_coverage_10/group_std_mean": 0.20135764181613922, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.014189857989549637, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.014189857989549637, "signal/batch_coverage_15/centered_abs_mean": 0.1426687866449356, "signal/batch_coverage_15/group_std_mean": 0.2024532824754715, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.014266878738999367, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.014266878738999367, "signal/batch_coverage_20/centered_abs_mean": 0.14260076880455017, "signal/batch_coverage_20/group_std_mean": 0.20335802137851716, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01426007729023695, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.01426007729023695, "signal/batch_coverage_25/centered_abs_mean": 0.1483145087957382, "signal/batch_coverage_25/group_std_mean": 0.2090164303779602, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.014831452071666718, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.014831452071666718, "signal/batch_coverage_5/centered_abs_mean": 0.13110972493886947, "signal/batch_coverage_5/group_std_mean": 0.1835319548845291, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01311097275465727, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.01311097275465727, "signal/brier_reward/centered_abs_mean": 0.11349970698356629, "signal/brier_reward/group_std_mean": 0.15323745906352998, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011349971219897271, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011349971219897271, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08850041627883912, "signal/confidence_uniqueness_reward/group_std_mean": 0.11195365190505982, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008850041963160037, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008850041963160037, "signal/format_reward/centered_abs_mean": 0.011295572854578495, "signal/format_reward/group_std_mean": 0.023191133886575697, "signal/format_reward/group_zero_std_frac": 0.8972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0056477864272892475, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0056477864272892475, "signal/frontier_aurc_reward/centered_abs_mean": 0.0007335938105825334, "signal/frontier_aurc_reward/group_std_mean": 0.0011282574851065874, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.169922486762517e-06, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.169922486762517e-06, "signal/frontier_ece_reward/centered_abs_mean": 0.015803563967347146, "signal/frontier_ece_reward/group_std_mean": 0.02349414937198162, "signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015803564339876175, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015803564339876175, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.263643753528595, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33980435132980347, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1500000014901161, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026364374533295633, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026364374533295633, "step": 145 }, { "calibration/aurc": 0.1312147602821559, "calibration/batch_distribution_entropy": 0.661729676250131, "calibration/buffer_distribution_entropy": 0.6944097591767437, "calibration/confidence_entropy": 0.23713249978777262, "calibration/coverage@0%": 0.04393264545314922, "calibration/coverage@1%": 0.16382511857142878, "calibration/coverage@10%": 0.47878265415336674, "calibration/coverage@15%": 0.674541930220249, "calibration/coverage@20%": 0.7587017934939817, "calibration/coverage@25%": 0.816094054136481, "calibration/coverage@30%": 0.891664031966991, "calibration/coverage@5%": 0.22227004964753982, "calibration/ece": 0.11323309080521082, "calibration/mean_confidence": 0.7025271439706133, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006423611111111116, "completions/max_length": 3744.0, "completions/max_terminated_length": 3744.0, "completions/mean_length": 1006.754345703125, "completions/mean_terminated_length": 1013.2921508789062, "completions/min_length": 0.0, "completions/min_terminated_length": 297.4, "epoch": 0.3599955000562493, "grad_norm": 0.0005483371787704527, "learning_rate": 1.7469879518072292e-06, "loss": -0.0063, "num_tokens": 359701157.0, "reward": 1.3184950351715088, "reward_std": 0.18533724546432495, "rewards/accuracy_reward": 0.6743923664093018, "rewards/batch_coverage_0": 0.4469079613685608, "rewards/batch_coverage_1": 0.4469079613685608, "rewards/batch_coverage_10": 0.5693666577339173, "rewards/batch_coverage_15": 0.5873542547225952, "rewards/batch_coverage_20": 0.602312695980072, "rewards/batch_coverage_25": 0.6088623762130737, "rewards/batch_coverage_5": 0.5222943484783172, "rewards/brier_reward": 0.852564013004303, "rewards/confidence_uniqueness_reward": 0.8201781153678894, "rewards/format_reward": 0.9934027791023254, "rewards/frontier_aurc_reward": -0.0010987881105393171, "rewards/frontier_ece_reward": 0.013082112185657024, "rewards/frontier_entropy_batch_reward": -0.6237188339233398, "signal/accuracy_reward/centered_abs_mean": 0.16347113847732545, "signal/accuracy_reward/group_std_mean": 0.21139195263385774, "signal/accuracy_reward/group_zero_std_frac": 0.41666666865348817, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08173556923866272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08173556923866272, "signal/advantage_abs_mean": 0.13506793975830078, "signal/advantage_pre_scale_abs_mean": 0.13506793975830078, "signal/advantage_pre_scale_std": 0.22540478706359862, "signal/advantage_std": 0.22540478706359862, "signal/batch_coverage_0/centered_abs_mean": 0.12697066515684127, "signal/batch_coverage_0/group_std_mean": 0.17325193881988527, "signal/batch_coverage_0/group_zero_std_frac": 0.00555555559694767, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012697066552937031, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012697066552937031, "signal/batch_coverage_1/centered_abs_mean": 0.12697066515684127, "signal/batch_coverage_1/group_std_mean": 0.17325193881988527, "signal/batch_coverage_1/group_zero_std_frac": 0.00555555559694767, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012697066552937031, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012697066552937031, "signal/batch_coverage_10/centered_abs_mean": 0.148236745595932, "signal/batch_coverage_10/group_std_mean": 0.2072461098432541, "signal/batch_coverage_10/group_zero_std_frac": 0.00555555559694767, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.014823674410581588, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.014823674410581588, "signal/batch_coverage_15/centered_abs_mean": 0.15683491826057433, "signal/batch_coverage_15/group_std_mean": 0.21849296391010284, "signal/batch_coverage_15/group_zero_std_frac": 0.00555555559694767, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.015683492459356786, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.015683492459356786, "signal/batch_coverage_20/centered_abs_mean": 0.16781437695026397, "signal/batch_coverage_20/group_std_mean": 0.2322419822216034, "signal/batch_coverage_20/group_zero_std_frac": 0.00555555559694767, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.016781437583267687, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.016781437583267687, "signal/batch_coverage_25/centered_abs_mean": 0.17468718588352203, "signal/batch_coverage_25/group_std_mean": 0.23929275274276735, "signal/batch_coverage_25/group_zero_std_frac": 0.00555555559694767, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.017468718439340593, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.017468718439340593, "signal/batch_coverage_5/centered_abs_mean": 0.1365981876850128, "signal/batch_coverage_5/group_std_mean": 0.19019657075405122, "signal/batch_coverage_5/group_zero_std_frac": 0.00555555559694767, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013659819029271603, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.013659819029271603, "signal/brier_reward/centered_abs_mean": 0.12594723403453828, "signal/brier_reward/group_std_mean": 0.16902333796024321, "signal/brier_reward/group_zero_std_frac": 0.00555555559694767, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01259472370147705, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01259472370147705, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08781559914350509, "signal/confidence_uniqueness_reward/group_std_mean": 0.11438610553741455, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.00555555559694767, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00878155967220664, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00878155967220664, "signal/format_reward/centered_abs_mean": 0.011816406156867743, "signal/format_reward/group_std_mean": 0.026017525047063828, "signal/format_reward/group_zero_std_frac": 0.8777777791023255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005908203078433871, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005908203078433871, "signal/frontier_aurc_reward/centered_abs_mean": 0.0011653327848762274, "signal/frontier_aurc_reward/group_std_mean": 0.0018219074700027704, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.4566658683179412e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.4566658683179412e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.011827461235225201, "signal/frontier_ece_reward/group_std_mean": 0.017636168748140335, "signal/frontier_ece_reward/group_zero_std_frac": 0.011111111380159855, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011827461421489716, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011827461421489716, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.266205421090126, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34265450239181516, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1638888895511627, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026620543748140334, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026620543748140334, "step": 150 }, { "epoch": 0.3599955000562493, "eval_calibration/aurc": 0.11871445559673784, "eval_calibration/batch_distribution_entropy": 0.579269640153368, "eval_calibration/buffer_distribution_entropy": 0.6991974471728701, "eval_calibration/confidence_entropy": 0.20986329461099262, "eval_calibration/coverage@0%": 0.25235215053763443, "eval_calibration/coverage@1%": 0.25235215053763443, "eval_calibration/coverage@10%": 0.597614247311828, "eval_calibration/coverage@15%": 0.6656586021505376, "eval_calibration/coverage@20%": 0.8015793010752689, "eval_calibration/coverage@25%": 0.8592069892473119, "eval_calibration/coverage@30%": 0.9583333333333334, "eval_calibration/coverage@5%": 0.29922715053763443, "eval_calibration/ece": 0.1486032851244478, "eval_calibration/mean_confidence": 0.693362338484394, "eval_completions/clipped_ratio": 0.006076388888888895, "eval_completions/max_length": 2499.5, "eval_completions/max_terminated_length": 2499.5, "eval_completions/mean_length": 950.9599100748698, "eval_completions/mean_terminated_length": 956.7284545898438, "eval_completions/min_length": 136.16666666666666, "eval_completions/min_terminated_length": 369.0, "eval_loss": 0.0, "eval_num_tokens": 359701157.0, "eval_reward": 1.0300345222155254, "eval_reward_std": 0.2921375830968221, "eval_rewards/accuracy_reward": 0.6527777711550394, "eval_rewards/batch_coverage_0": 0.13550970455010733, "eval_rewards/batch_coverage_1": 0.13550970455010733, "eval_rewards/batch_coverage_10": 0.17404532556732497, "eval_rewards/batch_coverage_15": 0.21777691567937532, "eval_rewards/batch_coverage_20": 0.2769256259004275, "eval_rewards/batch_coverage_25": 0.338382991651694, "eval_rewards/batch_coverage_5": 0.13637679815292358, "eval_rewards/brier_reward": 0.8511766990025839, "eval_rewards/confidence_uniqueness_reward": 0.7835536301136017, "eval_rewards/format_reward": 0.9939236144224802, "eval_rewards/frontier_aurc_reward": -0.0016888692043721676, "eval_rewards/frontier_ece_reward": 0.011714956567933163, "eval_rewards/frontier_entropy_batch_reward": -0.9939236144224802, "eval_runtime": 179.13, "eval_samples_per_second": 5.583, "eval_signal/accuracy_reward/centered_abs_mean": 0.4405381977558136, "eval_signal/accuracy_reward/group_std_mean": 0.4760441432396571, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2202690988779068, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2202690988779068, "eval_signal/advantage_abs_mean": 0.23840626577536264, "eval_signal/advantage_pre_scale_abs_mean": 0.23840626577536264, "eval_signal/advantage_pre_scale_std": 0.29046809176603955, "eval_signal/advantage_std": 0.29046809176603955, "eval_signal/batch_coverage_0/centered_abs_mean": 0.28725582361221313, "eval_signal/batch_coverage_0/group_std_mean": 0.39222339789072674, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.028725583106279373, "eval_signal/batch_coverage_0/weight": 0.10000000149011612, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.028725583106279373, "eval_signal/batch_coverage_1/centered_abs_mean": 0.28725582361221313, "eval_signal/batch_coverage_1/group_std_mean": 0.39222339789072674, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.028725583106279373, "eval_signal/batch_coverage_1/weight": 0.10000000149011612, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.028725583106279373, "eval_signal/batch_coverage_10/centered_abs_mean": 0.23628024011850357, "eval_signal/batch_coverage_10/group_std_mean": 0.30995993067820865, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.02362802407393853, "eval_signal/batch_coverage_10/weight": 0.10000000149011612, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.02362802407393853, "eval_signal/batch_coverage_15/centered_abs_mean": 0.249565156797568, "eval_signal/batch_coverage_15/group_std_mean": 0.32002828270196915, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.024956516921520233, "eval_signal/batch_coverage_15/weight": 0.10000000149011612, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.024956516921520233, "eval_signal/batch_coverage_20/centered_abs_mean": 0.2884892448782921, "eval_signal/batch_coverage_20/group_std_mean": 0.3490825891494751, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.028848926226298015, "eval_signal/batch_coverage_20/weight": 0.10000000149011612, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.028848926226298015, "eval_signal/batch_coverage_25/centered_abs_mean": 0.36286551256974536, "eval_signal/batch_coverage_25/group_std_mean": 0.42505596081415814, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.036286553367972374, "eval_signal/batch_coverage_25/weight": 0.10000000149011612, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.036286553367972374, "eval_signal/batch_coverage_5/centered_abs_mean": 0.27063219497601193, "eval_signal/batch_coverage_5/group_std_mean": 0.36894193291664124, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.027063219187160332, "eval_signal/batch_coverage_5/weight": 0.10000000149011612, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.027063219187160332, "eval_signal/brier_reward/centered_abs_mean": 0.21436868607997894, "eval_signal/brier_reward/group_std_mean": 0.2930321345726649, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021436869477232296, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.021436869477232296, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.14700014889240265, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.17157389223575592, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.014700015230725208, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.014700015230725208, "eval_signal/format_reward/centered_abs_mean": 0.011664496424297491, "eval_signal/format_reward/group_std_mean": 0.031383837262789406, "eval_signal/format_reward/group_zero_std_frac": 0.8333333532015482, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0058322482121487456, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0058322482121487456, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002837341142973552, "eval_signal/frontier_aurc_reward/group_std_mean": 0.006045101191072415, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5466764378118874e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5466764378118874e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.013863155307869116, "eval_signal/frontier_ece_reward/group_std_mean": 0.020291317875186603, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001386315542428444, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001386315542428444, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.011664496424297491, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.031383837262789406, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8333333532015482, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0011664496657128136, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0011664496657128136, "eval_steps_per_second": 0.033, "step": 150 }, { "calibration/aurc": 0.12383752382904105, "calibration/batch_distribution_entropy": 0.702563636124068, "calibration/buffer_distribution_entropy": 0.6998238675351348, "calibration/confidence_entropy": 0.2524229859327377, "calibration/coverage@0%": 0.05863069335654192, "calibration/coverage@1%": 0.1390484479257325, "calibration/coverage@10%": 0.5521362222291111, "calibration/coverage@15%": 0.7136257636058139, "calibration/coverage@20%": 0.793529087825562, "calibration/coverage@25%": 0.8577884487501664, "calibration/coverage@30%": 0.9075186022581707, "calibration/coverage@5%": 0.3468908052343468, "calibration/ece": 0.14021003480180688, "calibration/mean_confidence": 0.7065864211069093, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005989583333333326, "completions/max_length": 3560.4, "completions/max_terminated_length": 3560.4, "completions/mean_length": 945.785693359375, "completions/mean_terminated_length": 951.551171875, "completions/min_length": 0.0, "completions/min_terminated_length": 310.2, "epoch": 0.3719953500581243, "grad_norm": 0.0005548963672481477, "learning_rate": 1.5963855421686747e-06, "loss": -0.0039, "num_tokens": 373704320.0, "reward": 1.360306739807129, "reward_std": 0.17673680186271667, "rewards/accuracy_reward": 0.71875, "rewards/batch_coverage_0": 0.46235992312431334, "rewards/batch_coverage_1": 0.46235992312431334, "rewards/batch_coverage_10": 0.6080305695533752, "rewards/batch_coverage_15": 0.6152806878089905, "rewards/batch_coverage_20": 0.6200535535812378, "rewards/batch_coverage_25": 0.6224696636199951, "rewards/batch_coverage_5": 0.5473322987556457, "rewards/brier_reward": 0.8666894555091857, "rewards/confidence_uniqueness_reward": 0.8023838877677918, "rewards/format_reward": 0.9940104126930237, "rewards/frontier_aurc_reward": -0.0010168740176595748, "rewards/frontier_ece_reward": 0.01128138080239296, "rewards/frontier_entropy_batch_reward": -0.5788492798805237, "signal/accuracy_reward/centered_abs_mean": 0.15622830092906953, "signal/accuracy_reward/group_std_mean": 0.20322224497795105, "signal/accuracy_reward/group_zero_std_frac": 0.43055556416511537, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07811415046453477, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07811415046453477, "signal/advantage_abs_mean": 0.12816403806209564, "signal/advantage_pre_scale_abs_mean": 0.12816403806209564, "signal/advantage_pre_scale_std": 0.21938224732875825, "signal/advantage_std": 0.21938224732875825, "signal/batch_coverage_0/centered_abs_mean": 0.12863150537014006, "signal/batch_coverage_0/group_std_mean": 0.17149013876914979, "signal/batch_coverage_0/group_zero_std_frac": 0.02222222238779068, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012863150611519814, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012863150611519814, "signal/batch_coverage_1/centered_abs_mean": 0.12863150537014006, "signal/batch_coverage_1/group_std_mean": 0.17149013876914979, "signal/batch_coverage_1/group_zero_std_frac": 0.02222222238779068, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012863150611519814, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012863150611519814, "signal/batch_coverage_10/centered_abs_mean": 0.15439578890800476, "signal/batch_coverage_10/group_std_mean": 0.21234571635723115, "signal/batch_coverage_10/group_zero_std_frac": 0.02222222238779068, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.015439579635858536, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.015439579635858536, "signal/batch_coverage_15/centered_abs_mean": 0.15866729915142058, "signal/batch_coverage_15/group_std_mean": 0.21745036244392396, "signal/batch_coverage_15/group_zero_std_frac": 0.02222222238779068, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01586672980338335, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01586672980338335, "signal/batch_coverage_20/centered_abs_mean": 0.16100625395774842, "signal/batch_coverage_20/group_std_mean": 0.2214736223220825, "signal/batch_coverage_20/group_zero_std_frac": 0.02222222238779068, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.016100625693798064, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.016100625693798064, "signal/batch_coverage_25/centered_abs_mean": 0.16343387961387634, "signal/batch_coverage_25/group_std_mean": 0.22452974319458008, "signal/batch_coverage_25/group_zero_std_frac": 0.02222222238779068, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.016343388892710208, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.016343388892710208, "signal/batch_coverage_5/centered_abs_mean": 0.13978493511676787, "signal/batch_coverage_5/group_std_mean": 0.18872489631175995, "signal/batch_coverage_5/group_zero_std_frac": 0.02222222238779068, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013978493958711624, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.013978493958711624, "signal/brier_reward/centered_abs_mean": 0.11147035211324692, "signal/brier_reward/group_std_mean": 0.15009200870990752, "signal/brier_reward/group_zero_std_frac": 0.02222222238779068, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011147035658359528, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011147035658359528, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08754237294197083, "signal/confidence_uniqueness_reward/group_std_mean": 0.11724895834922791, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.02222222238779068, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00875423727557063, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00875423727557063, "signal/format_reward/centered_abs_mean": 0.01034613698720932, "signal/format_reward/group_std_mean": 0.021106501668691637, "signal/format_reward/group_zero_std_frac": 0.9055555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00517306849360466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00517306849360466, "signal/frontier_aurc_reward/centered_abs_mean": 0.0011056010029278696, "signal/frontier_aurc_reward/group_std_mean": 0.0016772629460319877, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.3820012736687203e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.3820012736687203e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.009840690344572068, "signal/frontier_ece_reward/group_std_mean": 0.01416495107114315, "signal/frontier_ece_reward/group_zero_std_frac": 0.04722222331911326, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009840689948759974, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009840689948759974, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27197187542915346, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3398334622383118, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1861111119389534, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027197187766432762, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027197187766432762, "step": 155 }, { "calibration/aurc": 0.10045860708990102, "calibration/batch_distribution_entropy": 0.5876831620875574, "calibration/buffer_distribution_entropy": 0.6983014411007992, "calibration/confidence_entropy": 0.20393004988738403, "calibration/coverage@0%": 0.13159268929503914, "calibration/coverage@1%": 0.1404699738903394, "calibration/coverage@10%": 0.6726799185487276, "calibration/coverage@15%": 0.7299267652580486, "calibration/coverage@20%": 0.8663485397658626, "calibration/coverage@25%": 0.8970835978835978, "calibration/coverage@30%": 0.9098666666666666, "calibration/coverage@5%": 0.5083079297763791, "calibration/ece": 0.09946244024236016, "calibration/mean_confidence": 0.7541408872262508, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007725694444444442, "completions/max_length": 3137.6, "completions/max_terminated_length": 3137.6, "completions/mean_length": 969.1362915039062, "completions/mean_terminated_length": 976.686181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 310.8, "epoch": 0.38399520005999926, "grad_norm": 0.0004891646676696837, "learning_rate": 1.4457831325301204e-06, "loss": -0.0087, "num_tokens": 387956066.0, "reward": 1.3025524616241455, "reward_std": 0.18135660588741304, "rewards/accuracy_reward": 0.6518229126930237, "rewards/batch_coverage_0": 0.4337134540081024, "rewards/batch_coverage_1": 0.4337134540081024, "rewards/batch_coverage_10": 0.5615343689918518, "rewards/batch_coverage_15": 0.5716203927993775, "rewards/batch_coverage_20": 0.5894143342971802, "rewards/batch_coverage_25": 0.5949154138565064, "rewards/batch_coverage_5": 0.5151754200458527, "rewards/brier_reward": 0.8427359223365783, "rewards/confidence_uniqueness_reward": 0.8198214292526245, "rewards/format_reward": 0.9922742962837219, "rewards/frontier_aurc_reward": -0.0014497652417048811, "rewards/frontier_ece_reward": 0.009934188798069954, "rewards/frontier_entropy_batch_reward": -0.5673593401908874, "signal/accuracy_reward/centered_abs_mean": 0.14571940302848815, "signal/accuracy_reward/group_std_mean": 0.1967881828546524, "signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07285970151424408, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07285970151424408, "signal/advantage_abs_mean": 0.13016137778759002, "signal/advantage_pre_scale_abs_mean": 0.13016137778759002, "signal/advantage_pre_scale_std": 0.22107622325420379, "signal/advantage_std": 0.22107622325420379, "signal/batch_coverage_0/centered_abs_mean": 0.12322651296854019, "signal/batch_coverage_0/group_std_mean": 0.16826023459434508, "signal/batch_coverage_0/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012322651594877243, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012322651594877243, "signal/batch_coverage_1/centered_abs_mean": 0.12322651296854019, "signal/batch_coverage_1/group_std_mean": 0.16826023459434508, "signal/batch_coverage_1/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012322651594877243, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012322651594877243, "signal/batch_coverage_10/centered_abs_mean": 0.14654467403888702, "signal/batch_coverage_10/group_std_mean": 0.20563188791275025, "signal/batch_coverage_10/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01465446725487709, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.01465446725487709, "signal/batch_coverage_15/centered_abs_mean": 0.15097981691360474, "signal/batch_coverage_15/group_std_mean": 0.2103422313928604, "signal/batch_coverage_15/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.015097982622683049, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.015097982622683049, "signal/batch_coverage_20/centered_abs_mean": 0.16249073147773743, "signal/batch_coverage_20/group_std_mean": 0.22473800778388978, "signal/batch_coverage_20/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01624907273799181, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.01624907273799181, "signal/batch_coverage_25/centered_abs_mean": 0.16827959716320037, "signal/batch_coverage_25/group_std_mean": 0.23056098818778992, "signal/batch_coverage_25/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.016827961057424547, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.016827961057424547, "signal/batch_coverage_5/centered_abs_mean": 0.1344536691904068, "signal/batch_coverage_5/group_std_mean": 0.18762777447700502, "signal/batch_coverage_5/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013445367850363255, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.013445367850363255, "signal/brier_reward/centered_abs_mean": 0.11661726981401443, "signal/brier_reward/group_std_mean": 0.15799384713172912, "signal/brier_reward/group_zero_std_frac": 0.008333333395421505, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011661727912724018, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011661727912724018, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08469038307666779, "signal/confidence_uniqueness_reward/group_std_mean": 0.1138753980398178, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.008333333395421505, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008469038363546132, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008469038363546132, "signal/format_reward/centered_abs_mean": 0.014100477658212185, "signal/format_reward/group_std_mean": 0.02932555302977562, "signal/format_reward/group_zero_std_frac": 0.8722222328186036, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007050238829106092, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007050238829106092, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013116570771671832, "signal/frontier_aurc_reward/group_std_mean": 0.0020308221224695443, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.639571491978131e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.639571491978131e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.00958633776754141, "signal/frontier_ece_reward/group_std_mean": 0.013803408481180668, "signal/frontier_ece_reward/group_zero_std_frac": 0.022222222574055196, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009586337953805923, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009586337953805923, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.285816890001297, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36413044333457945, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.10555555820465087, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028581687808036806, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028581687808036806, "step": 160 }, { "calibration/aurc": 0.10952826831792246, "calibration/batch_distribution_entropy": 0.6720452611414822, "calibration/buffer_distribution_entropy": 0.6942523445932581, "calibration/confidence_entropy": 0.23859629732682824, "calibration/coverage@0%": 0.0203125, "calibration/coverage@1%": 0.11354166666666668, "calibration/coverage@10%": 0.6200841238051255, "calibration/coverage@15%": 0.7198256977121924, "calibration/coverage@20%": 0.7748366290112996, "calibration/coverage@25%": 0.8324017637657549, "calibration/coverage@30%": 0.8856770056801239, "calibration/coverage@5%": 0.5416919134296264, "calibration/ece": 0.08986915787646825, "calibration/mean_confidence": 0.6594262186067543, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0066840277777777905, "completions/max_length": 3693.0, "completions/max_terminated_length": 3693.0, "completions/mean_length": 986.36953125, "completions/mean_terminated_length": 993.1266357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 348.8, "epoch": 0.39599505006187424, "grad_norm": 0.0005287404055707157, "learning_rate": 1.2951807228915664e-06, "loss": -0.0053, "num_tokens": 402458115.0, "reward": 1.3131772518157958, "reward_std": 0.17054618895053864, "rewards/accuracy_reward": 0.6387152671813965, "rewards/batch_coverage_0": 0.4660345435142517, "rewards/batch_coverage_1": 0.4660345435142517, "rewards/batch_coverage_10": 0.5774924635887146, "rewards/batch_coverage_15": 0.5880944848060607, "rewards/batch_coverage_20": 0.5928570389747619, "rewards/batch_coverage_25": 0.6022311568260192, "rewards/batch_coverage_5": 0.5361921072006226, "rewards/brier_reward": 0.8557339429855346, "rewards/confidence_uniqueness_reward": 0.8580495834350585, "rewards/format_reward": 0.9933159828186036, "rewards/frontier_aurc_reward": -0.0011106105986982584, "rewards/frontier_ece_reward": 0.009472636692225932, "rewards/frontier_entropy_batch_reward": -0.5804377794265747, "signal/accuracy_reward/centered_abs_mean": 0.13853081464767455, "signal/accuracy_reward/group_std_mean": 0.1859588861465454, "signal/accuracy_reward/group_zero_std_frac": 0.4583333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06926540732383728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06926540732383728, "signal/advantage_abs_mean": 0.12401239275932312, "signal/advantage_pre_scale_abs_mean": 0.12401239275932312, "signal/advantage_pre_scale_std": 0.20768028497695923, "signal/advantage_std": 0.20768028497695923, "signal/batch_coverage_0/centered_abs_mean": 0.12554647028446198, "signal/batch_coverage_0/group_std_mean": 0.17078601717948913, "signal/batch_coverage_0/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012554646842181683, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012554646842181683, "signal/batch_coverage_1/centered_abs_mean": 0.12554647028446198, "signal/batch_coverage_1/group_std_mean": 0.17078601717948913, "signal/batch_coverage_1/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012554646842181683, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012554646842181683, "signal/batch_coverage_10/centered_abs_mean": 0.1449828863143921, "signal/batch_coverage_10/group_std_mean": 0.2013940066099167, "signal/batch_coverage_10/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.014498288556933404, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.014498288556933404, "signal/batch_coverage_15/centered_abs_mean": 0.14958977848291397, "signal/batch_coverage_15/group_std_mean": 0.20786149501800538, "signal/batch_coverage_15/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01495897825807333, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01495897825807333, "signal/batch_coverage_20/centered_abs_mean": 0.14767719060182571, "signal/batch_coverage_20/group_std_mean": 0.20538305938243867, "signal/batch_coverage_20/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.014767719991505146, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.014767719991505146, "signal/batch_coverage_25/centered_abs_mean": 0.15909694731235505, "signal/batch_coverage_25/group_std_mean": 0.218677294254303, "signal/batch_coverage_25/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.015909695252776145, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.015909695252776145, "signal/batch_coverage_5/centered_abs_mean": 0.1341264843940735, "signal/batch_coverage_5/group_std_mean": 0.18379494845867156, "signal/batch_coverage_5/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013412648998200893, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.013412648998200893, "signal/brier_reward/centered_abs_mean": 0.11033482253551483, "signal/brier_reward/group_std_mean": 0.14876342713832855, "signal/brier_reward/group_zero_std_frac": 0.002777777798473835, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011033482663333417, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011033482663333417, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0680958390235901, "signal/confidence_uniqueness_reward/group_std_mean": 0.08948377221822738, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.00555555559694767, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006809584051370621, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006809584051370621, "signal/format_reward/centered_abs_mean": 0.00956488698720932, "signal/format_reward/group_std_mean": 0.019422347657382488, "signal/format_reward/group_zero_std_frac": 0.9138888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00478244349360466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00478244349360466, "signal/frontier_aurc_reward/centered_abs_mean": 0.0009592408197931946, "signal/frontier_aurc_reward/group_std_mean": 0.0014806427410803736, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.1990510483883553e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.1990510483883553e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.009095791541039944, "signal/frontier_ece_reward/group_std_mean": 0.012939628027379513, "signal/frontier_ece_reward/group_zero_std_frac": 0.019444444589316844, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009095791378058493, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009095791378058493, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2910008132457733, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3624585449695587, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1416666701436043, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02910008132457733, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02910008132457733, "step": 165 }, { "calibration/aurc": 0.1035497171259897, "calibration/batch_distribution_entropy": 0.6194467482238635, "calibration/buffer_distribution_entropy": 0.6964728802094691, "calibration/confidence_entropy": 0.2095385484232973, "calibration/coverage@0%": 0.12944753490401398, "calibration/coverage@1%": 0.14463078097731238, "calibration/coverage@10%": 0.6789776081184496, "calibration/coverage@15%": 0.7661971044367294, "calibration/coverage@20%": 0.843384525692714, "calibration/coverage@25%": 0.8931907133391196, "calibration/coverage@30%": 0.9410449389179757, "calibration/coverage@5%": 0.3630785060335414, "calibration/ece": 0.1010162488416066, "calibration/mean_confidence": 0.7447559374268262, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006770833333333348, "completions/max_length": 3683.4, "completions/max_terminated_length": 3683.4, "completions/mean_length": 968.5309936523438, "completions/mean_terminated_length": 975.1132934570312, "completions/min_length": 0.0, "completions/min_terminated_length": 331.2, "epoch": 0.4079949000637492, "grad_norm": 0.0006581774796359241, "learning_rate": 1.1445783132530121e-06, "loss": -0.0073, "num_tokens": 416704776.0, "reward": 1.3400552988052368, "reward_std": 0.17083775103092194, "rewards/accuracy_reward": 0.6823784708976746, "rewards/batch_coverage_0": 0.4667522668838501, "rewards/batch_coverage_1": 0.4667522668838501, "rewards/batch_coverage_10": 0.5824069619178772, "rewards/batch_coverage_15": 0.5999761819839478, "rewards/batch_coverage_20": 0.6159539341926574, "rewards/batch_coverage_25": 0.6178914666175842, "rewards/batch_coverage_5": 0.538118553161621, "rewards/brier_reward": 0.873453962802887, "rewards/confidence_uniqueness_reward": 0.8486914992332458, "rewards/format_reward": 0.9931423664093018, "rewards/frontier_aurc_reward": -0.0007993413018994033, "rewards/frontier_ece_reward": 0.00918477177619934, "rewards/frontier_entropy_batch_reward": -0.5961334109306335, "signal/accuracy_reward/centered_abs_mean": 0.1437554255127907, "signal/accuracy_reward/group_std_mean": 0.1925100266933441, "signal/accuracy_reward/group_zero_std_frac": 0.4388888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07187771275639535, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07187771275639535, "signal/advantage_abs_mean": 0.1232904389500618, "signal/advantage_pre_scale_abs_mean": 0.1232904389500618, "signal/advantage_pre_scale_std": 0.2124456822872162, "signal/advantage_std": 0.2124456822872162, "signal/batch_coverage_0/centered_abs_mean": 0.11614251732826233, "signal/batch_coverage_0/group_std_mean": 0.1566988781094551, "signal/batch_coverage_0/group_zero_std_frac": 0.011111111380159855, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.011614251881837845, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.011614251881837845, "signal/batch_coverage_1/centered_abs_mean": 0.11614251732826233, "signal/batch_coverage_1/group_std_mean": 0.1566988781094551, "signal/batch_coverage_1/group_zero_std_frac": 0.011111111380159855, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.011614251881837845, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.011614251881837845, "signal/batch_coverage_10/centered_abs_mean": 0.13287641555070878, "signal/batch_coverage_10/group_std_mean": 0.1858629435300827, "signal/batch_coverage_10/group_zero_std_frac": 0.011111111380159855, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013287641853094102, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.013287641853094102, "signal/batch_coverage_15/centered_abs_mean": 0.1418210372328758, "signal/batch_coverage_15/group_std_mean": 0.19721043407917022, "signal/batch_coverage_15/group_zero_std_frac": 0.011111111380159855, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.014182103984057903, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.014182103984057903, "signal/batch_coverage_20/centered_abs_mean": 0.15203088521957397, "signal/batch_coverage_20/group_std_mean": 0.2103426307439804, "signal/batch_coverage_20/group_zero_std_frac": 0.011111111380159855, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.015203088335692883, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.015203088335692883, "signal/batch_coverage_25/centered_abs_mean": 0.15397260189056397, "signal/batch_coverage_25/group_std_mean": 0.21220978796482087, "signal/batch_coverage_25/group_zero_std_frac": 0.011111111380159855, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.015397260524332523, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.015397260524332523, "signal/batch_coverage_5/centered_abs_mean": 0.12382150441408157, "signal/batch_coverage_5/group_std_mean": 0.16996322572231293, "signal/batch_coverage_5/group_zero_std_frac": 0.011111111380159855, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012382151931524277, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.012382151931524277, "signal/brier_reward/centered_abs_mean": 0.10963434725999832, "signal/brier_reward/group_std_mean": 0.14850548803806304, "signal/brier_reward/group_zero_std_frac": 0.011111111380159855, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010963435098528863, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010963435098528863, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06850620657205582, "signal/confidence_uniqueness_reward/group_std_mean": 0.08941082209348679, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.011111111380159855, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006850621197372675, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006850621197372675, "signal/format_reward/centered_abs_mean": 0.011572265438735485, "signal/format_reward/group_std_mean": 0.022224038653075696, "signal/format_reward/group_zero_std_frac": 0.9055555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005786132719367743, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005786132719367743, "signal/frontier_aurc_reward/centered_abs_mean": 0.0007456650491803885, "signal/frontier_aurc_reward/group_std_mean": 0.001207607495598495, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.320813296653796e-06, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.320813296653796e-06, "signal/frontier_ece_reward/centered_abs_mean": 0.008235116582363844, "signal/frontier_ece_reward/group_std_mean": 0.011895155161619186, "signal/frontier_ece_reward/group_zero_std_frac": 0.03888889010995626, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008235116838477552, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008235116838477552, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.280417799949646, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3480204403400421, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.18055555820465088, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02804178111255169, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02804178111255169, "step": 170 }, { "calibration/aurc": 0.08584934334504304, "calibration/batch_distribution_entropy": 0.7040840441985023, "calibration/buffer_distribution_entropy": 0.6904400099981481, "calibration/confidence_entropy": 0.24128080965627605, "calibration/coverage@0%": 0.20016072193439322, "calibration/coverage@1%": 0.24455671389644537, "calibration/coverage@10%": 0.6587761989219518, "calibration/coverage@15%": 0.7744429150954781, "calibration/coverage@20%": 0.8411959239143305, "calibration/coverage@25%": 0.9012597940524044, "calibration/coverage@30%": 0.9544108931847044, "calibration/coverage@5%": 0.5571439707095154, "calibration/ece": 0.08052070579497293, "calibration/mean_confidence": 0.6720276185336083, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00616319444444442, "completions/max_length": 3571.8, "completions/max_terminated_length": 3571.8, "completions/mean_length": 1022.02509765625, "completions/mean_terminated_length": 1028.35546875, "completions/min_length": 0.0, "completions/min_terminated_length": 332.2, "epoch": 0.4199947500656242, "grad_norm": 0.000544732843991369, "learning_rate": 9.93975903614458e-07, "loss": -0.0048, "num_tokens": 431586473.0, "reward": 1.342452049255371, "reward_std": 0.17677203714847564, "rewards/accuracy_reward": 0.6801215291023255, "rewards/batch_coverage_0": 0.4610255122184753, "rewards/batch_coverage_1": 0.4610255122184753, "rewards/batch_coverage_10": 0.5977589607238769, "rewards/batch_coverage_15": 0.609941303730011, "rewards/batch_coverage_20": 0.6217445015907288, "rewards/batch_coverage_25": 0.6220832824707031, "rewards/batch_coverage_5": 0.538027310371399, "rewards/brier_reward": 0.8769816517829895, "rewards/confidence_uniqueness_reward": 0.8499362587928772, "rewards/format_reward": 0.9938368082046509, "rewards/frontier_aurc_reward": -0.0008405924076214432, "rewards/frontier_ece_reward": 0.008787432871758937, "rewards/frontier_entropy_batch_reward": -0.5924777626991272, "signal/accuracy_reward/centered_abs_mean": 0.13968641459941863, "signal/accuracy_reward/group_std_mean": 0.19224803447723388, "signal/accuracy_reward/group_zero_std_frac": 0.42777777910232545, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06984320729970932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06984320729970932, "signal/advantage_abs_mean": 0.12613593339920043, "signal/advantage_pre_scale_abs_mean": 0.12613593339920043, "signal/advantage_pre_scale_std": 0.2150324821472168, "signal/advantage_std": 0.2150324821472168, "signal/batch_coverage_0/centered_abs_mean": 0.11822779774665833, "signal/batch_coverage_0/group_std_mean": 0.16137382984161378, "signal/batch_coverage_0/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.011822779849171638, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.011822779849171638, "signal/batch_coverage_1/centered_abs_mean": 0.11822779774665833, "signal/batch_coverage_1/group_std_mean": 0.16137382984161378, "signal/batch_coverage_1/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.011822779849171638, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.011822779849171638, "signal/batch_coverage_10/centered_abs_mean": 0.14888089001178742, "signal/batch_coverage_10/group_std_mean": 0.20695665776729583, "signal/batch_coverage_10/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01488808896392584, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.01488808896392584, "signal/batch_coverage_15/centered_abs_mean": 0.15403354167938232, "signal/batch_coverage_15/group_std_mean": 0.21282553374767305, "signal/batch_coverage_15/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.015403354167938232, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.015403354167938232, "signal/batch_coverage_20/centered_abs_mean": 0.16212478578090667, "signal/batch_coverage_20/group_std_mean": 0.22311893701553345, "signal/batch_coverage_20/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.016212479583919047, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.016212479583919047, "signal/batch_coverage_25/centered_abs_mean": 0.1614815503358841, "signal/batch_coverage_25/group_std_mean": 0.22323354780673982, "signal/batch_coverage_25/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01614815555512905, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01614815555512905, "signal/batch_coverage_5/centered_abs_mean": 0.12908718585968018, "signal/batch_coverage_5/group_std_mean": 0.1776062160730362, "signal/batch_coverage_5/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012908719293773175, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.012908719293773175, "signal/brier_reward/centered_abs_mean": 0.10861355811357498, "signal/brier_reward/group_std_mean": 0.15155054926872252, "signal/brier_reward/group_zero_std_frac": 0.002777777798473835, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010861356370151043, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010861356370151043, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0684033289551735, "signal/confidence_uniqueness_reward/group_std_mean": 0.09072864055633545, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.002777777798473835, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006840333249419928, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006840333249419928, "signal/format_reward/centered_abs_mean": 0.01100802943110466, "signal/format_reward/group_std_mean": 0.021973739936947824, "signal/format_reward/group_zero_std_frac": 0.9055555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00550401471555233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00550401471555233, "signal/frontier_aurc_reward/centered_abs_mean": 0.0007584088016301394, "signal/frontier_aurc_reward/group_std_mean": 0.001169906440190971, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.480110020376741e-06, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.480110020376741e-06, "signal/frontier_ece_reward/centered_abs_mean": 0.008332806825637817, "signal/frontier_ece_reward/group_std_mean": 0.012288989685475826, "signal/frontier_ece_reward/group_zero_std_frac": 0.02222222238779068, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008332806755788624, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008332806755788624, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2823038697242737, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35158644914627074, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.16111111342906953, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028230386972427367, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028230386972427367, "step": 175 }, { "calibration/aurc": 0.0935197326499746, "calibration/batch_distribution_entropy": 0.5561873594983229, "calibration/buffer_distribution_entropy": 0.6859575233089831, "calibration/confidence_entropy": 0.19136952886651085, "calibration/coverage@0%": 0.0421875, "calibration/coverage@1%": 0.0421875, "calibration/coverage@10%": 0.6712114141386544, "calibration/coverage@15%": 0.800580566611605, "calibration/coverage@20%": 0.8736033708606643, "calibration/coverage@25%": 0.9231577989486137, "calibration/coverage@30%": 0.9632084782897822, "calibration/coverage@5%": 0.38217653629633197, "calibration/ece": 0.08765554258794374, "calibration/mean_confidence": 0.7494340338297194, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008420138888888862, "completions/max_length": 3728.2, "completions/max_terminated_length": 3728.2, "completions/mean_length": 996.6256103515625, "completions/mean_terminated_length": 1005.0981567382812, "completions/min_length": 0.0, "completions/min_terminated_length": 355.6, "epoch": 0.4319946000674992, "grad_norm": 0.0005954225780442357, "learning_rate": 8.433734939759036e-07, "loss": -0.0082, "num_tokens": 446167568.0, "reward": 1.3270483493804932, "reward_std": 0.177334925532341, "rewards/accuracy_reward": 0.6724826335906983, "rewards/batch_coverage_0": 0.47177304029464723, "rewards/batch_coverage_1": 0.47177304029464723, "rewards/batch_coverage_10": 0.5869078040122986, "rewards/batch_coverage_15": 0.5977437496185303, "rewards/batch_coverage_20": 0.6142584323883057, "rewards/batch_coverage_25": 0.620146906375885, "rewards/batch_coverage_5": 0.5393850266933441, "rewards/brier_reward": 0.8569316625595093, "rewards/confidence_uniqueness_reward": 0.806365144252777, "rewards/format_reward": 0.9914930582046508, "rewards/frontier_aurc_reward": -0.001556425471790135, "rewards/frontier_ece_reward": 0.008400358818471431, "rewards/frontier_entropy_batch_reward": -0.622886312007904, "signal/accuracy_reward/centered_abs_mean": 0.15272895097732545, "signal/accuracy_reward/group_std_mean": 0.1959119915962219, "signal/accuracy_reward/group_zero_std_frac": 0.46111111640930175, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07636447548866272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07636447548866272, "signal/advantage_abs_mean": 0.13030389845371246, "signal/advantage_pre_scale_abs_mean": 0.13030389845371246, "signal/advantage_pre_scale_std": 0.22473380863666534, "signal/advantage_std": 0.22473380863666534, "signal/batch_coverage_0/centered_abs_mean": 0.11375184804201126, "signal/batch_coverage_0/group_std_mean": 0.1565089762210846, "signal/batch_coverage_0/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01137518547475338, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.01137518547475338, "signal/batch_coverage_1/centered_abs_mean": 0.11375184804201126, "signal/batch_coverage_1/group_std_mean": 0.1565089762210846, "signal/batch_coverage_1/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01137518547475338, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.01137518547475338, "signal/batch_coverage_10/centered_abs_mean": 0.13260578811168672, "signal/batch_coverage_10/group_std_mean": 0.18635527193546295, "signal/batch_coverage_10/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013260579481720925, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.013260579481720925, "signal/batch_coverage_15/centered_abs_mean": 0.13684237003326416, "signal/batch_coverage_15/group_std_mean": 0.19186861515045167, "signal/batch_coverage_15/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013684236630797385, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.013684236630797385, "signal/batch_coverage_20/centered_abs_mean": 0.14961515069007875, "signal/batch_coverage_20/group_std_mean": 0.2078137993812561, "signal/batch_coverage_20/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.014961515367031098, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.014961515367031098, "signal/batch_coverage_25/centered_abs_mean": 0.15616746842861176, "signal/batch_coverage_25/group_std_mean": 0.21553831696510314, "signal/batch_coverage_25/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.015616746619343758, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.015616746619343758, "signal/batch_coverage_5/centered_abs_mean": 0.12186728864908218, "signal/batch_coverage_5/group_std_mean": 0.169427090883255, "signal/batch_coverage_5/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012186729349195957, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.012186729349195957, "signal/brier_reward/centered_abs_mean": 0.11513119339942932, "signal/brier_reward/group_std_mean": 0.15451994240283967, "signal/brier_reward/group_zero_std_frac": 0.008333333395421505, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011513119749724864, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011513119749724864, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.082037752866745, "signal/confidence_uniqueness_reward/group_std_mean": 0.10694921165704727, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.008333333395421505, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008203774876892566, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008203774876892566, "signal/format_reward/centered_abs_mean": 0.013975694589316846, "signal/format_reward/group_std_mean": 0.02479696534574032, "signal/format_reward/group_zero_std_frac": 0.9000000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006987847294658423, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006987847294658423, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012580604292452336, "signal/frontier_aurc_reward/group_std_mean": 0.0019183105323463678, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5725756384199485e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5725756384199485e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.007519526779651642, "signal/frontier_ece_reward/group_std_mean": 0.01085415929555893, "signal/frontier_ece_reward/group_zero_std_frac": 0.027777778171002864, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00075195268727839, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00075195268727839, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25678522884845734, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3297272324562073, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.18055555522441863, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025678522139787673, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025678522139787673, "step": 180 }, { "calibration/aurc": 0.14623769358867256, "calibration/batch_distribution_entropy": 0.6229734962511724, "calibration/buffer_distribution_entropy": 0.6802651233028907, "calibration/confidence_entropy": 0.23214373257914972, "calibration/coverage@0%": 0.03498014577893821, "calibration/coverage@1%": 0.10129868363794606, "calibration/coverage@10%": 0.29276952785030463, "calibration/coverage@15%": 0.7551255491317501, "calibration/coverage@20%": 0.855133763840992, "calibration/coverage@25%": 0.9205799220272904, "calibration/coverage@30%": 0.9515625, "calibration/coverage@5%": 0.10129868363794606, "calibration/ece": 0.141819133742561, "calibration/mean_confidence": 0.7414098737269303, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007899305555555559, "completions/max_length": 3349.6, "completions/max_terminated_length": 3349.6, "completions/mean_length": 989.527099609375, "completions/mean_terminated_length": 997.413623046875, "completions/min_length": 0.0, "completions/min_terminated_length": 297.6, "epoch": 0.44399445006937416, "grad_norm": 0.0005679742316715419, "learning_rate": 6.927710843373495e-07, "loss": -0.008, "num_tokens": 460656968.0, "reward": 1.3158263444900513, "reward_std": 0.17870357632637024, "rewards/accuracy_reward": 0.650868046283722, "rewards/batch_coverage_0": 0.4548503696918488, "rewards/batch_coverage_1": 0.4548503696918488, "rewards/batch_coverage_10": 0.5850084662437439, "rewards/batch_coverage_15": 0.5976876854896546, "rewards/batch_coverage_20": 0.6115280747413635, "rewards/batch_coverage_25": 0.6098129034042359, "rewards/batch_coverage_5": 0.5300012052059173, "rewards/brier_reward": 0.8578875422477722, "rewards/confidence_uniqueness_reward": 0.832545804977417, "rewards/format_reward": 0.9921006917953491, "rewards/frontier_aurc_reward": -0.001072891615331173, "rewards/frontier_ece_reward": 0.006930419430136681, "rewards/frontier_entropy_batch_reward": -0.5975492119789123, "signal/accuracy_reward/centered_abs_mean": 0.14483506977558136, "signal/accuracy_reward/group_std_mean": 0.1936337411403656, "signal/accuracy_reward/group_zero_std_frac": 0.4444444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07241753488779068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07241753488779068, "signal/advantage_abs_mean": 0.12828055024147034, "signal/advantage_pre_scale_abs_mean": 0.12828055024147034, "signal/advantage_pre_scale_std": 0.2207647293806076, "signal/advantage_std": 0.2207647293806076, "signal/batch_coverage_0/centered_abs_mean": 0.11986812949180603, "signal/batch_coverage_0/group_std_mean": 0.1622163325548172, "signal/batch_coverage_0/group_zero_std_frac": 0.016666666977107523, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.011986814253032208, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.011986814253032208, "signal/batch_coverage_1/centered_abs_mean": 0.11986812949180603, "signal/batch_coverage_1/group_std_mean": 0.1622163325548172, "signal/batch_coverage_1/group_zero_std_frac": 0.016666666977107523, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.011986814253032208, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.011986814253032208, "signal/batch_coverage_10/centered_abs_mean": 0.1427648186683655, "signal/batch_coverage_10/group_std_mean": 0.19960642158985137, "signal/batch_coverage_10/group_zero_std_frac": 0.016666666977107523, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.014276482164859772, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.014276482164859772, "signal/batch_coverage_15/centered_abs_mean": 0.1482395738363266, "signal/batch_coverage_15/group_std_mean": 0.20609240233898163, "signal/batch_coverage_15/group_zero_std_frac": 0.016666666977107523, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.014823957718908787, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.014823957718908787, "signal/batch_coverage_20/centered_abs_mean": 0.15430747270584105, "signal/batch_coverage_20/group_std_mean": 0.21396797895431519, "signal/batch_coverage_20/group_zero_std_frac": 0.016666666977107523, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.015430747903883458, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.015430747903883458, "signal/batch_coverage_25/centered_abs_mean": 0.1554022252559662, "signal/batch_coverage_25/group_std_mean": 0.21482096016407012, "signal/batch_coverage_25/group_zero_std_frac": 0.016666666977107523, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.015540223196148872, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.015540223196148872, "signal/batch_coverage_5/centered_abs_mean": 0.1292220577597618, "signal/batch_coverage_5/group_std_mean": 0.17766384184360504, "signal/batch_coverage_5/group_zero_std_frac": 0.016666666977107523, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012922205589711666, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.012922205589711666, "signal/brier_reward/centered_abs_mean": 0.11412379443645478, "signal/brier_reward/group_std_mean": 0.15445701479911805, "signal/brier_reward/group_zero_std_frac": 0.016666666977107523, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011412379890680313, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011412379890680313, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07410852909088135, "signal/confidence_uniqueness_reward/group_std_mean": 0.09989662021398545, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.016666666977107523, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0074108530767261985, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0074108530767261985, "signal/format_reward/centered_abs_mean": 0.01372070349752903, "signal/format_reward/group_std_mean": 0.027843139693140985, "signal/format_reward/group_zero_std_frac": 0.875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006860351748764515, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006860351748764515, "signal/frontier_aurc_reward/centered_abs_mean": 0.0009126473218202591, "signal/frontier_aurc_reward/group_std_mean": 0.001439414289779961, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.1408091813791543e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.1408091813791543e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.00754654137417674, "signal/frontier_ece_reward/group_std_mean": 0.010986723564565181, "signal/frontier_ece_reward/group_zero_std_frac": 0.02777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007546541397459805, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007546541397459805, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27296812534332277, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33934701681137086, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.18611110895872116, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027296814322471618, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027296814322471618, "step": 185 }, { "calibration/aurc": 0.13128068853617544, "calibration/batch_distribution_entropy": 0.4795554241420736, "calibration/buffer_distribution_entropy": 0.6778663229174313, "calibration/confidence_entropy": 0.17752499581463171, "calibration/coverage@0%": 0.03769633507853403, "calibration/coverage@1%": 0.03769633507853403, "calibration/coverage@10%": 0.47621794820527397, "calibration/coverage@15%": 0.5475001368753905, "calibration/coverage@20%": 0.8411420084895374, "calibration/coverage@25%": 0.9311601630611935, "calibration/coverage@30%": 0.9657873888272697, "calibration/coverage@5%": 0.2875282446955084, "calibration/ece": 0.10620229144234192, "calibration/mean_confidence": 0.7968994675856337, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006336805555555558, "completions/max_length": 3539.6, "completions/max_terminated_length": 3539.6, "completions/mean_length": 988.577001953125, "completions/mean_terminated_length": 994.8688232421875, "completions/min_length": 0.0, "completions/min_terminated_length": 366.4, "epoch": 0.45599430007124914, "grad_norm": 0.0006201478536240757, "learning_rate": 5.421686746987952e-07, "loss": -0.007, "num_tokens": 475128319.0, "reward": 1.3506378889083863, "reward_std": 0.17299841344356537, "rewards/accuracy_reward": 0.7050347328186035, "rewards/batch_coverage_0": 0.4621035397052765, "rewards/batch_coverage_1": 0.4621035397052765, "rewards/batch_coverage_10": 0.6018279314041137, "rewards/batch_coverage_15": 0.6118657708168029, "rewards/batch_coverage_20": 0.6287239551544189, "rewards/batch_coverage_25": 0.6334514379501343, "rewards/batch_coverage_5": 0.552715665102005, "rewards/brier_reward": 0.8670693159103393, "rewards/confidence_uniqueness_reward": 0.8103796005249023, "rewards/format_reward": 0.9936631917953491, "rewards/frontier_aurc_reward": -0.0011533482233062387, "rewards/frontier_ece_reward": 0.006376712769269943, "rewards/frontier_entropy_batch_reward": -0.6235847473144531, "signal/accuracy_reward/centered_abs_mean": 0.14418402314186096, "signal/accuracy_reward/group_std_mean": 0.19555995464324952, "signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07209201157093048, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07209201157093048, "signal/advantage_abs_mean": 0.12151096612215043, "signal/advantage_pre_scale_abs_mean": 0.12151096612215043, "signal/advantage_pre_scale_std": 0.21480103731155395, "signal/advantage_std": 0.21480103731155395, "signal/batch_coverage_0/centered_abs_mean": 0.11545856446027755, "signal/batch_coverage_0/group_std_mean": 0.15694105625152588, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.011545856110751629, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.011545856110751629, "signal/batch_coverage_1/centered_abs_mean": 0.11545856446027755, "signal/batch_coverage_1/group_std_mean": 0.15694105625152588, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.011545856110751629, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.011545856110751629, "signal/batch_coverage_10/centered_abs_mean": 0.13779610842466355, "signal/batch_coverage_10/group_std_mean": 0.19468652307987214, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013779611513018607, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.013779611513018607, "signal/batch_coverage_15/centered_abs_mean": 0.14144036769866944, "signal/batch_coverage_15/group_std_mean": 0.19916775822639465, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01414403710514307, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01414403710514307, "signal/batch_coverage_20/centered_abs_mean": 0.15380342304706573, "signal/batch_coverage_20/group_std_mean": 0.21572276055812836, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.015380342863500119, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.015380342863500119, "signal/batch_coverage_25/centered_abs_mean": 0.15857919156551362, "signal/batch_coverage_25/group_std_mean": 0.22120807468891143, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.015857919491827487, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.015857919491827487, "signal/batch_coverage_5/centered_abs_mean": 0.12643881738185883, "signal/batch_coverage_5/group_std_mean": 0.17589333653450012, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01264388207346201, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.01264388207346201, "signal/brier_reward/centered_abs_mean": 0.11211207658052444, "signal/brier_reward/group_std_mean": 0.15370774865150452, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011211208067834377, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011211208067834377, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08398310542106628, "signal/confidence_uniqueness_reward/group_std_mean": 0.11177106499671936, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008398310374468565, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008398310374468565, "signal/format_reward/centered_abs_mean": 0.011474609375, "signal/format_reward/group_std_mean": 0.024381159618496896, "signal/format_reward/group_zero_std_frac": 0.8916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0057373046875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0057373046875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0010549181955866517, "signal/frontier_aurc_reward/group_std_mean": 0.0016289620660245418, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.3186477372073568e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.3186477372073568e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.006933016143739224, "signal/frontier_ece_reward/group_std_mean": 0.010268824733793735, "signal/frontier_ece_reward/group_zero_std_frac": 0.022222222574055196, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000693301623687148, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000693301623687148, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25669166445732117, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3272492527961731, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.2, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025669166445732118, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025669166445732118, "step": 190 }, { "calibration/aurc": 0.11944810496438059, "calibration/batch_distribution_entropy": 0.6395999867984298, "calibration/buffer_distribution_entropy": 0.6720075917237143, "calibration/confidence_entropy": 0.20917919309424385, "calibration/coverage@0%": 0.06911732456140351, "calibration/coverage@1%": 0.14488486842105264, "calibration/coverage@10%": 0.5310995338477082, "calibration/coverage@15%": 0.6642291838890421, "calibration/coverage@20%": 0.8316384506751172, "calibration/coverage@25%": 0.8892670157068062, "calibration/coverage@30%": 0.9388170811518325, "calibration/coverage@5%": 0.37537739965095984, "calibration/ece": 0.13485540453524325, "calibration/mean_confidence": 0.6940419641656674, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007812500000000023, "completions/max_length": 3358.0, "completions/max_terminated_length": 3358.0, "completions/mean_length": 1008.2052124023437, "completions/mean_terminated_length": 1016.1783325195313, "completions/min_length": 0.0, "completions/min_terminated_length": 331.4, "epoch": 0.46799415007312406, "grad_norm": 0.0005691568367183208, "learning_rate": 3.91566265060241e-07, "loss": -0.0071, "num_tokens": 489823707.0, "reward": 1.3036153316497803, "reward_std": 0.1823348581790924, "rewards/accuracy_reward": 0.6552083373069764, "rewards/batch_coverage_0": 0.4402458965778351, "rewards/batch_coverage_1": 0.4402458965778351, "rewards/batch_coverage_10": 0.5593804001808167, "rewards/batch_coverage_15": 0.5775806784629822, "rewards/batch_coverage_20": 0.6022377133369445, "rewards/batch_coverage_25": 0.6144470334053039, "rewards/batch_coverage_5": 0.5079279005527496, "rewards/brier_reward": 0.8445484519004822, "rewards/confidence_uniqueness_reward": 0.8110572457313537, "rewards/format_reward": 0.9921875, "rewards/frontier_aurc_reward": -0.001369133684784174, "rewards/frontier_ece_reward": 0.005712912511080503, "rewards/frontier_entropy_batch_reward": -0.6040391564369202, "signal/accuracy_reward/centered_abs_mean": 0.14594184011220931, "signal/accuracy_reward/group_std_mean": 0.19639940857887267, "signal/accuracy_reward/group_zero_std_frac": 0.4333333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07297092005610466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07297092005610466, "signal/advantage_abs_mean": 0.13279823064804078, "signal/advantage_pre_scale_abs_mean": 0.13279823064804078, "signal/advantage_pre_scale_std": 0.22284360826015473, "signal/advantage_std": 0.22284360826015473, "signal/batch_coverage_0/centered_abs_mean": 0.12249585688114166, "signal/batch_coverage_0/group_std_mean": 0.16909070312976837, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012249586079269648, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012249586079269648, "signal/batch_coverage_1/centered_abs_mean": 0.12249585688114166, "signal/batch_coverage_1/group_std_mean": 0.16909070312976837, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012249586079269648, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012249586079269648, "signal/batch_coverage_10/centered_abs_mean": 0.14008867293596267, "signal/batch_coverage_10/group_std_mean": 0.19668514728546144, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.014008867554366589, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.014008867554366589, "signal/batch_coverage_15/centered_abs_mean": 0.14652444720268248, "signal/batch_coverage_15/group_std_mean": 0.20595037341117858, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01465244460850954, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01465244460850954, "signal/batch_coverage_20/centered_abs_mean": 0.16244602501392363, "signal/batch_coverage_20/group_std_mean": 0.22658770382404328, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.016244602762162684, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.016244602762162684, "signal/batch_coverage_25/centered_abs_mean": 0.17667838633060456, "signal/batch_coverage_25/group_std_mean": 0.2433231681585312, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01766783855855465, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01766783855855465, "signal/batch_coverage_5/centered_abs_mean": 0.12719610780477525, "signal/batch_coverage_5/group_std_mean": 0.1771041363477707, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012719610892236232, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.012719610892236232, "signal/brier_reward/centered_abs_mean": 0.12355803400278091, "signal/brier_reward/group_std_mean": 0.1653554379940033, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01235580388456583, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01235580388456583, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08606803268194199, "signal/confidence_uniqueness_reward/group_std_mean": 0.11045795083045959, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008606803603470325, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008606803603470325, "signal/format_reward/centered_abs_mean": 0.011447482369840145, "signal/format_reward/group_std_mean": 0.020899421907961367, "signal/format_reward/group_zero_std_frac": 0.9138888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005723741184920072, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005723741184920072, "signal/frontier_aurc_reward/centered_abs_mean": 0.00131355298217386, "signal/frontier_aurc_reward/group_std_mean": 0.0020352060673758388, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.641941307752859e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.641941307752859e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.007170586753636599, "signal/frontier_ece_reward/group_std_mean": 0.010662421584129333, "signal/frontier_ece_reward/group_zero_std_frac": 0.03333333395421505, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007170586846768856, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007170586846768856, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27514847218990324, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3442379176616669, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1833333343267441, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027514847368001936, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027514847368001936, "step": 195 }, { "calibration/aurc": 0.12895234263738056, "calibration/batch_distribution_entropy": 0.5441346575497024, "calibration/buffer_distribution_entropy": 0.6665037511135818, "calibration/confidence_entropy": 0.19706353954289205, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.05706806282722513, "calibration/coverage@10%": 0.4662888173761813, "calibration/coverage@15%": 0.6941641812131418, "calibration/coverage@20%": 0.894394340529808, "calibration/coverage@25%": 0.9372682155322863, "calibration/coverage@30%": 0.9618128272251308, "calibration/coverage@5%": 0.3992024414655271, "calibration/ece": 0.12510555245208535, "calibration/mean_confidence": 0.7964144439428475, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0065104166666666964, "completions/max_length": 3574.4, "completions/max_terminated_length": 3574.4, "completions/mean_length": 991.6319702148437, "completions/mean_terminated_length": 998.1380615234375, "completions/min_length": 0.0, "completions/min_terminated_length": 348.0, "epoch": 0.47999400007499904, "grad_norm": 0.0006386162131093442, "learning_rate": 2.409638554216868e-07, "loss": -0.0062, "num_tokens": 504315115.0, "reward": 1.3395307540893555, "reward_std": 0.1739016592502594, "rewards/accuracy_reward": 0.6907986044883728, "rewards/batch_coverage_0": 0.46083222031593324, "rewards/batch_coverage_1": 0.46083222031593324, "rewards/batch_coverage_10": 0.5888261795043945, "rewards/batch_coverage_15": 0.6063881635665893, "rewards/batch_coverage_20": 0.6214998483657836, "rewards/batch_coverage_25": 0.6275609135627747, "rewards/batch_coverage_5": 0.5429104447364808, "rewards/brier_reward": 0.8572309732437133, "rewards/confidence_uniqueness_reward": 0.816546094417572, "rewards/format_reward": 0.9934027791023254, "rewards/frontier_aurc_reward": -0.001357408077456057, "rewards/frontier_ece_reward": 0.005987100955098868, "rewards/frontier_entropy_batch_reward": -0.6141442060470581, "signal/accuracy_reward/centered_abs_mean": 0.14255641996860505, "signal/accuracy_reward/group_std_mean": 0.18642587959766388, "signal/accuracy_reward/group_zero_std_frac": 0.475, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07127820998430252, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07127820998430252, "signal/advantage_abs_mean": 0.12579307109117507, "signal/advantage_pre_scale_abs_mean": 0.12579307109117507, "signal/advantage_pre_scale_std": 0.2174198657274246, "signal/advantage_std": 0.2174198657274246, "signal/batch_coverage_0/centered_abs_mean": 0.11439124047756195, "signal/batch_coverage_0/group_std_mean": 0.15456107556819915, "signal/batch_coverage_0/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.011439124494791031, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.011439124494791031, "signal/batch_coverage_1/centered_abs_mean": 0.11439124047756195, "signal/batch_coverage_1/group_std_mean": 0.15456107556819915, "signal/batch_coverage_1/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.011439124494791031, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.011439124494791031, "signal/batch_coverage_10/centered_abs_mean": 0.13597213476896286, "signal/batch_coverage_10/group_std_mean": 0.18966183364391326, "signal/batch_coverage_10/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01359721329063177, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.01359721329063177, "signal/batch_coverage_15/centered_abs_mean": 0.14407963156700135, "signal/batch_coverage_15/group_std_mean": 0.20045292973518372, "signal/batch_coverage_15/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.014407963491976261, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.014407963491976261, "signal/batch_coverage_20/centered_abs_mean": 0.14974366426467894, "signal/batch_coverage_20/group_std_mean": 0.20867671072483063, "signal/batch_coverage_20/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.014974366687238217, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.014974366687238217, "signal/batch_coverage_25/centered_abs_mean": 0.15635756254196168, "signal/batch_coverage_25/group_std_mean": 0.21556124687194825, "signal/batch_coverage_25/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.015635756216943263, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.015635756216943263, "signal/batch_coverage_5/centered_abs_mean": 0.12410448342561722, "signal/batch_coverage_5/group_std_mean": 0.17132916748523713, "signal/batch_coverage_5/group_zero_std_frac": 0.002777777798473835, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012410448491573333, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.012410448491573333, "signal/brier_reward/centered_abs_mean": 0.11009028404951096, "signal/brier_reward/group_std_mean": 0.14840916395187378, "signal/brier_reward/group_zero_std_frac": 0.002777777798473835, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011009028740227222, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011009028740227222, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08328192979097367, "signal/confidence_uniqueness_reward/group_std_mean": 0.11021423190832139, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.002777777798473835, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008328192867338657, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008328192867338657, "signal/format_reward/centered_abs_mean": 0.012076823133975267, "signal/format_reward/group_std_mean": 0.026156437024474143, "signal/format_reward/group_zero_std_frac": 0.8805555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0060384115669876335, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0060384115669876335, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012080914457328618, "signal/frontier_aurc_reward/group_std_mean": 0.001900116284377873, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5101143071660772e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5101143071660772e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.006761191785335541, "signal/frontier_ece_reward/group_std_mean": 0.009993968904018402, "signal/frontier_ece_reward/group_zero_std_frac": 0.03333333395421505, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006761191645637155, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006761191645637155, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25325422883033755, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3234684646129608, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1944444477558136, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0253254234790802, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0253254234790802, "step": 200 }, { "epoch": 0.47999400007499904, "eval_calibration/aurc": 0.13414728793506886, "eval_calibration/batch_distribution_entropy": 0.5747845907279023, "eval_calibration/buffer_distribution_entropy": 0.66335620413197, "eval_calibration/confidence_entropy": 0.20467980469134264, "eval_calibration/coverage@0%": 0.17825940860215053, "eval_calibration/coverage@1%": 0.17825940860215053, "eval_calibration/coverage@10%": 0.3970094086021505, "eval_calibration/coverage@15%": 0.76377688172043, "eval_calibration/coverage@20%": 0.8629032258064516, "eval_calibration/coverage@25%": 0.9102822580645161, "eval_calibration/coverage@30%": 0.9469086021505376, "eval_calibration/coverage@5%": 0.21471774193548387, "eval_calibration/ece": 0.18191234422153416, "eval_calibration/mean_confidence": 0.7088780976480996, "eval_completions/clipped_ratio": 0.002604166666666685, "eval_completions/max_length": 2817.1666666666665, "eval_completions/max_terminated_length": 2817.1666666666665, "eval_completions/mean_length": 1001.8477579752604, "eval_completions/mean_terminated_length": 1004.473866780599, "eval_completions/min_length": 224.83333333333334, "eval_completions/min_terminated_length": 424.8333333333333, "eval_loss": 0.0, "eval_num_tokens": 504315115.0, "eval_reward": 1.0336094597975414, "eval_reward_std": 0.2966693192720413, "eval_rewards/accuracy_reward": 0.667534718910853, "eval_rewards/batch_coverage_0": 0.11816969886422157, "eval_rewards/batch_coverage_1": 0.11816969886422157, "eval_rewards/batch_coverage_10": 0.15712855011224747, "eval_rewards/batch_coverage_15": 0.20995599528153738, "eval_rewards/batch_coverage_20": 0.2796262999375661, "eval_rewards/batch_coverage_25": 0.3676889066894849, "eval_rewards/batch_coverage_5": 0.12367384632428487, "eval_rewards/brier_reward": 0.845408578713735, "eval_rewards/confidence_uniqueness_reward": 0.7948392828305563, "eval_rewards/format_reward": 0.9947916766007742, "eval_rewards/frontier_aurc_reward": -0.0013247675669845194, "eval_rewards/frontier_ece_reward": 0.0047586263390257955, "eval_rewards/frontier_entropy_batch_reward": -0.9947916766007742, "eval_runtime": 188.2209, "eval_samples_per_second": 5.313, "eval_signal/accuracy_reward/centered_abs_mean": 0.4306098024050395, "eval_signal/accuracy_reward/group_std_mean": 0.47044746081034344, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21530490120251974, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21530490120251974, "eval_signal/advantage_abs_mean": 0.24172887206077576, "eval_signal/advantage_pre_scale_abs_mean": 0.24172887206077576, "eval_signal/advantage_pre_scale_std": 0.2949426124493281, "eval_signal/advantage_std": 0.2949426124493281, "eval_signal/batch_coverage_0/centered_abs_mean": 0.27022289981444675, "eval_signal/batch_coverage_0/group_std_mean": 0.38285597662130993, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.02702228942265113, "eval_signal/batch_coverage_0/weight": 0.10000000149011612, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.02702228942265113, "eval_signal/batch_coverage_1/centered_abs_mean": 0.27022289981444675, "eval_signal/batch_coverage_1/group_std_mean": 0.38285597662130993, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.02702228942265113, "eval_signal/batch_coverage_1/weight": 0.10000000149011612, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.02702228942265113, "eval_signal/batch_coverage_10/centered_abs_mean": 0.2386037011941274, "eval_signal/batch_coverage_10/group_std_mean": 0.32710455854733783, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.02386037011941274, "eval_signal/batch_coverage_10/weight": 0.10000000149011612, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.02386037011941274, "eval_signal/batch_coverage_15/centered_abs_mean": 0.25227805972099304, "eval_signal/batch_coverage_15/group_std_mean": 0.32569801310698193, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.02522780777265628, "eval_signal/batch_coverage_15/weight": 0.10000000149011612, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.02522780777265628, "eval_signal/batch_coverage_20/centered_abs_mean": 0.285963773727417, "eval_signal/batch_coverage_20/group_std_mean": 0.3457394639650981, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.02859637700021267, "eval_signal/batch_coverage_20/weight": 0.10000000149011612, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.02859637700021267, "eval_signal/batch_coverage_25/centered_abs_mean": 0.39723547796408337, "eval_signal/batch_coverage_25/group_std_mean": 0.4638222207625707, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03972354965905348, "eval_signal/batch_coverage_25/weight": 0.10000000149011612, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.03972354965905348, "eval_signal/batch_coverage_5/centered_abs_mean": 0.2554217278957367, "eval_signal/batch_coverage_5/group_std_mean": 0.36187784870465595, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.025542172603309155, "eval_signal/batch_coverage_5/weight": 0.10000000149011612, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.025542172603309155, "eval_signal/brier_reward/centered_abs_mean": 0.22214544067780176, "eval_signal/brier_reward/group_std_mean": 0.29984481632709503, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022214544626573723, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.022214544626573723, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.1260842519501845, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.14876357093453407, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012608425381282965, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012608425381282965, "eval_signal/format_reward/centered_abs_mean": 0.009982638837148746, "eval_signal/format_reward/group_std_mean": 0.026473373485108215, "eval_signal/format_reward/group_zero_std_frac": 0.8611111342906952, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.004991319418574373, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.004991319418574373, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0020859644864685833, "eval_signal/frontier_aurc_reward/group_std_mean": 0.004563994938507676, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.607455629307272e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.607455629307272e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.008840923585618535, "eval_signal/frontier_ece_reward/group_std_mean": 0.014323081355541945, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008840923255775124, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008840923255775124, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.009982638837148746, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.026473373485108215, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8611111342906952, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0009982639652055998, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0009982639652055998, "eval_steps_per_second": 0.032, "step": 200 }, { "calibration/aurc": 0.1384997430281988, "calibration/batch_distribution_entropy": 0.5506939823270676, "calibration/buffer_distribution_entropy": 0.6636535644652605, "calibration/confidence_entropy": 0.18998257839477467, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.05301837270341207, "calibration/coverage@10%": 0.402384636060499, "calibration/coverage@15%": 0.6366007892223926, "calibration/coverage@20%": 0.8371666648008708, "calibration/coverage@25%": 0.8847188877100887, "calibration/coverage@30%": 0.923436125769569, "calibration/coverage@5%": 0.26199896856972693, "calibration/ece": 0.13033047827590277, "calibration/mean_confidence": 0.7532977819177902, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007204861111111094, "completions/max_length": 3430.6, "completions/max_terminated_length": 3430.6, "completions/mean_length": 1005.3292724609375, "completions/mean_terminated_length": 1012.6463989257812, "completions/min_length": 0.0, "completions/min_terminated_length": 348.0, "epoch": 0.491993850076874, "grad_norm": 0.0005171684897504747, "learning_rate": 9.036144578313253e-08, "loss": -0.0062, "num_tokens": 518962460.0, "reward": 1.3564418792724608, "reward_std": 0.17012038230895996, "rewards/accuracy_reward": 0.7057291746139527, "rewards/batch_coverage_0": 0.4720217764377594, "rewards/batch_coverage_1": 0.4720217764377594, "rewards/batch_coverage_10": 0.6029796481132508, "rewards/batch_coverage_15": 0.6217110633850098, "rewards/batch_coverage_20": 0.637014639377594, "rewards/batch_coverage_25": 0.6375706434249878, "rewards/batch_coverage_5": 0.5604067802429199, "rewards/brier_reward": 0.8739692211151123, "rewards/confidence_uniqueness_reward": 0.817440927028656, "rewards/format_reward": 0.9927951335906983, "rewards/frontier_aurc_reward": -0.000946720875799656, "rewards/frontier_ece_reward": 0.005940455570816993, "rewards/frontier_entropy_batch_reward": -0.6291620016098023, "signal/accuracy_reward/centered_abs_mean": 0.1403971344232559, "signal/accuracy_reward/group_std_mean": 0.18923761546611786, "signal/accuracy_reward/group_zero_std_frac": 0.44444444179534914, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07019856721162795, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07019856721162795, "signal/advantage_abs_mean": 0.11906714290380478, "signal/advantage_pre_scale_abs_mean": 0.11906714290380478, "signal/advantage_pre_scale_std": 0.21315287351608275, "signal/advantage_std": 0.21315287351608275, "signal/batch_coverage_0/centered_abs_mean": 0.11111615747213363, "signal/batch_coverage_0/group_std_mean": 0.1536956250667572, "signal/batch_coverage_0/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.011111615598201752, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.011111615598201752, "signal/batch_coverage_1/centered_abs_mean": 0.11111615747213363, "signal/batch_coverage_1/group_std_mean": 0.1536956250667572, "signal/batch_coverage_1/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.011111615598201752, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.011111615598201752, "signal/batch_coverage_10/centered_abs_mean": 0.12938762009143828, "signal/batch_coverage_10/group_std_mean": 0.18510680198669432, "signal/batch_coverage_10/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.012938761338591575, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.012938761338591575, "signal/batch_coverage_15/centered_abs_mean": 0.1383065789937973, "signal/batch_coverage_15/group_std_mean": 0.19660945236682892, "signal/batch_coverage_15/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013830658420920372, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.013830658420920372, "signal/batch_coverage_20/centered_abs_mean": 0.1493982344865799, "signal/batch_coverage_20/group_std_mean": 0.21061743199825286, "signal/batch_coverage_20/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.014939823932945729, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.014939823932945729, "signal/batch_coverage_25/centered_abs_mean": 0.15058391988277436, "signal/batch_coverage_25/group_std_mean": 0.2111032247543335, "signal/batch_coverage_25/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.015058392658829689, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.015058392658829689, "signal/batch_coverage_5/centered_abs_mean": 0.12164954096078873, "signal/batch_coverage_5/group_std_mean": 0.1719556510448456, "signal/batch_coverage_5/group_zero_std_frac": 0.008333333395421505, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012164954654872417, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.012164954654872417, "signal/brier_reward/centered_abs_mean": 0.10298076122999192, "signal/brier_reward/group_std_mean": 0.14395154118537903, "signal/brier_reward/group_zero_std_frac": 0.008333333395421505, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010298076085746288, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010298076085746288, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07968108505010604, "signal/confidence_uniqueness_reward/group_std_mean": 0.10596004277467727, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.008333333395421505, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007968108542263508, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007968108542263508, "signal/format_reward/centered_abs_mean": 0.012679036241024733, "signal/format_reward/group_std_mean": 0.02613435760140419, "signal/format_reward/group_zero_std_frac": 0.8833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006339518120512366, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006339518120512366, "signal/frontier_aurc_reward/centered_abs_mean": 0.0008092402247712016, "signal/frontier_aurc_reward/group_std_mean": 0.001309644477441907, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.0115502846019808e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.0115502846019808e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.006613474618643522, "signal/frontier_ece_reward/group_std_mean": 0.009865978732705116, "signal/frontier_ece_reward/group_zero_std_frac": 0.033333334140479565, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006613474688492715, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006613474688492715, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26013609766960144, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3299739480018616, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.18888888955116273, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026013610139489173, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026013610139489173, "step": 205 }, { "calibration/aurc": 0.07137097713366734, "calibration/batch_distribution_entropy": 0.5610990286976337, "calibration/buffer_distribution_entropy": 0.6644328642125057, "calibration/confidence_entropy": 0.20935073579914096, "calibration/coverage@0%": 0.06613756613756613, "calibration/coverage@1%": 0.10846560846560847, "calibration/coverage@10%": 0.7219348181390065, "calibration/coverage@15%": 0.8911419522239766, "calibration/coverage@20%": 0.9341302159801287, "calibration/coverage@25%": 0.9753640451720731, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.5699630558787409, "calibration/ece": 0.10638484090913493, "calibration/mean_confidence": 0.7933820565499561, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00708912037037035, "completions/max_length": 3432.0, "completions/max_terminated_length": 3432.0, "completions/mean_length": 1000.5524088541666, "completions/mean_terminated_length": 1007.8630981445312, "completions/min_length": 0.0, "completions/min_terminated_length": 353.0, "epoch": 0.49919376007799904, "num_tokens": 527744326.0, "reward": 1.3347638448079426, "reward_std": 0.17598708967367807, "rewards/accuracy_reward": 0.6691261529922485, "rewards/batch_coverage_0": 0.45231754581133526, "rewards/batch_coverage_1": 0.45231754581133526, "rewards/batch_coverage_10": 0.5946024060249329, "rewards/batch_coverage_15": 0.6113624970118204, "rewards/batch_coverage_20": 0.6223725279172262, "rewards/batch_coverage_25": 0.6295110781987509, "rewards/batch_coverage_5": 0.5561538636684418, "rewards/brier_reward": 0.8604856729507446, "rewards/confidence_uniqueness_reward": 0.8426164587338766, "rewards/format_reward": 0.9929108818372091, "rewards/frontier_aurc_reward": -0.0009377729729749262, "rewards/frontier_ece_reward": 0.005627031127611796, "rewards/frontier_entropy_batch_reward": -0.5897964437802633, "signal/accuracy_reward/centered_abs_mean": 0.14289460331201553, "signal/accuracy_reward/group_std_mean": 0.19061768054962158, "signal/accuracy_reward/group_zero_std_frac": 0.4444444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07144730165600777, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07144730165600777, "signal/advantage_abs_mean": 0.12517134100198746, "signal/advantage_pre_scale_abs_mean": 0.12517134100198746, "signal/advantage_pre_scale_std": 0.2169480174779892, "signal/advantage_std": 0.2169480174779892, "signal/batch_coverage_0/centered_abs_mean": 0.11347770194212596, "signal/batch_coverage_0/group_std_mean": 0.15858340760072073, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01134777038047711, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.01134777038047711, "signal/batch_coverage_1/centered_abs_mean": 0.11347770194212596, "signal/batch_coverage_1/group_std_mean": 0.15858340760072073, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01134777038047711, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.01134777038047711, "signal/batch_coverage_10/centered_abs_mean": 0.13170498609542847, "signal/batch_coverage_10/group_std_mean": 0.19124586880207062, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013170498423278332, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.013170498423278332, "signal/batch_coverage_15/centered_abs_mean": 0.13884608447551727, "signal/batch_coverage_15/group_std_mean": 0.20076180001099905, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013884608633816242, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.013884608633816242, "signal/batch_coverage_20/centered_abs_mean": 0.14567939937114716, "signal/batch_coverage_20/group_std_mean": 0.2094889134168625, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01456794049590826, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.01456794049590826, "signal/batch_coverage_25/centered_abs_mean": 0.15283007423082987, "signal/batch_coverage_25/group_std_mean": 0.21731575826803842, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.015283007795612017, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.015283007795612017, "signal/batch_coverage_5/centered_abs_mean": 0.1270467018087705, "signal/batch_coverage_5/group_std_mean": 0.18080047766367593, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012704670739670595, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.012704670739670595, "signal/brier_reward/centered_abs_mean": 0.11329849809408188, "signal/brier_reward/group_std_mean": 0.15196349223454794, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011329849561055502, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011329849561055502, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07130381713310878, "signal/confidence_uniqueness_reward/group_std_mean": 0.09505186229944229, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0071303822721044225, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0071303822721044225, "signal/format_reward/centered_abs_mean": 0.012776693018774191, "signal/format_reward/group_std_mean": 0.027781900639335316, "signal/format_reward/group_zero_std_frac": 0.8703703681627909, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0063883465093870955, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0063883465093870955, "signal/frontier_aurc_reward/centered_abs_mean": 0.000829536720023801, "signal/frontier_aurc_reward/group_std_mean": 0.0013354160667707522, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.0369208666816121e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.0369208666816121e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.007324707694351673, "signal/frontier_ece_reward/group_std_mean": 0.010262228238085905, "signal/frontier_ece_reward/group_zero_std_frac": 0.013888888992369175, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007324707500326136, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007324707500326136, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26471273104349774, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33471423387527466, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.16666666666666666, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02647127149005731, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02647127149005731, "step": 208, "total_flos": 0.0, "train_loss": -0.008862279910737505, "train_runtime": 43390.7211, "train_samples_per_second": 0.346, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 208, "num_input_tokens_seen": 527744326, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }