{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 50, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.6373261091843637, "calibration/batch_distribution_entropy": 0.6465861666510452, "calibration/confidence_entropy": 0.34232269490001105, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5009005491505466, "calibration/mean_confidence": 0.7910126313284384, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03662109375, "completions/max_length": 1507.6, "completions/max_terminated_length": 1507.6, "completions/mean_length": 215.51953125, "completions/mean_terminated_length": 223.70487365722656, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.1652095913887024, "learning_rate": 3.1249999999999997e-07, "loss": 0.0242, "num_tokens": 17050952.0, "reward": 0.4797627925872803, "reward_std": 0.35704264640808103, "rewards/accuracy_reward": 0.21953125, "rewards/batch_coverage_0": 0.0552757054567337, "rewards/batch_coverage_1": 0.0552757054567337, "rewards/batch_coverage_10": 0.07531758025288582, "rewards/batch_coverage_15": 0.08600667119026184, "rewards/batch_coverage_20": 0.09857667386531829, "rewards/batch_coverage_25": 0.10926464796066285, "rewards/batch_coverage_5": 0.06029992550611496, "rewards/brier_reward": 0.37625510096549986, "rewards/confidence_uniqueness_reward": 0.4875007390975952, "rewards/format_reward": 0.681640625, "rewards/frontier_entropy_batch_reward": -0.6492096781730652, "signal/accuracy_reward/centered_abs_mean": 0.23970947265625, "signal/accuracy_reward/group_std_mean": 0.2809469699859619, "signal/accuracy_reward/group_zero_std_frac": 0.328125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.35993377566337587, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.119854736328125, "signal/advantage_abs_mean": 0.8450886011123657, "signal/advantage_pre_scale_abs_mean": 0.30423404574394225, "signal/advantage_pre_scale_std": 0.36350384950637815, "signal/advantage_std": 0.9841925024986267, "signal/batch_coverage_0/centered_abs_mean": 0.07735746204853058, "signal/batch_coverage_0/group_std_mean": 0.12862617671489715, "signal/batch_coverage_0/group_zero_std_frac": 0.003125, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.003360638115555048, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0011062117060646414, "signal/batch_coverage_1/centered_abs_mean": 0.07735746204853058, "signal/batch_coverage_1/group_std_mean": 0.12862617671489715, "signal/batch_coverage_1/group_zero_std_frac": 0.003125, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.003360638115555048, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0011062117060646414, "signal/batch_coverage_10/centered_abs_mean": 0.09151717722415924, "signal/batch_coverage_10/group_std_mean": 0.1435583233833313, "signal/batch_coverage_10/group_zero_std_frac": 0.003125, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.003964567929506302, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0013086956227198244, "signal/batch_coverage_15/centered_abs_mean": 0.10193184614181519, "signal/batch_coverage_15/group_std_mean": 0.15447192192077636, "signal/batch_coverage_15/group_zero_std_frac": 0.003125, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.004429516848176718, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.001457625417970121, "signal/batch_coverage_20/centered_abs_mean": 0.11815007328987122, "signal/batch_coverage_20/group_std_mean": 0.17157170474529265, "signal/batch_coverage_20/group_zero_std_frac": 0.003125, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.005136752594262361, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0016895460663363337, "signal/batch_coverage_25/centered_abs_mean": 0.1344620779156685, "signal/batch_coverage_25/group_std_mean": 0.18939262628555298, "signal/batch_coverage_25/group_zero_std_frac": 0.003125, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.005850685015320778, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.001922807819209993, "signal/batch_coverage_5/centered_abs_mean": 0.08038161844015121, "signal/batch_coverage_5/group_std_mean": 0.1318250775337219, "signal/batch_coverage_5/group_zero_std_frac": 0.003125, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0034911792725324632, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0011494571343064308, "signal/brier_reward/centered_abs_mean": 0.3200297772884369, "signal/brier_reward/group_std_mean": 0.36500824689865113, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.09681591242551804, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.032002977281808856, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.3005684792995453, "signal/confidence_uniqueness_reward/group_std_mean": 0.35096290707588196, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.09130170047283173, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.030056847631931304, "signal/format_reward/centered_abs_mean": 0.40726318359375, "signal/format_reward/group_std_mean": 0.4561456859111786, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.61845782995224, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.203631591796875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.43004666566848754, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4745893657207489, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.13057637959718704, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04300466775894165, "step": 5 }, { "calibration/aurc": 0.6795537579618556, "calibration/batch_distribution_entropy": 0.6636404394336322, "calibration/confidence_entropy": 0.34291309742836623, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5240589456018558, "calibration/mean_confidence": 0.7873437694455336, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030859375, "completions/max_length": 1486.2, "completions/max_terminated_length": 1486.2, "completions/mean_length": 201.25380859375, "completions/mean_terminated_length": 207.68313598632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.032, "grad_norm": 0.1985611766576767, "learning_rate": 6.249999999999999e-07, "loss": 0.0198, "num_tokens": 34212143.0, "reward": 0.514380669593811, "reward_std": 0.33193816542625426, "rewards/accuracy_reward": 0.21796875, "rewards/batch_coverage_0": 0.054630983620882034, "rewards/batch_coverage_1": 0.054630983620882034, "rewards/batch_coverage_10": 0.09490734785795212, "rewards/batch_coverage_15": 0.10108360797166824, "rewards/batch_coverage_20": 0.11422456949949264, "rewards/batch_coverage_25": 0.12260636389255523, "rewards/batch_coverage_5": 0.07091258615255355, "rewards/brier_reward": 0.39744025468826294, "rewards/confidence_uniqueness_reward": 0.5402819037437439, "rewards/format_reward": 0.748046875, "rewards/frontier_entropy_batch_reward": -0.711652135848999, "signal/accuracy_reward/centered_abs_mean": 0.223095703125, "signal/accuracy_reward/group_std_mean": 0.2711519658565521, "signal/accuracy_reward/group_zero_std_frac": 0.309375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.3662183403968811, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1115478515625, "signal/advantage_abs_mean": 0.7963252782821655, "signal/advantage_pre_scale_abs_mean": 0.2699368894100189, "signal/advantage_pre_scale_std": 0.3400941789150238, "signal/advantage_std": 0.9841671347618103, "signal/batch_coverage_0/centered_abs_mean": 0.0823998749256134, "signal/batch_coverage_0/group_std_mean": 0.13471100330352784, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0038847104646265508, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.001178318215534091, "signal/batch_coverage_1/centered_abs_mean": 0.0823998749256134, "signal/batch_coverage_1/group_std_mean": 0.13471100330352784, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0038847104646265508, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.001178318215534091, "signal/batch_coverage_10/centered_abs_mean": 0.10847108513116836, "signal/batch_coverage_10/group_std_mean": 0.16454391479492186, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.005101960431784391, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0015511365141719579, "signal/batch_coverage_15/centered_abs_mean": 0.11424930393695831, "signal/batch_coverage_15/group_std_mean": 0.17042292654514313, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.005372866988182068, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0016337650828063487, "signal/batch_coverage_20/centered_abs_mean": 0.12856489270925522, "signal/batch_coverage_20/group_std_mean": 0.18573523461818695, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.0060584286227822306, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0018384779570624233, "signal/batch_coverage_25/centered_abs_mean": 0.1404997855424881, "signal/batch_coverage_25/group_std_mean": 0.19834802746772767, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.0066345173865556715, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0020091469399631024, "signal/batch_coverage_5/centered_abs_mean": 0.09054070562124253, "signal/batch_coverage_5/group_std_mean": 0.14401516318321228, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.004269297886639834, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0012947321170940994, "signal/brier_reward/centered_abs_mean": 0.3056317925453186, "signal/brier_reward/group_std_mean": 0.3545816421508789, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.10050017833709717, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03056318052113056, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2679386377334595, "signal/confidence_uniqueness_reward/group_std_mean": 0.3307202637195587, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08798813968896865, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.026793863996863364, "signal/format_reward/centered_abs_mean": 0.35068359375, "signal/format_reward/group_std_mean": 0.4214269757270813, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.5750837802886963, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.175341796875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3855856955051422, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4489862143993378, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.12660761475563048, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038558568060398105, "step": 10 }, { "calibration/aurc": 0.6169754589257901, "calibration/batch_distribution_entropy": 0.6509359190029964, "calibration/buffer_distribution_entropy": 0.6632450401167147, "calibration/confidence_entropy": 0.3447272173329581, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.47127741331913714, "calibration/mean_confidence": 0.7999372677967667, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0150390625, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 165.9875, "completions/mean_terminated_length": 168.5960906982422, "completions/min_length": 0.0, "completions/min_terminated_length": 29.2, "epoch": 0.048, "grad_norm": 0.02442619577050209, "learning_rate": 9.374999999999999e-07, "loss": 0.0036, "num_tokens": 50960591.0, "reward": 0.6489390015602112, "reward_std": 0.2351370334625244, "rewards/accuracy_reward": 0.2857421875, "rewards/batch_coverage_0": 0.08331729024648667, "rewards/batch_coverage_1": 0.08331729024648667, "rewards/batch_coverage_10": 0.14008204489946366, "rewards/batch_coverage_15": 0.15269193649291993, "rewards/batch_coverage_20": 0.1704973042011261, "rewards/batch_coverage_25": 0.17928307056427, "rewards/batch_coverage_5": 0.09736606627702712, "rewards/brier_reward": 0.5068644285202026, "rewards/confidence_uniqueness_reward": 0.6822745203971863, "rewards/format_reward": 0.92216796875, "rewards/frontier_entropy_batch_reward": -0.868937087059021, "signal/accuracy_reward/centered_abs_mean": 0.19013671875, "signal/accuracy_reward/group_std_mean": 0.239943265914917, "signal/accuracy_reward/group_zero_std_frac": 0.3625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.5162946462631226, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.095068359375, "signal/advantage_abs_mean": 0.68557368516922, "signal/advantage_pre_scale_abs_mean": 0.17026761323213577, "signal/advantage_pre_scale_std": 0.24687082171440125, "signal/advantage_std": 0.9839404463768006, "signal/batch_coverage_0/centered_abs_mean": 0.10061688423156738, "signal/batch_coverage_0/group_std_mean": 0.15735834538936616, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.008269770722836255, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.001438821479678154, "signal/batch_coverage_1/centered_abs_mean": 0.10061688423156738, "signal/batch_coverage_1/group_std_mean": 0.15735834538936616, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.008269770722836255, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.001438821479678154, "signal/batch_coverage_10/centered_abs_mean": 0.12309518903493881, "signal/batch_coverage_10/group_std_mean": 0.1861822485923767, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.010132433753460646, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0017602612962946296, "signal/batch_coverage_15/centered_abs_mean": 0.1313455581665039, "signal/batch_coverage_15/group_std_mean": 0.19549228847026826, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.010681292042136192, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.001878241472877562, "signal/batch_coverage_20/centered_abs_mean": 0.14566806256771087, "signal/batch_coverage_20/group_std_mean": 0.21204695403575896, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.011906150355935096, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020830533001571895, "signal/batch_coverage_25/centered_abs_mean": 0.15666291415691375, "signal/batch_coverage_25/group_std_mean": 0.22407272458076477, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.012635924853384495, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0022402796894311905, "signal/batch_coverage_5/centered_abs_mean": 0.10522163063287734, "signal/batch_coverage_5/group_std_mean": 0.16216584146022797, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.008624614495784045, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0015046692918986083, "signal/brier_reward/centered_abs_mean": 0.25743305683135986, "signal/brier_reward/group_std_mean": 0.3131587505340576, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.14089297950267793, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.025743305310606958, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.17150525450706483, "signal/confidence_uniqueness_reward/group_std_mean": 0.22964869439601898, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.09172600209712982, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.017150526121258735, "signal/format_reward/centered_abs_mean": 0.134808349609375, "signal/format_reward/group_std_mean": 0.23255448937416076, "signal/format_reward/group_zero_std_frac": 0.153125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.3284162819385529, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0674041748046875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.21744501888751983, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3348918974399567, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.034375, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.11601799428462982, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02174450196325779, "step": 15 }, { "calibration/aurc": 0.5160265874145994, "calibration/batch_distribution_entropy": 0.7821093596540675, "calibration/buffer_distribution_entropy": 0.6721649986596376, "calibration/confidence_entropy": 0.419975819470785, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3147944777253985, "calibration/mean_confidence": 0.7203686667722206, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00400390625, "completions/max_length": 1025.6, "completions/max_terminated_length": 1025.6, "completions/mean_length": 128.2537109375, "completions/mean_terminated_length": 128.78225402832032, "completions/min_length": 0.0, "completions/min_terminated_length": 30.6, "epoch": 0.064, "grad_norm": 0.031037550419569016, "learning_rate": 1e-06, "loss": -0.0098, "num_tokens": 67192309.0, "reward": 0.7435286998748779, "reward_std": 0.18170610964298248, "rewards/accuracy_reward": 0.3544921875, "rewards/batch_coverage_0": 0.10424036830663681, "rewards/batch_coverage_1": 0.10424036830663681, "rewards/batch_coverage_10": 0.17098541855812072, "rewards/batch_coverage_15": 0.18716970086097717, "rewards/batch_coverage_20": 0.20965131223201752, "rewards/batch_coverage_25": 0.21948152482509614, "rewards/batch_coverage_5": 0.1322124183177948, "rewards/brier_reward": 0.6193184971809387, "rewards/confidence_uniqueness_reward": 0.8077521324157715, "rewards/format_reward": 0.987109375, "rewards/frontier_entropy_batch_reward": -0.8610928893089295, "signal/accuracy_reward/centered_abs_mean": 0.1988037109375, "signal/accuracy_reward/group_std_mean": 0.2517113327980042, "signal/accuracy_reward/group_zero_std_frac": 0.325, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7254745364189148, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09940185546875, "signal/advantage_abs_mean": 0.7277890801429748, "signal/advantage_pre_scale_abs_mean": 0.1374871164560318, "signal/advantage_pre_scale_std": 0.1954087108373642, "signal/advantage_std": 0.983764922618866, "signal/batch_coverage_0/centered_abs_mean": 0.13318662196397782, "signal/batch_coverage_0/group_std_mean": 0.19517841041088105, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.014266725815832614, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.001904568658210337, "signal/batch_coverage_1/centered_abs_mean": 0.13318662196397782, "signal/batch_coverage_1/group_std_mean": 0.19517841041088105, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.014266725815832614, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.001904568658210337, "signal/batch_coverage_10/centered_abs_mean": 0.15405770838260652, "signal/batch_coverage_10/group_std_mean": 0.22008646428585052, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01639944761991501, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0022030251566320658, "signal/batch_coverage_15/centered_abs_mean": 0.16269078552722932, "signal/batch_coverage_15/group_std_mean": 0.23098040223121644, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01727539598941803, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0023264782037585975, "signal/batch_coverage_20/centered_abs_mean": 0.17800569534301758, "signal/batch_coverage_20/group_std_mean": 0.24921154975891113, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01889161504805088, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0025454814080148936, "signal/batch_coverage_25/centered_abs_mean": 0.1886595755815506, "signal/batch_coverage_25/group_std_mean": 0.2600310921669006, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.019977013394236565, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0026978319510817528, "signal/batch_coverage_5/centered_abs_mean": 0.13942071199417114, "signal/batch_coverage_5/group_std_mean": 0.20169951319694518, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01490629930049181, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0019937161123380065, "signal/brier_reward/centered_abs_mean": 0.23416422307491302, "signal/brier_reward/group_std_mean": 0.28761149048805235, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1729327619075775, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.023416423052549363, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08846724629402161, "signal/confidence_uniqueness_reward/group_std_mean": 0.12165791392326356, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06507326290011406, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008846724499017, "signal/format_reward/centered_abs_mean": 0.02449951171875, "signal/format_reward/group_std_mean": 0.06146693043410778, "signal/format_reward/group_zero_std_frac": 0.690625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.08888003826141358, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012249755859375, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22997065484523774, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36477545499801634, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0375, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.1725946694612503, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022997065633535384, "step": 20 }, { "calibration/aurc": 0.659273068888402, "calibration/batch_distribution_entropy": 0.9708581625874132, "calibration/buffer_distribution_entropy": 0.7680514501525051, "calibration/confidence_entropy": 0.49606242445239357, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.24383066141563323, "calibration/mean_confidence": 0.4667354515596018, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0033203125, "completions/max_length": 825.0, "completions/max_terminated_length": 825.0, "completions/mean_length": 106.2681640625, "completions/mean_terminated_length": 106.61890869140625, "completions/min_length": 0.0, "completions/min_terminated_length": 35.8, "epoch": 0.08, "grad_norm": 0.013238995335996151, "learning_rate": 1e-06, "loss": -0.0245, "num_tokens": 83213647.0, "reward": 0.8145912766456604, "reward_std": 0.15870223343372344, "rewards/accuracy_reward": 0.34990234375, "rewards/batch_coverage_0": 0.19447994828224183, "rewards/batch_coverage_1": 0.19447994828224183, "rewards/batch_coverage_10": 0.23603789806365966, "rewards/batch_coverage_15": 0.24778175055980683, "rewards/batch_coverage_20": 0.2619646489620209, "rewards/batch_coverage_25": 0.27012325525283815, "rewards/batch_coverage_5": 0.21396056711673736, "rewards/brier_reward": 0.7053467035293579, "rewards/confidence_uniqueness_reward": 0.9336926817893982, "rewards/format_reward": 0.99267578125, "rewards/frontier_entropy_batch_reward": -0.4375097811222076, "signal/accuracy_reward/centered_abs_mean": 0.193804931640625, "signal/accuracy_reward/group_std_mean": 0.2428890973329544, "signal/accuracy_reward/group_zero_std_frac": 0.359375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8063225746154785, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0969024658203125, "signal/advantage_abs_mean": 0.7467060685157776, "signal/advantage_pre_scale_abs_mean": 0.12001456767320633, "signal/advantage_pre_scale_std": 0.17081058919429778, "signal/advantage_std": 0.9836754441261292, "signal/batch_coverage_0/centered_abs_mean": 0.2497153639793396, "signal/batch_coverage_0/group_std_mean": 0.31533910632133483, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.02987198568880558, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0035709296353161335, "signal/batch_coverage_1/centered_abs_mean": 0.2497153639793396, "signal/batch_coverage_1/group_std_mean": 0.31533910632133483, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.02987198568880558, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0035709296353161335, "signal/batch_coverage_10/centered_abs_mean": 0.25728048086166383, "signal/batch_coverage_10/group_std_mean": 0.3238477647304535, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03072645589709282, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0036791109945625068, "signal/batch_coverage_15/centered_abs_mean": 0.2555805444717407, "signal/batch_coverage_15/group_std_mean": 0.32144402861595156, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03051029294729233, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.003654801845550537, "signal/batch_coverage_20/centered_abs_mean": 0.25660484433174136, "signal/batch_coverage_20/group_std_mean": 0.323328423500061, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.030591926723718642, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0036694493144750597, "signal/batch_coverage_25/centered_abs_mean": 0.2620661616325378, "signal/batch_coverage_25/group_std_mean": 0.3306765556335449, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03118036426603794, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0037475463002920152, "signal/batch_coverage_5/centered_abs_mean": 0.25281934440135956, "signal/batch_coverage_5/group_std_mean": 0.3182687520980835, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.030213556066155434, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0036153166554868223, "signal/brier_reward/centered_abs_mean": 0.2312027245759964, "signal/brier_reward/group_std_mean": 0.28400464057922364, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19109801054000855, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.023120272532105447, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03871437720954418, "signal/confidence_uniqueness_reward/group_std_mean": 0.0667199194431305, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03128206320106983, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038714376278221607, "signal/format_reward/centered_abs_mean": 0.014093017578125, "signal/format_reward/group_std_mean": 0.039295760542154314, "signal/format_reward/group_zero_std_frac": 0.784375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05787626802921295, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0070465087890625, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4507632553577423, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5134151577949524, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3705613732337952, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04507632553577423, "step": 25 }, { "calibration/aurc": 0.6369551230672109, "calibration/batch_distribution_entropy": 0.8055908332520693, "calibration/buffer_distribution_entropy": 0.8725075291862959, "calibration/confidence_entropy": 0.440458014707745, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.003937007874015748, "calibration/coverage@25%": 0.006299212598425197, "calibration/coverage@30%": 0.01062992125984252, "calibration/coverage@5%": 0.0, "calibration/ece": 0.2013707173339057, "calibration/mean_confidence": 0.24628456410768784, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00654296875, "completions/max_length": 1000.2, "completions/max_terminated_length": 1000.2, "completions/mean_length": 112.25400390625, "completions/mean_terminated_length": 112.99878387451172, "completions/min_length": 0.0, "completions/min_terminated_length": 41.6, "epoch": 0.096, "grad_norm": 0.022800516337156296, "learning_rate": 1e-06, "loss": -0.0329, "num_tokens": 99407736.0, "reward": 0.8120527863502502, "reward_std": 0.13627898395061494, "rewards/accuracy_reward": 0.36025390625, "rewards/batch_coverage_0": 0.2765601396560669, "rewards/batch_coverage_1": 0.2765601396560669, "rewards/batch_coverage_10": 0.30208975076675415, "rewards/batch_coverage_15": 0.31157302260398867, "rewards/batch_coverage_20": 0.3181931436061859, "rewards/batch_coverage_25": 0.3236126244068146, "rewards/batch_coverage_5": 0.2889863818883896, "rewards/brier_reward": 0.7170829892158508, "rewards/confidence_uniqueness_reward": 0.9133394241333008, "rewards/format_reward": 0.9904296875, "rewards/frontier_entropy_batch_reward": -0.5632658362388611, "signal/accuracy_reward/centered_abs_mean": 0.183563232421875, "signal/accuracy_reward/group_std_mean": 0.23314289450645448, "signal/accuracy_reward/group_zero_std_frac": 0.36875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.179761004447937, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0917816162109375, "signal/advantage_abs_mean": 0.703443419933319, "signal/advantage_pre_scale_abs_mean": 0.09668720364570618, "signal/advantage_pre_scale_std": 0.15405036211013795, "signal/advantage_std": 0.9832155346870423, "signal/batch_coverage_0/centered_abs_mean": 0.25192484855651853, "signal/batch_coverage_0/group_std_mean": 0.31211569905281067, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04653439298272133, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.003602525359019637, "signal/batch_coverage_1/centered_abs_mean": 0.25192484855651853, "signal/batch_coverage_1/group_std_mean": 0.31211569905281067, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04653439298272133, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.003602525359019637, "signal/batch_coverage_10/centered_abs_mean": 0.25177322924137113, "signal/batch_coverage_10/group_std_mean": 0.3137675106525421, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04646962657570839, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0036003570538014175, "signal/batch_coverage_15/centered_abs_mean": 0.24674721360206603, "signal/batch_coverage_15/group_std_mean": 0.3090873181819916, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.045537931472063066, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.003528485121205449, "signal/batch_coverage_20/centered_abs_mean": 0.23448554873466493, "signal/batch_coverage_20/group_std_mean": 0.29566892981529236, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04342522844672203, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.003353143390268087, "signal/batch_coverage_25/centered_abs_mean": 0.22657085955142975, "signal/batch_coverage_25/group_std_mean": 0.2883758008480072, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.042099975794553754, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.003239963296800852, "signal/batch_coverage_5/centered_abs_mean": 0.2547337025403976, "signal/batch_coverage_5/group_std_mean": 0.3161489307880402, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04701984152197838, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0036426919978111982, "signal/brier_reward/centered_abs_mean": 0.1984732985496521, "signal/brier_reward/group_std_mean": 0.25304334461688993, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.25615803599357606, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019847330078482626, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04229490533471107, "signal/confidence_uniqueness_reward/group_std_mean": 0.07432217746973038, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05456542745232582, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004229490412399173, "signal/format_reward/centered_abs_mean": 0.01829833984375, "signal/format_reward/group_std_mean": 0.048546963930130006, "signal/format_reward/group_zero_std_frac": 0.74375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.11600722074508667, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009149169921875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4489035427570343, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5113010764122009, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.581957995891571, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04489035606384277, "step": 30 }, { "calibration/aurc": 0.5173940626537294, "calibration/batch_distribution_entropy": 0.9432925466581696, "calibration/buffer_distribution_entropy": 0.9257200120173643, "calibration/confidence_entropy": 0.5337548494679153, "calibration/coverage@0%": 0.0023622230677838433, "calibration/coverage@1%": 0.0023622230677838433, "calibration/coverage@10%": 0.0023622230677838433, "calibration/coverage@15%": 0.0035410049931276547, "calibration/coverage@20%": 0.0035410049931276547, "calibration/coverage@25%": 0.003933932301575592, "calibration/coverage@30%": 0.003933932301575592, "calibration/coverage@5%": 0.0023622230677838433, "calibration/ece": 0.19002162983403126, "calibration/mean_confidence": 0.4036477289378933, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0080078125, "completions/max_length": 1129.8, "completions/max_terminated_length": 1129.8, "completions/mean_length": 126.34736328125, "completions/mean_terminated_length": 127.37257385253906, "completions/min_length": 0.0, "completions/min_terminated_length": 50.6, "epoch": 0.112, "grad_norm": 0.011110126040875912, "learning_rate": 1e-06, "loss": -0.0313, "num_tokens": 115811005.0, "reward": 0.8575616359710694, "reward_std": 0.1483265370130539, "rewards/accuracy_reward": 0.412109375, "rewards/batch_coverage_0": 0.18250919580459596, "rewards/batch_coverage_1": 0.18250919580459596, "rewards/batch_coverage_10": 0.219953316450119, "rewards/batch_coverage_15": 0.23371829092502594, "rewards/batch_coverage_20": 0.2468995362520218, "rewards/batch_coverage_25": 0.2535784751176834, "rewards/batch_coverage_5": 0.20052466094493865, "rewards/brier_reward": 0.7087387323379517, "rewards/confidence_uniqueness_reward": 0.9385404348373413, "rewards/format_reward": 0.99140625, "rewards/frontier_entropy_batch_reward": -0.30655706524848936, "signal/accuracy_reward/centered_abs_mean": 0.18665771484375, "signal/accuracy_reward/group_std_mean": 0.2335004061460495, "signal/accuracy_reward/group_zero_std_frac": 0.384375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8936283946037292, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.093328857421875, "signal/advantage_abs_mean": 0.7414809942245484, "signal/advantage_pre_scale_abs_mean": 0.11034760773181915, "signal/advantage_pre_scale_std": 0.16365787088871003, "signal/advantage_std": 0.9835437655448913, "signal/batch_coverage_0/centered_abs_mean": 0.2369515985250473, "signal/batch_coverage_0/group_std_mean": 0.2966028988361359, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.032617274671792984, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0033884078729897738, "signal/batch_coverage_1/centered_abs_mean": 0.2369515985250473, "signal/batch_coverage_1/group_std_mean": 0.2966028988361359, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.032617274671792984, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0033884078729897738, "signal/batch_coverage_10/centered_abs_mean": 0.2426645427942276, "signal/batch_coverage_10/group_std_mean": 0.3042888045310974, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03337412625551224, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.00347010288387537, "signal/batch_coverage_15/centered_abs_mean": 0.23903344869613646, "signal/batch_coverage_15/group_std_mean": 0.3005025327205658, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.032858715206384656, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.003418178344145417, "signal/batch_coverage_20/centered_abs_mean": 0.2379598081111908, "signal/batch_coverage_20/group_std_mean": 0.30019118785858157, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.0327223714441061, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.003402825305238366, "signal/batch_coverage_25/centered_abs_mean": 0.23780983984470366, "signal/batch_coverage_25/group_std_mean": 0.30095354914665223, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03264134675264359, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0034006804693490268, "signal/batch_coverage_5/centered_abs_mean": 0.2414990097284317, "signal/batch_coverage_5/group_std_mean": 0.3026431679725647, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03324813023209572, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0034534358885139225, "signal/brier_reward/centered_abs_mean": 0.20246321558952332, "signal/brier_reward/group_std_mean": 0.25215981602668763, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1943250447511673, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020246322453022002, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02902383990585804, "signal/confidence_uniqueness_reward/group_std_mean": 0.058507513254880905, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027896419540047644, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029023839626461266, "signal/format_reward/centered_abs_mean": 0.01649169921875, "signal/format_reward/group_std_mean": 0.044241581857204434, "signal/format_reward/group_zero_std_frac": 0.765625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.07819900512695313, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008245849609375, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.36329306960105895, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4340018093585968, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.352882045507431, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03632930666208267, "step": 35 }, { "calibration/aurc": 0.5268840798603046, "calibration/batch_distribution_entropy": 0.9881459018375685, "calibration/buffer_distribution_entropy": 0.9507221731850362, "calibration/confidence_entropy": 0.517089658035264, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.00275049115913556, "calibration/coverage@5%": 0.0, "calibration/ece": 0.22157705645715264, "calibration/mean_confidence": 0.45853932202215536, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01123046875, "completions/max_length": 1148.6, "completions/max_terminated_length": 1148.6, "completions/mean_length": 139.51337890625, "completions/mean_terminated_length": 141.1068328857422, "completions/min_length": 0.0, "completions/min_terminated_length": 55.2, "epoch": 0.128, "grad_norm": 0.007969611324369907, "learning_rate": 1e-06, "loss": -0.0557, "num_tokens": 132156294.0, "reward": 0.8748344421386719, "reward_std": 0.147980397939682, "rewards/accuracy_reward": 0.4267578125, "rewards/batch_coverage_0": 0.20069107115268708, "rewards/batch_coverage_1": 0.20069107115268708, "rewards/batch_coverage_10": 0.2380422294139862, "rewards/batch_coverage_15": 0.24701308310031891, "rewards/batch_coverage_20": 0.25846782326698303, "rewards/batch_coverage_25": 0.2643324613571167, "rewards/batch_coverage_5": 0.2187619239091873, "rewards/brier_reward": 0.6851011157035828, "rewards/confidence_uniqueness_reward": 0.9430533051490784, "rewards/format_reward": 0.98837890625, "rewards/frontier_entropy_batch_reward": -0.18829761445522308, "signal/accuracy_reward/centered_abs_mean": 0.14984130859375, "signal/accuracy_reward/group_std_mean": 0.19972037076950072, "signal/accuracy_reward/group_zero_std_frac": 0.41875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.6852742671966553, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.074920654296875, "signal/advantage_abs_mean": 0.7205666780471802, "signal/advantage_pre_scale_abs_mean": 0.10542576014995575, "signal/advantage_pre_scale_std": 0.1632304906845093, "signal/advantage_std": 0.9835788607597351, "signal/batch_coverage_0/centered_abs_mean": 0.24876993000507355, "signal/batch_coverage_0/group_std_mean": 0.3085006058216095, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03299525789916515, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0035574099980294704, "signal/batch_coverage_1/centered_abs_mean": 0.24876993000507355, "signal/batch_coverage_1/group_std_mean": 0.3085006058216095, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03299525789916515, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0035574099980294704, "signal/batch_coverage_10/centered_abs_mean": 0.2526919633150101, "signal/batch_coverage_10/group_std_mean": 0.31363179683685305, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.033503925427794456, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0036134951282292605, "signal/batch_coverage_15/centered_abs_mean": 0.2509923607110977, "signal/batch_coverage_15/group_std_mean": 0.311715692281723, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.033296512067317964, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0035891907289624216, "signal/batch_coverage_20/centered_abs_mean": 0.2525142878293991, "signal/batch_coverage_20/group_std_mean": 0.31447394490242003, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03351157084107399, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.003610954247415066, "signal/batch_coverage_25/centered_abs_mean": 0.25000489354133604, "signal/batch_coverage_25/group_std_mean": 0.3117690205574036, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03318729922175408, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.003575070109218359, "signal/batch_coverage_5/centered_abs_mean": 0.2549441397190094, "signal/batch_coverage_5/group_std_mean": 0.31646093130111697, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03380131050944328, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.003645701287314296, "signal/brier_reward/centered_abs_mean": 0.23300331234931945, "signal/brier_reward/group_std_mean": 0.2820957779884338, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21624882519245148, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.023300331830978394, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030025603249669075, "signal/confidence_uniqueness_reward/group_std_mean": 0.06504980400204659, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028021814301609993, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003002560418099165, "signal/format_reward/centered_abs_mean": 0.022149658203125, "signal/format_reward/group_std_mean": 0.056480865180492404, "signal/format_reward/group_zero_std_frac": 0.7125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.10390360951423645, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0110748291015625, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2851736843585968, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36849350333213804, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.2644934684038162, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02851736731827259, "step": 40 }, { "calibration/aurc": 0.3908897594631147, "calibration/batch_distribution_entropy": 0.9793994890043638, "calibration/buffer_distribution_entropy": 0.9639280501865457, "calibration/confidence_entropy": 0.4823526568785848, "calibration/coverage@0%": 0.0011881562319148685, "calibration/coverage@1%": 0.0011881562319148685, "calibration/coverage@10%": 0.0011881562319148685, "calibration/coverage@15%": 0.0039604334596376415, "calibration/coverage@20%": 0.10656705453700951, "calibration/coverage@25%": 0.21268301524550642, "calibration/coverage@30%": 0.2288754823822389, "calibration/coverage@5%": 0.0011881562319148685, "calibration/ece": 0.26584464815654074, "calibration/mean_confidence": 0.4273948038821092, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0115234375, "completions/max_length": 1287.2, "completions/max_terminated_length": 1287.2, "completions/mean_length": 156.25498046875, "completions/mean_terminated_length": 158.07511291503906, "completions/min_length": 0.0, "completions/min_terminated_length": 58.6, "epoch": 0.144, "grad_norm": 0.008597729727625847, "learning_rate": 1e-06, "loss": -0.0436, "num_tokens": 148706777.0, "reward": 0.9181379199028015, "reward_std": 0.1555813044309616, "rewards/accuracy_reward": 0.52119140625, "rewards/batch_coverage_0": 0.19273079335689544, "rewards/batch_coverage_1": 0.19273079335689544, "rewards/batch_coverage_10": 0.2425982028245926, "rewards/batch_coverage_15": 0.2533731073141098, "rewards/batch_coverage_20": 0.26104374825954435, "rewards/batch_coverage_25": 0.2662007987499237, "rewards/batch_coverage_5": 0.21613748073577882, "rewards/brier_reward": 0.6717984676361084, "rewards/confidence_uniqueness_reward": 0.9413758754730225, "rewards/format_reward": 0.98818359375, "rewards/frontier_entropy_batch_reward": -0.2110186845064163, "signal/accuracy_reward/centered_abs_mean": 0.163812255859375, "signal/accuracy_reward/group_std_mean": 0.21589326560497285, "signal/accuracy_reward/group_zero_std_frac": 0.38125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7454147100448608, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0819061279296875, "signal/advantage_abs_mean": 0.7236922383308411, "signal/advantage_pre_scale_abs_mean": 0.11080611050128937, "signal/advantage_pre_scale_std": 0.17058975994586945, "signal/advantage_std": 0.9835979700088501, "signal/batch_coverage_0/centered_abs_mean": 0.266434383392334, "signal/batch_coverage_0/group_std_mean": 0.32679077982902527, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03458985388278961, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.003810011688619852, "signal/batch_coverage_1/centered_abs_mean": 0.266434383392334, "signal/batch_coverage_1/group_std_mean": 0.32679077982902527, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03458985388278961, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.003810011688619852, "signal/batch_coverage_10/centered_abs_mean": 0.2715885639190674, "signal/batch_coverage_10/group_std_mean": 0.33486836552619936, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03524618148803711, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.00388371660374105, "signal/batch_coverage_15/centered_abs_mean": 0.26918852925300596, "signal/batch_coverage_15/group_std_mean": 0.33258755803108214, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.034940270334482194, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0038493959233164786, "signal/batch_coverage_20/centered_abs_mean": 0.26530343294143677, "signal/batch_coverage_20/group_std_mean": 0.3288030087947845, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.034423737227916716, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0037938390392810105, "signal/batch_coverage_25/centered_abs_mean": 0.2609012186527252, "signal/batch_coverage_25/group_std_mean": 0.3240866124629974, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03384031280875206, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.003730887267738581, "signal/batch_coverage_5/centered_abs_mean": 0.2733203172683716, "signal/batch_coverage_5/group_std_mean": 0.3354912757873535, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03547571823000908, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.003908480517566204, "signal/brier_reward/centered_abs_mean": 0.24704381227493286, "signal/brier_reward/group_std_mean": 0.29715535044670105, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.22423461079597473, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.024704382568597794, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03163357488811016, "signal/confidence_uniqueness_reward/group_std_mean": 0.06734048649668693, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02875906378030777, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031633577309548855, "signal/format_reward/centered_abs_mean": 0.022491455078125, "signal/format_reward/group_std_mean": 0.057434971630573275, "signal/format_reward/group_zero_std_frac": 0.70625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.10224665850400924, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0112457275390625, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3063051402568817, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38502358794212344, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.2783151209354401, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0306305143982172, "step": 45 }, { "calibration/aurc": 0.46575769813070034, "calibration/batch_distribution_entropy": 0.9935435442933741, "calibration/buffer_distribution_entropy": 0.9718765395402269, "calibration/confidence_entropy": 0.49034085912160263, "calibration/coverage@0%": 0.0015873701060211075, "calibration/coverage@1%": 0.0015873701060211075, "calibration/coverage@10%": 0.0015873701060211075, "calibration/coverage@15%": 0.0015873701060211075, "calibration/coverage@20%": 0.003944933956708731, "calibration/coverage@25%": 0.004730788573604605, "calibration/coverage@30%": 0.014161043976355096, "calibration/coverage@5%": 0.0015873701060211075, "calibration/ece": 0.18153789884384935, "calibration/mean_confidence": 0.4796684327608073, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01484375, "completions/max_length": 1009.0, "completions/max_terminated_length": 1009.0, "completions/mean_length": 163.23037109375, "completions/mean_terminated_length": 165.69429931640624, "completions/min_length": 0.0, "completions/min_terminated_length": 61.8, "epoch": 0.16, "grad_norm": 0.007046389393508434, "learning_rate": 1e-06, "loss": -0.0617, "num_tokens": 165399184.0, "reward": 0.8970174312591552, "reward_std": 0.16062215864658355, "rewards/accuracy_reward": 0.45966796875, "rewards/batch_coverage_0": 0.224856960773468, "rewards/batch_coverage_1": 0.224856960773468, "rewards/batch_coverage_10": 0.26703203916549684, "rewards/batch_coverage_15": 0.2741366446018219, "rewards/batch_coverage_20": 0.2853119790554047, "rewards/batch_coverage_25": 0.2901387333869934, "rewards/batch_coverage_5": 0.24517568647861482, "rewards/brier_reward": 0.6930433869361877, "rewards/confidence_uniqueness_reward": 0.9418343544006348, "rewards/format_reward": 0.9849609375, "rewards/frontier_entropy_batch_reward": -0.14689382612705232, "signal/accuracy_reward/centered_abs_mean": 0.155767822265625, "signal/accuracy_reward/group_std_mean": 0.20625517666339874, "signal/accuracy_reward/group_zero_std_frac": 0.403125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.6726519107818604, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0778839111328125, "signal/advantage_abs_mean": 0.7129915833473206, "signal/advantage_pre_scale_abs_mean": 0.11246829628944396, "signal/advantage_pre_scale_std": 0.1778249204158783, "signal/advantage_std": 0.9836406350135803, "signal/batch_coverage_0/centered_abs_mean": 0.261820513010025, "signal/batch_coverage_0/group_std_mean": 0.325615668296814, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.032354673743247984, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.003744033258408308, "signal/batch_coverage_1/centered_abs_mean": 0.261820513010025, "signal/batch_coverage_1/group_std_mean": 0.325615668296814, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.032354673743247984, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.003744033258408308, "signal/batch_coverage_10/centered_abs_mean": 0.2718116581439972, "signal/batch_coverage_10/group_std_mean": 0.3369123458862305, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03358059972524643, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0038869067560881377, "signal/batch_coverage_15/centered_abs_mean": 0.2694369077682495, "signal/batch_coverage_15/group_std_mean": 0.33361018300056455, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03328223079442978, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.003852947847917676, "signal/batch_coverage_20/centered_abs_mean": 0.2705358564853668, "signal/batch_coverage_20/group_std_mean": 0.33500627279281614, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03342964798212052, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0038686628453433515, "signal/batch_coverage_25/centered_abs_mean": 0.27277548909187316, "signal/batch_coverage_25/group_std_mean": 0.33765124082565307, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03371136784553528, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.003900689631700516, "signal/batch_coverage_5/centered_abs_mean": 0.26610376834869387, "signal/batch_coverage_5/group_std_mean": 0.3305723547935486, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03288530968129635, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0038052838295698165, "signal/brier_reward/centered_abs_mean": 0.24320079386234283, "signal/brier_reward/group_std_mean": 0.29356223344802856, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21018587350845336, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.024320079386234282, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.033786237612366675, "signal/confidence_uniqueness_reward/group_std_mean": 0.07571721524000168, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02918182797729969, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033786237705498933, "signal/format_reward/centered_abs_mean": 0.02840576171875, "signal/format_reward/group_std_mean": 0.07038533240556717, "signal/format_reward/group_zero_std_frac": 0.646875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.12264087647199631, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014202880859375, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23739419281482696, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3186127722263336, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.20512811839580536, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023739420250058173, "step": 50 }, { "epoch": 0.16, "eval_calibration/aurc": 0.562246010539858, "eval_calibration/batch_distribution_entropy": 0.9193620611440112, "eval_calibration/buffer_distribution_entropy": 0.9754673264817139, "eval_calibration/confidence_entropy": 0.48591568590613476, "eval_calibration/coverage@0%": 0.023941532258064516, "eval_calibration/coverage@1%": 0.023941532258064516, "eval_calibration/coverage@10%": 0.023941532258064516, "eval_calibration/coverage@15%": 0.023941532258064516, "eval_calibration/coverage@20%": 0.04813508064516129, "eval_calibration/coverage@25%": 0.10282258064516128, "eval_calibration/coverage@30%": 0.11844758064516128, "eval_calibration/coverage@5%": 0.023941532258064516, "eval_calibration/ece": 0.28053149868195854, "eval_calibration/mean_confidence": 0.4684673564087854, "eval_completions/clipped_ratio": 0.01037176724137931, "eval_completions/max_length": 618.0, "eval_completions/max_terminated_length": 618.0, "eval_completions/mean_length": 173.83795928955078, "eval_completions/mean_terminated_length": 175.71324920654297, "eval_completions/min_length": 17.75, "eval_completions/min_terminated_length": 74.5, "eval_loss": 0.0, "eval_num_tokens": 165399184.0, "eval_reward": 0.7727401107549667, "eval_reward_std": 0.23869208991527557, "eval_rewards/accuracy_reward": 0.404296875, "eval_rewards/batch_coverage_0": 0.12788349762558937, "eval_rewards/batch_coverage_1": 0.12788349762558937, "eval_rewards/batch_coverage_10": 0.1259235292673111, "eval_rewards/batch_coverage_15": 0.1259235292673111, "eval_rewards/batch_coverage_20": 0.11765317618846893, "eval_rewards/batch_coverage_25": 0.10355318710207939, "eval_rewards/batch_coverage_5": 0.12788349762558937, "eval_rewards/brier_reward": 0.7332238703966141, "eval_rewards/confidence_uniqueness_reward": 0.8892467767000198, "eval_rewards/format_reward": 0.990234375, "eval_rewards/frontier_entropy_batch_reward": -0.990234375, "eval_runtime": 46.2682, "eval_samples_per_second": 10.807, "eval_signal/accuracy_reward/centered_abs_mean": 0.4644775390625, "eval_signal/accuracy_reward/group_std_mean": 0.48902085423469543, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9803617894649506, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23223876953125, "eval_signal/advantage_abs_mean": 0.8980159163475037, "eval_signal/advantage_pre_scale_abs_mean": 0.2143118791282177, "eval_signal/advantage_pre_scale_std": 0.23657548055052757, "eval_signal/advantage_std": 0.9877015054225922, "eval_signal/batch_coverage_0/centered_abs_mean": 0.2970190942287445, "eval_signal/batch_coverage_0/group_std_mean": 0.38584908097982407, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.017935237381607294, "eval_signal/batch_coverage_0/weight": 0.014299999922513962, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.004247372969985008, "eval_signal/batch_coverage_1/centered_abs_mean": 0.2970190942287445, "eval_signal/batch_coverage_1/group_std_mean": 0.38584908097982407, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.017935237381607294, "eval_signal/batch_coverage_1/weight": 0.014299999922513962, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.004247372969985008, "eval_signal/batch_coverage_10/centered_abs_mean": 0.2857876867055893, "eval_signal/batch_coverage_10/group_std_mean": 0.3715630993247032, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01728172041475773, "eval_signal/batch_coverage_10/weight": 0.014299999922513962, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.004086763830855489, "eval_signal/batch_coverage_15/centered_abs_mean": 0.2857876867055893, "eval_signal/batch_coverage_15/group_std_mean": 0.3715630993247032, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01728172041475773, "eval_signal/batch_coverage_15/weight": 0.014299999922513962, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.004086763830855489, "eval_signal/batch_coverage_20/centered_abs_mean": 0.2643692269921303, "eval_signal/batch_coverage_20/group_std_mean": 0.3458894342184067, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.016003886004909873, "eval_signal/batch_coverage_20/weight": 0.014299999922513962, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.0037804798339493573, "eval_signal/batch_coverage_25/centered_abs_mean": 0.22446337342262268, "eval_signal/batch_coverage_25/group_std_mean": 0.29489558935165405, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013599664904177189, "eval_signal/batch_coverage_25/weight": 0.014299999922513962, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.0032098261872306466, "eval_signal/batch_coverage_5/centered_abs_mean": 0.2970190942287445, "eval_signal/batch_coverage_5/group_std_mean": 0.38584908097982407, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.017935237381607294, "eval_signal/batch_coverage_5/weight": 0.014299999922513962, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.004247372969985008, "eval_signal/brier_reward/centered_abs_mean": 0.23577644675970078, "eval_signal/brier_reward/group_std_mean": 0.28787870705127716, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.09959037974476814, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02357764495536685, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05138667766004801, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08252080902457237, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021524199284613132, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005138667649589479, "eval_signal/format_reward/centered_abs_mean": 0.0189208984375, "eval_signal/format_reward/group_std_mean": 0.055242715403437614, "eval_signal/format_reward/group_zero_std_frac": 0.6875, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.038965243846178055, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.00946044921875, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0189208984375, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.055242715403437614, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.6875, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.007793049095198512, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.001892089785542339, "eval_steps_per_second": 0.086, "step": 50 }, { "calibration/aurc": 0.4058639576024353, "calibration/batch_distribution_entropy": 0.9914696006819504, "calibration/buffer_distribution_entropy": 0.9771811661947689, "calibration/confidence_entropy": 0.4794375728490169, "calibration/coverage@0%": 0.000392156862745098, "calibration/coverage@1%": 0.000392156862745098, "calibration/coverage@10%": 0.000392156862745098, "calibration/coverage@15%": 0.000392156862745098, "calibration/coverage@20%": 0.004321429947224469, "calibration/coverage@25%": 0.032219268847028006, "calibration/coverage@30%": 0.17706864497969316, "calibration/coverage@5%": 0.000392156862745098, "calibration/ece": 0.2033278974465818, "calibration/mean_confidence": 0.5026187365209009, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00908203125, "completions/max_length": 1191.4, "completions/max_terminated_length": 1191.4, "completions/mean_length": 178.840625, "completions/mean_terminated_length": 180.475341796875, "completions/min_length": 0.0, "completions/min_terminated_length": 65.2, "epoch": 0.176, "grad_norm": 0.009468083269894123, "learning_rate": 1e-06, "loss": -0.0364, "num_tokens": 182467632.0, "reward": 0.9074610710144043, "reward_std": 0.14233968853950502, "rewards/accuracy_reward": 0.4705078125, "rewards/batch_coverage_0": 0.2374819278717041, "rewards/batch_coverage_1": 0.2374819278717041, "rewards/batch_coverage_10": 0.28763837218284605, "rewards/batch_coverage_15": 0.30320699214935304, "rewards/batch_coverage_20": 0.3154535412788391, "rewards/batch_coverage_25": 0.3183199048042297, "rewards/batch_coverage_5": 0.2596494257450104, "rewards/brier_reward": 0.7119304656982421, "rewards/confidence_uniqueness_reward": 0.9463279843330383, "rewards/format_reward": 0.99091796875, "rewards/frontier_entropy_batch_reward": -0.17094670236110687, "signal/accuracy_reward/centered_abs_mean": 0.14677734375, "signal/accuracy_reward/group_std_mean": 0.1891311824321747, "signal/accuracy_reward/group_zero_std_frac": 0.48125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7422645807266235, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.073388671875, "signal/advantage_abs_mean": 0.7341462850570679, "signal/advantage_pre_scale_abs_mean": 0.10340920239686965, "signal/advantage_pre_scale_std": 0.15987979173660277, "signal/advantage_std": 0.9834848642349243, "signal/batch_coverage_0/centered_abs_mean": 0.2536461532115936, "signal/batch_coverage_0/group_std_mean": 0.3149066686630249, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0370890274643898, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0036271399818360805, "signal/batch_coverage_1/centered_abs_mean": 0.2536461532115936, "signal/batch_coverage_1/group_std_mean": 0.3149066686630249, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0370890274643898, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0036271399818360805, "signal/batch_coverage_10/centered_abs_mean": 0.2639505207538605, "signal/batch_coverage_10/group_std_mean": 0.3261951506137848, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.038583753257989885, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0037744925823062657, "signal/batch_coverage_15/centered_abs_mean": 0.26278347074985503, "signal/batch_coverage_15/group_std_mean": 0.32544536590576173, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.038414908945560454, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0037578035145998, "signal/batch_coverage_20/centered_abs_mean": 0.26741994023323057, "signal/batch_coverage_20/group_std_mean": 0.3316928446292877, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03910925537347794, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.003824105253443122, "signal/batch_coverage_25/centered_abs_mean": 0.2639124572277069, "signal/batch_coverage_25/group_std_mean": 0.3276098668575287, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03858695030212402, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.003773948224261403, "signal/batch_coverage_5/centered_abs_mean": 0.25576930344104765, "signal/batch_coverage_5/group_std_mean": 0.31654787063598633, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03741679862141609, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0036575009115040304, "signal/brier_reward/centered_abs_mean": 0.22843996584415435, "signal/brier_reward/group_std_mean": 0.2793159544467926, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.23320569694042206, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.022843996807932854, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025819224119186402, "signal/confidence_uniqueness_reward/group_std_mean": 0.053821495920419696, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026197914406657218, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025819224305450917, "signal/format_reward/centered_abs_mean": 0.017303466796875, "signal/format_reward/group_std_mean": 0.04413560926914215, "signal/format_reward/group_zero_std_frac": 0.775, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.08711997866630554, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0086517333984375, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26356192827224734, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3436621904373169, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.2690876364707947, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02635619342327118, "step": 55 }, { "calibration/aurc": 0.3629724345315162, "calibration/batch_distribution_entropy": 0.9816236022150591, "calibration/buffer_distribution_entropy": 0.9804879354242804, "calibration/confidence_entropy": 0.4597148913096222, "calibration/coverage@0%": 0.0015670956059039057, "calibration/coverage@1%": 0.0015670956059039057, "calibration/coverage@10%": 0.004306821633301166, "calibration/coverage@15%": 0.009394884255610362, "calibration/coverage@20%": 0.02705354499632832, "calibration/coverage@25%": 0.07451777976847278, "calibration/coverage@30%": 0.2995797753932156, "calibration/coverage@5%": 0.0015670956059039057, "calibration/ece": 0.15940265474416004, "calibration/mean_confidence": 0.47236021548759366, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005078125, "completions/max_length": 1090.8, "completions/max_terminated_length": 1090.8, "completions/mean_length": 192.31787109375, "completions/mean_terminated_length": 193.29725036621093, "completions/min_length": 0.0, "completions/min_terminated_length": 63.2, "epoch": 0.192, "grad_norm": 0.006933207623660564, "learning_rate": 1e-06, "loss": -0.0216, "num_tokens": 199251783.0, "reward": 0.9257945537567138, "reward_std": 0.13200957477092742, "rewards/accuracy_reward": 0.496875, "rewards/batch_coverage_0": 0.27818471789360044, "rewards/batch_coverage_1": 0.27818471789360044, "rewards/batch_coverage_10": 0.3267790138721466, "rewards/batch_coverage_15": 0.3349075675010681, "rewards/batch_coverage_20": 0.3430617153644562, "rewards/batch_coverage_25": 0.34973667860031127, "rewards/batch_coverage_5": 0.3075689971446991, "rewards/brier_reward": 0.734505581855774, "rewards/confidence_uniqueness_reward": 0.9484726190567017, "rewards/format_reward": 0.99482421875, "rewards/frontier_entropy_batch_reward": -0.20076338946819305, "signal/accuracy_reward/centered_abs_mean": 0.14207763671875, "signal/accuracy_reward/group_std_mean": 0.18903130292892456, "signal/accuracy_reward/group_zero_std_frac": 0.45, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7877019166946411, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.071038818359375, "signal/advantage_abs_mean": 0.7333375334739685, "signal/advantage_pre_scale_abs_mean": 0.09644296765327454, "signal/advantage_pre_scale_std": 0.14937117993831633, "signal/advantage_std": 0.9833891034126282, "signal/batch_coverage_0/centered_abs_mean": 0.2371144860982895, "signal/batch_coverage_0/group_std_mean": 0.29555144906044006, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03813448995351791, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.003390737110748887, "signal/batch_coverage_1/centered_abs_mean": 0.2371144860982895, "signal/batch_coverage_1/group_std_mean": 0.29555144906044006, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03813448995351791, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.003390737110748887, "signal/batch_coverage_10/centered_abs_mean": 0.2512255012989044, "signal/batch_coverage_10/group_std_mean": 0.3117607593536377, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.040200534462928775, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0035925245843827724, "signal/batch_coverage_15/centered_abs_mean": 0.25101412534713746, "signal/batch_coverage_15/group_std_mean": 0.31150234341621397, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04012797474861145, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.003589502023532987, "signal/batch_coverage_20/centered_abs_mean": 0.25290718078613283, "signal/batch_coverage_20/group_std_mean": 0.31392589807510374, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.040450763702392575, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0036165726836770774, "signal/batch_coverage_25/centered_abs_mean": 0.2513018786907196, "signal/batch_coverage_25/group_std_mean": 0.3126278817653656, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04021292626857757, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.003593616746366024, "signal/batch_coverage_5/centered_abs_mean": 0.2468347042798996, "signal/batch_coverage_5/group_std_mean": 0.30652726292610166, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03960250541567802, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.00352973616681993, "signal/brier_reward/centered_abs_mean": 0.21205961108207702, "signal/brier_reward/group_std_mean": 0.26358293294906615, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.237126162648201, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.021205961331725122, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021224848553538324, "signal/confidence_uniqueness_reward/group_std_mean": 0.041045262664556506, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0238847978413105, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002122484892606735, "signal/format_reward/centered_abs_mean": 0.009942626953125, "signal/format_reward/group_std_mean": 0.027782656624913214, "signal/format_reward/group_zero_std_frac": 0.846875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05646195188164711, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0049713134765625, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2922514736652374, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3685956597328186, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.32798022627830503, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029225147888064386, "step": 60 }, { "calibration/aurc": 0.2955570498534926, "calibration/batch_distribution_entropy": 0.9753288303488776, "calibration/buffer_distribution_entropy": 0.9818330363107535, "calibration/confidence_entropy": 0.4696564086727891, "calibration/coverage@0%": 0.004703580062161851, "calibration/coverage@1%": 0.004703580062161851, "calibration/coverage@10%": 0.10038985457196577, "calibration/coverage@15%": 0.22431142319941683, "calibration/coverage@20%": 0.38893749280534134, "calibration/coverage@25%": 0.5214635015671635, "calibration/coverage@30%": 0.5915759301002205, "calibration/coverage@5%": 0.004703580062161851, "calibration/ece": 0.15610340603473422, "calibration/mean_confidence": 0.5436399052179712, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00458984375, "completions/max_length": 885.2, "completions/max_terminated_length": 885.2, "completions/mean_length": 210.11533203125, "completions/mean_terminated_length": 211.0801208496094, "completions/min_length": 0.0, "completions/min_terminated_length": 77.4, "epoch": 0.208, "grad_norm": 0.007579752244055271, "learning_rate": 1e-06, "loss": -0.0188, "num_tokens": 216435588.0, "reward": 0.9540463805198669, "reward_std": 0.1229624554514885, "rewards/accuracy_reward": 0.54208984375, "rewards/batch_coverage_0": 0.2904252469539642, "rewards/batch_coverage_1": 0.2904252469539642, "rewards/batch_coverage_10": 0.3460793435573578, "rewards/batch_coverage_15": 0.35459370613098146, "rewards/batch_coverage_20": 0.369259774684906, "rewards/batch_coverage_25": 0.3725292026996613, "rewards/batch_coverage_5": 0.3211548626422882, "rewards/brier_reward": 0.7557557940483093, "rewards/confidence_uniqueness_reward": 0.9494149565696717, "rewards/format_reward": 0.99521484375, "rewards/frontier_entropy_batch_reward": -0.18648923933506012, "signal/accuracy_reward/centered_abs_mean": 0.127740478515625, "signal/accuracy_reward/group_std_mean": 0.1696159452199936, "signal/accuracy_reward/group_zero_std_frac": 0.515625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8017047643661499, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0638702392578125, "signal/advantage_abs_mean": 0.7370152473449707, "signal/advantage_pre_scale_abs_mean": 0.08942593038082122, "signal/advantage_pre_scale_std": 0.1426718145608902, "signal/advantage_std": 0.9832539916038513, "signal/batch_coverage_0/centered_abs_mean": 0.21396125555038453, "signal/batch_coverage_0/group_std_mean": 0.2671938180923462, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03842084556818008, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.003059645928442478, "signal/batch_coverage_1/centered_abs_mean": 0.21396125555038453, "signal/batch_coverage_1/group_std_mean": 0.2671938180923462, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03842084556818008, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.003059645928442478, "signal/batch_coverage_10/centered_abs_mean": 0.22922752797603607, "signal/batch_coverage_10/group_std_mean": 0.28651703596115113, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04119753390550614, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.003277953574433923, "signal/batch_coverage_15/centered_abs_mean": 0.23158512711524964, "signal/batch_coverage_15/group_std_mean": 0.28928992748260496, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04162941426038742, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0033116671722382305, "signal/batch_coverage_20/centered_abs_mean": 0.23296157717704774, "signal/batch_coverage_20/group_std_mean": 0.29139900803565977, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04184585437178612, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0033313506282866, "signal/batch_coverage_25/centered_abs_mean": 0.23438824713230133, "signal/batch_coverage_25/group_std_mean": 0.2934773325920105, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04214929640293121, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0033517518546432256, "signal/batch_coverage_5/centered_abs_mean": 0.2227215588092804, "signal/batch_coverage_5/group_std_mean": 0.27792264223098756, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04000507667660713, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0031849182676523926, "signal/brier_reward/centered_abs_mean": 0.18360722959041595, "signal/brier_reward/group_std_mean": 0.23183298110961914, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2296048790216446, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018360722810029984, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.019582030177116395, "signal/confidence_uniqueness_reward/group_std_mean": 0.03763454332947731, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02437414266169071, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001958203059621155, "signal/format_reward/centered_abs_mean": 0.009185791015625, "signal/format_reward/group_std_mean": 0.025268962234258653, "signal/format_reward/group_zero_std_frac": 0.8625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05644859969615936, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0045928955078125, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27716624140739443, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35632293224334716, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.34786927700042725, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027716624736785888, "step": 65 }, { "calibration/aurc": 0.2965983333644823, "calibration/batch_distribution_entropy": 0.9823246202723134, "calibration/buffer_distribution_entropy": 0.9834847325184812, "calibration/confidence_entropy": 0.4621176100001009, "calibration/coverage@0%": 0.008615939526495529, "calibration/coverage@1%": 0.008615939526495529, "calibration/coverage@10%": 0.1707962088945167, "calibration/coverage@15%": 0.2769310463911592, "calibration/coverage@20%": 0.37681669928245265, "calibration/coverage@25%": 0.48340585549288206, "calibration/coverage@30%": 0.601358351559802, "calibration/coverage@5%": 0.05362572426230765, "calibration/ece": 0.15821446691039592, "calibration/mean_confidence": 0.5084448069151312, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0029296875, "completions/max_length": 999.0, "completions/max_terminated_length": 999.0, "completions/mean_length": 219.26708984375, "completions/mean_terminated_length": 219.91590270996093, "completions/min_length": 0.0, "completions/min_terminated_length": 85.6, "epoch": 0.224, "grad_norm": 0.006702653598040342, "learning_rate": 1e-06, "loss": -0.0129, "num_tokens": 233834067.0, "reward": 0.9355417966842652, "reward_std": 0.11460374146699906, "rewards/accuracy_reward": 0.49541015625, "rewards/batch_coverage_0": 0.3303173840045929, "rewards/batch_coverage_1": 0.3303173840045929, "rewards/batch_coverage_10": 0.38841472268104554, "rewards/batch_coverage_15": 0.39242235422134397, "rewards/batch_coverage_20": 0.3987464547157288, "rewards/batch_coverage_25": 0.4024514138698578, "rewards/batch_coverage_5": 0.35994952321052553, "rewards/brier_reward": 0.7808137893676758, "rewards/confidence_uniqueness_reward": 0.9499208688735962, "rewards/format_reward": 0.9970703125, "rewards/frontier_entropy_batch_reward": -0.20989351868629455, "signal/accuracy_reward/centered_abs_mean": 0.116143798828125, "signal/accuracy_reward/group_std_mean": 0.15492962598800658, "signal/accuracy_reward/group_zero_std_frac": 0.546875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7869158387184143, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0580718994140625, "signal/advantage_abs_mean": 0.7476185441017151, "signal/advantage_pre_scale_abs_mean": 0.08492442816495896, "signal/advantage_pre_scale_std": 0.13532426059246064, "signal/advantage_std": 0.9831453442573548, "signal/batch_coverage_0/centered_abs_mean": 0.18892920017242432, "signal/batch_coverage_0/group_std_mean": 0.23911657631397248, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0369180828332901, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002701687626540661, "signal/batch_coverage_1/centered_abs_mean": 0.18892920017242432, "signal/batch_coverage_1/group_std_mean": 0.23911657631397248, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0369180828332901, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002701687626540661, "signal/batch_coverage_10/centered_abs_mean": 0.2077297806739807, "signal/batch_coverage_10/group_std_mean": 0.262596932053566, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04057376310229301, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0029705358669161797, "signal/batch_coverage_15/centered_abs_mean": 0.20889460444450378, "signal/batch_coverage_15/group_std_mean": 0.2641852557659149, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.040815822780132294, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0029871928971260788, "signal/batch_coverage_20/centered_abs_mean": 0.2125068187713623, "signal/batch_coverage_20/group_std_mean": 0.269322806596756, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.041476115584373474, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0030388474464416505, "signal/batch_coverage_25/centered_abs_mean": 0.21426658630371093, "signal/batch_coverage_25/group_std_mean": 0.27135098576545713, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04181862398982048, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.003064012061804533, "signal/batch_coverage_5/centered_abs_mean": 0.19771002233028412, "signal/batch_coverage_5/group_std_mean": 0.24965128302574158, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03862960487604141, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0028272532392293213, "signal/brier_reward/centered_abs_mean": 0.1645033210515976, "signal/brier_reward/group_std_mean": 0.21097516715526582, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.22423305213451386, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016450332850217818, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01771753653883934, "signal/confidence_uniqueness_reward/group_std_mean": 0.030995216965675355, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024135235324501993, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017717537004500628, "signal/format_reward/centered_abs_mean": 0.0056640625, "signal/format_reward/group_std_mean": 0.016236505843698977, "signal/format_reward/group_zero_std_frac": 0.909375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.038613373041152955, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00283203125, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2941046953201294, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3701900064945221, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4015364408493042, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02941046915948391, "step": 70 }, { "calibration/aurc": 0.3330424907967232, "calibration/batch_distribution_entropy": 0.9678999886467642, "calibration/buffer_distribution_entropy": 0.9849417318548467, "calibration/confidence_entropy": 0.47655475637605293, "calibration/coverage@0%": 0.00546875, "calibration/coverage@1%": 0.00546875, "calibration/coverage@10%": 0.1390625, "calibration/coverage@15%": 0.1921875, "calibration/coverage@20%": 0.2574938725490196, "calibration/coverage@25%": 0.31924479166666664, "calibration/coverage@30%": 0.4216636029411765, "calibration/coverage@5%": 0.100390625, "calibration/ece": 0.1458545029740423, "calibration/mean_confidence": 0.5140984260384581, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00126953125, "completions/max_length": 785.6, "completions/max_terminated_length": 785.6, "completions/mean_length": 225.28369140625, "completions/mean_terminated_length": 225.57015075683594, "completions/min_length": 0.0, "completions/min_terminated_length": 97.8, "epoch": 0.24, "grad_norm": 0.006990624126046896, "learning_rate": 1e-06, "loss": -0.0024, "num_tokens": 251392652.0, "reward": 0.9652495741844177, "reward_std": 0.10869128853082657, "rewards/accuracy_reward": 0.5564453125, "rewards/batch_coverage_0": 0.34254276752471924, "rewards/batch_coverage_1": 0.34254276752471924, "rewards/batch_coverage_10": 0.366526848077774, "rewards/batch_coverage_15": 0.3773797333240509, "rewards/batch_coverage_20": 0.3843431532382965, "rewards/batch_coverage_25": 0.391269850730896, "rewards/batch_coverage_5": 0.3533271372318268, "rewards/brier_reward": 0.778117573261261, "rewards/confidence_uniqueness_reward": 0.9503395080566406, "rewards/format_reward": 0.9986328125, "rewards/frontier_entropy_batch_reward": -0.21713619232177733, "signal/accuracy_reward/centered_abs_mean": 0.121923828125, "signal/accuracy_reward/group_std_mean": 0.1602822333574295, "signal/accuracy_reward/group_zero_std_frac": 0.546875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8842084884643555, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0609619140625, "signal/advantage_abs_mean": 0.7662800312042236, "signal/advantage_pre_scale_abs_mean": 0.08325667977333069, "signal/advantage_pre_scale_std": 0.13016353249549867, "signal/advantage_std": 0.9830474019050598, "signal/batch_coverage_0/centered_abs_mean": 0.18846372067928313, "signal/batch_coverage_0/group_std_mean": 0.23571240305900573, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03953521251678467, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0026950312312692404, "signal/batch_coverage_1/centered_abs_mean": 0.18846372067928313, "signal/batch_coverage_1/group_std_mean": 0.23571240305900573, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03953521251678467, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0026950312312692404, "signal/batch_coverage_10/centered_abs_mean": 0.19527166485786437, "signal/batch_coverage_10/group_std_mean": 0.24427315294742585, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.040969235450029375, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0027923848014324904, "signal/batch_coverage_15/centered_abs_mean": 0.19935429692268372, "signal/batch_coverage_15/group_std_mean": 0.2496377408504486, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04183716475963593, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002850766479969025, "signal/batch_coverage_20/centered_abs_mean": 0.20147362351417542, "signal/batch_coverage_20/group_std_mean": 0.2527039110660553, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04230139851570129, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0028810727875679732, "signal/batch_coverage_25/centered_abs_mean": 0.2088294118642807, "signal/batch_coverage_25/group_std_mean": 0.26152198314666747, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04382089376449585, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002986260550096631, "signal/batch_coverage_5/centered_abs_mean": 0.19283068180084229, "signal/batch_coverage_5/group_std_mean": 0.24108233153820038, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04044611379504204, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0027574787847697733, "signal/brier_reward/centered_abs_mean": 0.15715266466140748, "signal/brier_reward/group_std_mean": 0.20129252970218658, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2302556663751602, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01571526676416397, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015452152304351329, "signal/confidence_uniqueness_reward/group_std_mean": 0.023464472219347953, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022645176202058793, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015452152118086814, "signal/format_reward/centered_abs_mean": 0.00264892578125, "signal/format_reward/group_std_mean": 0.007733980193734169, "signal/format_reward/group_zero_std_frac": 0.95625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01969538666307926, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001324462890625, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2867373704910278, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36399008631706237, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4199453890323639, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02867373712360859, "step": 75 }, { "calibration/aurc": 0.26090237641990177, "calibration/batch_distribution_entropy": 0.9734002526952843, "calibration/buffer_distribution_entropy": 0.9856670911764873, "calibration/confidence_entropy": 0.46283773268623846, "calibration/coverage@0%": 0.0140625, "calibration/coverage@1%": 0.0140625, "calibration/coverage@10%": 0.24609375, "calibration/coverage@15%": 0.369921875, "calibration/coverage@20%": 0.440234375, "calibration/coverage@25%": 0.480078125, "calibration/coverage@30%": 0.7054106531311154, "calibration/coverage@5%": 0.090625, "calibration/ece": 0.12688025020767357, "calibration/mean_confidence": 0.5268819480757656, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 798.2, "completions/max_terminated_length": 798.2, "completions/mean_length": 221.2, "completions/mean_terminated_length": 221.41583557128905, "completions/min_length": 0.0, "completions/min_terminated_length": 97.8, "epoch": 0.256, "grad_norm": 0.007445762399584055, "learning_rate": 1e-06, "loss": -0.0042, "num_tokens": 268712556.0, "reward": 0.9550706624984742, "reward_std": 0.10047266483306885, "rewards/accuracy_reward": 0.53466796875, "rewards/batch_coverage_0": 0.35365639328956605, "rewards/batch_coverage_1": 0.35365639328956605, "rewards/batch_coverage_10": 0.398507171869278, "rewards/batch_coverage_15": 0.40682188868522645, "rewards/batch_coverage_20": 0.41643165946006777, "rewards/batch_coverage_25": 0.4207874059677124, "rewards/batch_coverage_5": 0.38032680153846743, "rewards/brier_reward": 0.7843676567077636, "rewards/confidence_uniqueness_reward": 0.9491483807563782, "rewards/format_reward": 0.9990234375, "rewards/frontier_entropy_batch_reward": -0.2416835457086563, "signal/accuracy_reward/centered_abs_mean": 0.110015869140625, "signal/accuracy_reward/group_std_mean": 0.1448248639702797, "signal/accuracy_reward/group_zero_std_frac": 0.584375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.894112491607666, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0550079345703125, "signal/advantage_abs_mean": 0.7612926959991455, "signal/advantage_pre_scale_abs_mean": 0.0766275018453598, "signal/advantage_pre_scale_std": 0.12247075140476227, "signal/advantage_std": 0.9828827261924744, "signal/batch_coverage_0/centered_abs_mean": 0.1827174574136734, "signal/batch_coverage_0/group_std_mean": 0.2315205842256546, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04265338107943535, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002612859569489956, "signal/batch_coverage_1/centered_abs_mean": 0.1827174574136734, "signal/batch_coverage_1/group_std_mean": 0.2315205842256546, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04265338107943535, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002612859569489956, "signal/batch_coverage_10/centered_abs_mean": 0.1913502722978592, "signal/batch_coverage_10/group_std_mean": 0.24344587028026582, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04468824490904808, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002736308891326189, "signal/batch_coverage_15/centered_abs_mean": 0.19306427836418152, "signal/batch_coverage_15/group_std_mean": 0.2460806131362915, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.045067351311445236, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0027608192525804044, "signal/batch_coverage_20/centered_abs_mean": 0.19285837113857268, "signal/batch_coverage_20/group_std_mean": 0.24617846310138702, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.045022976398468015, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0027578747365623714, "signal/batch_coverage_25/centered_abs_mean": 0.19478027522563934, "signal/batch_coverage_25/group_std_mean": 0.24878009259700776, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04542726948857308, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002785358065739274, "signal/batch_coverage_5/centered_abs_mean": 0.19009583592414855, "signal/batch_coverage_5/group_std_mean": 0.24117153286933898, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.044343578070402144, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002718370407819748, "signal/brier_reward/centered_abs_mean": 0.14258549511432647, "signal/brier_reward/group_std_mean": 0.18335953056812287, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2328980028629303, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014258549734950065, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.016221878677606584, "signal/confidence_uniqueness_reward/group_std_mean": 0.023178667202591895, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026576806232333182, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016221878584474325, "signal/format_reward/centered_abs_mean": 0.00189208984375, "signal/format_reward/group_std_mean": 0.005524271540343762, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.015373882651329041, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000946044921875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30239012837409973, "signal/frontier_entropy_batch_reward/group_std_mean": 0.37879385948181155, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4947479128837585, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030239013954997063, "step": 80 }, { "calibration/aurc": 0.35560353793626626, "calibration/batch_distribution_entropy": 0.9819774641556854, "calibration/buffer_distribution_entropy": 0.986498195012946, "calibration/confidence_entropy": 0.4855185970826679, "calibration/coverage@0%": 0.015255017674878168, "calibration/coverage@1%": 0.015255017674878168, "calibration/coverage@10%": 0.14282666748206133, "calibration/coverage@15%": 0.2285940257952496, "calibration/coverage@20%": 0.2971340736637121, "calibration/coverage@25%": 0.34332502650032615, "calibration/coverage@30%": 0.3945764864164844, "calibration/coverage@5%": 0.06731295086527762, "calibration/ece": 0.131870000208108, "calibration/mean_confidence": 0.502433525846682, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0021484375, "completions/max_length": 762.2, "completions/max_terminated_length": 762.2, "completions/mean_length": 230.17255859375, "completions/mean_terminated_length": 230.6666259765625, "completions/min_length": 0.0, "completions/min_terminated_length": 103.8, "epoch": 0.272, "grad_norm": 0.006748313549906015, "learning_rate": 1e-06, "loss": -0.0084, "num_tokens": 286035219.0, "reward": 0.9507703185081482, "reward_std": 0.10085932165384293, "rewards/accuracy_reward": 0.5171875, "rewards/batch_coverage_0": 0.34573878049850465, "rewards/batch_coverage_1": 0.34573878049850465, "rewards/batch_coverage_10": 0.3782036781311035, "rewards/batch_coverage_15": 0.39221201539039613, "rewards/batch_coverage_20": 0.39762226939201356, "rewards/batch_coverage_25": 0.400358122587204, "rewards/batch_coverage_5": 0.36215776205062866, "rewards/brier_reward": 0.7837765336036682, "rewards/confidence_uniqueness_reward": 0.9518365740776062, "rewards/format_reward": 0.9978515625, "rewards/frontier_entropy_batch_reward": -0.1780557692050934, "signal/accuracy_reward/centered_abs_mean": 0.10745849609375, "signal/accuracy_reward/group_std_mean": 0.1406096488237381, "signal/accuracy_reward/group_zero_std_frac": 0.60625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8927412509918213, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.053729248046875, "signal/advantage_abs_mean": 0.757426643371582, "signal/advantage_pre_scale_abs_mean": 0.07568480670452118, "signal/advantage_pre_scale_std": 0.12425664216279983, "signal/advantage_std": 0.9828558802604676, "signal/batch_coverage_0/centered_abs_mean": 0.16644979119300843, "signal/batch_coverage_0/group_std_mean": 0.21065367460250856, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03954842537641525, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0023802319541573525, "signal/batch_coverage_1/centered_abs_mean": 0.16644979119300843, "signal/batch_coverage_1/group_std_mean": 0.21065367460250856, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03954842537641525, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0023802319541573525, "signal/batch_coverage_10/centered_abs_mean": 0.17243632078170776, "signal/batch_coverage_10/group_std_mean": 0.21887100636959075, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04097995311021805, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002465839311480522, "signal/batch_coverage_15/centered_abs_mean": 0.1725291758775711, "signal/batch_coverage_15/group_std_mean": 0.22031511068344117, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.041006506979465486, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.00246716714464128, "signal/batch_coverage_20/centered_abs_mean": 0.17189605236053468, "signal/batch_coverage_20/group_std_mean": 0.22016339004039764, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04085053354501724, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002458113431930542, "signal/batch_coverage_25/centered_abs_mean": 0.17436989545822143, "signal/batch_coverage_25/group_std_mean": 0.22325910925865172, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.041441477835178375, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0024934894870966675, "signal/batch_coverage_5/centered_abs_mean": 0.17018766105175018, "signal/batch_coverage_5/group_std_mean": 0.21510340869426728, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04042254015803337, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0024336835369467734, "signal/brier_reward/centered_abs_mean": 0.1363950252532959, "signal/brier_reward/group_std_mean": 0.1755893498659134, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.22622712850570678, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01363950278609991, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01479104645550251, "signal/confidence_uniqueness_reward/group_std_mean": 0.02456299029290676, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024606984853744508, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014791046734899283, "signal/format_reward/centered_abs_mean": 0.00413818359375, "signal/format_reward/group_std_mean": 0.011480780877172947, "signal/format_reward/group_zero_std_frac": 0.9375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.03427073359489441, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002069091796875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24672031104564668, "signal/frontier_entropy_batch_reward/group_std_mean": 0.31942119598388674, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.41071382761001585, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02467203177511692, "step": 85 }, { "calibration/aurc": 0.292581212532501, "calibration/batch_distribution_entropy": 0.9773755694865226, "calibration/buffer_distribution_entropy": 0.987680797883996, "calibration/confidence_entropy": 0.4689914363857097, "calibration/coverage@0%": 0.01918194032750086, "calibration/coverage@1%": 0.01918194032750086, "calibration/coverage@10%": 0.15126897891005714, "calibration/coverage@15%": 0.1884479772217106, "calibration/coverage@20%": 0.22212513729806993, "calibration/coverage@25%": 0.3336199835242316, "calibration/coverage@30%": 0.4916374863301485, "calibration/coverage@5%": 0.042665306276620234, "calibration/ece": 0.1441450143384424, "calibration/mean_confidence": 0.5331028913418352, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00205078125, "completions/max_length": 935.4, "completions/max_terminated_length": 935.4, "completions/mean_length": 227.60908203125, "completions/mean_terminated_length": 228.07676391601564, "completions/min_length": 0.0, "completions/min_terminated_length": 110.8, "epoch": 0.288, "grad_norm": 0.007270668167620897, "learning_rate": 1e-06, "loss": -0.0069, "num_tokens": 303324112.0, "reward": 0.9605869770050048, "reward_std": 0.10319101065397263, "rewards/accuracy_reward": 0.5412109375, "rewards/batch_coverage_0": 0.3360299587249756, "rewards/batch_coverage_1": 0.3360299587249756, "rewards/batch_coverage_10": 0.3981724143028259, "rewards/batch_coverage_15": 0.40614657998085024, "rewards/batch_coverage_20": 0.41246947050094607, "rewards/batch_coverage_25": 0.4149442493915558, "rewards/batch_coverage_5": 0.3723661780357361, "rewards/brier_reward": 0.7803151965141296, "rewards/confidence_uniqueness_reward": 0.9507789015769958, "rewards/format_reward": 0.9978515625, "rewards/frontier_entropy_batch_reward": -0.20322760939598083, "signal/accuracy_reward/centered_abs_mean": 0.108984375, "signal/accuracy_reward/group_std_mean": 0.1481735274195671, "signal/accuracy_reward/group_zero_std_frac": 0.553125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8605485916137695, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0544921875, "signal/advantage_abs_mean": 0.739544403553009, "signal/advantage_pre_scale_abs_mean": 0.07533028572797776, "signal/advantage_pre_scale_std": 0.12444878071546554, "signal/advantage_std": 0.982927656173706, "signal/batch_coverage_0/centered_abs_mean": 0.15977685451507567, "signal/batch_coverage_0/group_std_mean": 0.20317367017269133, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03621991276741028, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002284808969125152, "signal/batch_coverage_1/centered_abs_mean": 0.15977685451507567, "signal/batch_coverage_1/group_std_mean": 0.20317367017269133, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03621991276741028, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002284808969125152, "signal/batch_coverage_10/centered_abs_mean": 0.17815843522548674, "signal/batch_coverage_10/group_std_mean": 0.22847844064235687, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04042218551039696, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0025476655922830106, "signal/batch_coverage_15/centered_abs_mean": 0.18043112754821777, "signal/batch_coverage_15/group_std_mean": 0.2313321739435196, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04092831686139107, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.00258016511797905, "signal/batch_coverage_20/centered_abs_mean": 0.18301819264888763, "signal/batch_coverage_20/group_std_mean": 0.23511843979358674, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04153538718819618, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002617160230875015, "signal/batch_coverage_25/centered_abs_mean": 0.1800421804189682, "signal/batch_coverage_25/group_std_mean": 0.23162541687488555, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04084615483880043, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0025746031664311887, "signal/batch_coverage_5/centered_abs_mean": 0.17082005143165588, "signal/batch_coverage_5/group_std_mean": 0.21806212067604064, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03876911178231239, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0024427266791462897, "signal/brier_reward/centered_abs_mean": 0.13430711179971694, "signal/brier_reward/group_std_mean": 0.17480315566062926, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21253706514835358, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013430711254477501, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015577200055122375, "signal/confidence_uniqueness_reward/group_std_mean": 0.025766569748520853, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024803223088383675, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015577200334519147, "signal/format_reward/centered_abs_mean": 0.004150390625, "signal/format_reward/group_std_mean": 0.011817089095711709, "signal/format_reward/group_zero_std_frac": 0.934375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.03272081278264523, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0020751953125, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27212412357330323, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3471516013145447, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.43434072136878965, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027212412655353548, "step": 90 }, { "calibration/aurc": 0.2630640235337947, "calibration/batch_distribution_entropy": 0.964956944555903, "calibration/buffer_distribution_entropy": 0.9880598273968533, "calibration/confidence_entropy": 0.4528542483114846, "calibration/coverage@0%": 0.02978320095161352, "calibration/coverage@1%": 0.02978320095161352, "calibration/coverage@10%": 0.14730286635202025, "calibration/coverage@15%": 0.22133225893096964, "calibration/coverage@20%": 0.285966002839492, "calibration/coverage@25%": 0.5375250374122251, "calibration/coverage@30%": 0.6992985687425655, "calibration/coverage@5%": 0.08540424388933655, "calibration/ece": 0.10603972376971078, "calibration/mean_confidence": 0.5529362740863469, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00166015625, "completions/max_length": 921.4, "completions/max_terminated_length": 921.4, "completions/mean_length": 224.75595703125, "completions/mean_terminated_length": 225.1295593261719, "completions/min_length": 0.0, "completions/min_terminated_length": 102.2, "epoch": 0.304, "grad_norm": 0.00729918060824275, "learning_rate": 1e-06, "loss": -0.0048, "num_tokens": 320555565.0, "reward": 0.9541943192481994, "reward_std": 0.10027135163545609, "rewards/accuracy_reward": 0.5359375, "rewards/batch_coverage_0": 0.34543625116348264, "rewards/batch_coverage_1": 0.34543625116348264, "rewards/batch_coverage_10": 0.38589795827865603, "rewards/batch_coverage_15": 0.3934217095375061, "rewards/batch_coverage_20": 0.4026259660720825, "rewards/batch_coverage_25": 0.4048985719680786, "rewards/batch_coverage_5": 0.36726756691932677, "rewards/brier_reward": 0.7719451546669006, "rewards/confidence_uniqueness_reward": 0.9500939846038818, "rewards/format_reward": 0.99833984375, "rewards/frontier_entropy_batch_reward": -0.22971556186676026, "signal/accuracy_reward/centered_abs_mean": 0.1065185546875, "signal/accuracy_reward/group_std_mean": 0.14071860015392304, "signal/accuracy_reward/group_zero_std_frac": 0.59375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.86371351480484, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05325927734375, "signal/advantage_abs_mean": 0.7625090956687928, "signal/advantage_pre_scale_abs_mean": 0.07567221075296401, "signal/advantage_pre_scale_std": 0.12226974666118622, "signal/advantage_std": 0.9828738808631897, "signal/batch_coverage_0/centered_abs_mean": 0.1651271402835846, "signal/batch_coverage_0/group_std_mean": 0.20846222341060638, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.038963142409920694, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0023613180965185165, "signal/batch_coverage_1/centered_abs_mean": 0.1651271402835846, "signal/batch_coverage_1/group_std_mean": 0.20846222341060638, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.038963142409920694, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0023613180965185165, "signal/batch_coverage_10/centered_abs_mean": 0.17702984809875488, "signal/batch_coverage_10/group_std_mean": 0.22490586936473847, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.041466080397367475, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002531526843085885, "signal/batch_coverage_15/centered_abs_mean": 0.17536805868148803, "signal/batch_coverage_15/group_std_mean": 0.22317201495170594, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04111176505684853, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0025077632162719965, "signal/batch_coverage_20/centered_abs_mean": 0.17413158118724822, "signal/batch_coverage_20/group_std_mean": 0.2223893940448761, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04070866405963898, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0024900815915316343, "signal/batch_coverage_25/centered_abs_mean": 0.17070959508419037, "signal/batch_coverage_25/group_std_mean": 0.21875600814819335, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.039876680821180344, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002441147156059742, "signal/batch_coverage_5/centered_abs_mean": 0.17223967611789703, "signal/batch_coverage_5/group_std_mean": 0.21767829358577728, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0404712088406086, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002463027322664857, "signal/brier_reward/centered_abs_mean": 0.1345919907093048, "signal/brier_reward/group_std_mean": 0.17346469461917877, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21938477456569672, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013459199480712414, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015962999872863294, "signal/confidence_uniqueness_reward/group_std_mean": 0.024774506315588952, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026566693931818007, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015963000478222966, "signal/format_reward/centered_abs_mean": 0.003204345703125, "signal/format_reward/group_std_mean": 0.0090549532789737, "signal/format_reward/group_zero_std_frac": 0.95, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.025845942366868258, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0016021728515625, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28979048132896423, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3667328774929047, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4797462522983551, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028979047015309334, "step": 95 }, { "calibration/aurc": 0.21323978911458616, "calibration/batch_distribution_entropy": 0.9754091138368974, "calibration/buffer_distribution_entropy": 0.9878890606584776, "calibration/confidence_entropy": 0.4707118432324401, "calibration/coverage@0%": 0.04375458659491194, "calibration/coverage@1%": 0.05078583659491194, "calibration/coverage@10%": 0.33517153864970645, "calibration/coverage@15%": 0.42579959637964776, "calibration/coverage@20%": 0.556768285225049, "calibration/coverage@25%": 0.6622897810665362, "calibration/coverage@30%": 0.730684319960861, "calibration/coverage@5%": 0.19531708659491193, "calibration/ece": 0.15941081118757158, "calibration/mean_confidence": 0.5433335774149639, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 901.0, "completions/max_terminated_length": 901.0, "completions/mean_length": 216.8244140625, "completions/mean_terminated_length": 217.03798828125, "completions/min_length": 20.0, "completions/min_terminated_length": 106.0, "epoch": 0.32, "grad_norm": 0.0067256358452141285, "learning_rate": 1e-06, "loss": -0.0037, "num_tokens": 337864551.0, "reward": 0.9643454909324646, "reward_std": 0.08362279087305069, "rewards/accuracy_reward": 0.5408203125, "rewards/batch_coverage_0": 0.40262268781661986, "rewards/batch_coverage_1": 0.40262268781661986, "rewards/batch_coverage_10": 0.4292051911354065, "rewards/batch_coverage_15": 0.43654964566230775, "rewards/batch_coverage_20": 0.4390207827091217, "rewards/batch_coverage_25": 0.4412606120109558, "rewards/batch_coverage_5": 0.4163574159145355, "rewards/brier_reward": 0.7956306457519531, "rewards/confidence_uniqueness_reward": 0.9509375929832459, "rewards/format_reward": 0.9990234375, "rewards/frontier_entropy_batch_reward": -0.22670443654060363, "signal/accuracy_reward/centered_abs_mean": 0.0785888671875, "signal/accuracy_reward/group_std_mean": 0.10648886114358902, "signal/accuracy_reward/group_zero_std_frac": 0.6875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7305195569992066, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03929443359375, "signal/advantage_abs_mean": 0.76407390832901, "signal/advantage_pre_scale_abs_mean": 0.06325442418456077, "signal/advantage_pre_scale_std": 0.1052427500486374, "signal/advantage_std": 0.9826571702957153, "signal/batch_coverage_0/centered_abs_mean": 0.14719704687595367, "signal/batch_coverage_0/group_std_mean": 0.1912107139825821, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03920513764023781, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0021049177274107935, "signal/batch_coverage_1/centered_abs_mean": 0.14719704687595367, "signal/batch_coverage_1/group_std_mean": 0.1912107139825821, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03920513764023781, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0021049177274107935, "signal/batch_coverage_10/centered_abs_mean": 0.15465619862079621, "signal/batch_coverage_10/group_std_mean": 0.2020500361919403, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04116538017988205, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002211583498865366, "signal/batch_coverage_15/centered_abs_mean": 0.1558520555496216, "signal/batch_coverage_15/group_std_mean": 0.20389397144317628, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04150298237800598, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0022286843974143266, "signal/batch_coverage_20/centered_abs_mean": 0.15506626963615416, "signal/batch_coverage_20/group_std_mean": 0.20311373770236968, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04127655476331711, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0022174476645886896, "signal/batch_coverage_25/centered_abs_mean": 0.15622910261154174, "signal/batch_coverage_25/group_std_mean": 0.2045728623867035, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04156334474682808, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002234076149761677, "signal/batch_coverage_5/centered_abs_mean": 0.15119654536247254, "signal/batch_coverage_5/group_std_mean": 0.19683580100536346, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04023168459534645, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0021621106658130883, "signal/brier_reward/centered_abs_mean": 0.11154842674732209, "signal/brier_reward/group_std_mean": 0.1480434626340866, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20754066705703736, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011154843121767044, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014669315330684185, "signal/confidence_uniqueness_reward/group_std_mean": 0.020008804649114607, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02726968452334404, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014669314958155155, "signal/format_reward/centered_abs_mean": 0.00177001953125, "signal/format_reward/group_std_mean": 0.003914954699575901, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.016244655288755894, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000885009765625, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28998995423316953, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36479984521865844, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5398689031600952, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028998995944857598, "step": 100 }, { "epoch": 0.32, "eval_calibration/aurc": 0.4818202421936853, "eval_calibration/batch_distribution_entropy": 0.9161004968263402, "eval_calibration/buffer_distribution_entropy": 0.9880535073418304, "eval_calibration/confidence_entropy": 0.4630894061433339, "eval_calibration/coverage@0%": 0.046875, "eval_calibration/coverage@1%": 0.046875, "eval_calibration/coverage@10%": 0.046875, "eval_calibration/coverage@15%": 0.046875, "eval_calibration/coverage@20%": 0.0859375, "eval_calibration/coverage@25%": 0.1328125, "eval_calibration/coverage@30%": 0.2265625, "eval_calibration/coverage@5%": 0.046875, "eval_calibration/ece": 0.23660033525497487, "eval_calibration/mean_confidence": 0.46879126181649067, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 449.25, "eval_completions/max_terminated_length": 449.25, "eval_completions/mean_length": 221.74993133544922, "eval_completions/mean_terminated_length": 221.74993133544922, "eval_completions/min_length": 111.75, "eval_completions/min_terminated_length": 111.75, "eval_loss": 0.0, "eval_num_tokens": 337864551.0, "eval_reward": 0.7951662689447403, "eval_reward_std": 0.22639942914247513, "eval_rewards/accuracy_reward": 0.421875, "eval_rewards/batch_coverage_0": 0.1622033342719078, "eval_rewards/batch_coverage_1": 0.1622033342719078, "eval_rewards/batch_coverage_10": 0.16187801584601402, "eval_rewards/batch_coverage_15": 0.1481264792382717, "eval_rewards/batch_coverage_20": 0.13171643018722534, "eval_rewards/batch_coverage_25": 0.11638518050312996, "eval_rewards/batch_coverage_5": 0.1622033342719078, "eval_rewards/brier_reward": 0.7932350784540176, "eval_rewards/confidence_uniqueness_reward": 0.899658203125, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 22.8241, "eval_samples_per_second": 21.907, "eval_signal/accuracy_reward/centered_abs_mean": 0.47021484375, "eval_signal/accuracy_reward/group_std_mean": 0.4921262636780739, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0398327857255936, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.235107421875, "eval_signal/advantage_abs_mean": 0.9330623000860214, "eval_signal/advantage_pre_scale_abs_mean": 0.21175596863031387, "eval_signal/advantage_pre_scale_std": 0.22392144799232483, "eval_signal/advantage_std": 0.9876809418201447, "eval_signal/batch_coverage_0/centered_abs_mean": 0.30005283653736115, "eval_signal/batch_coverage_0/group_std_mean": 0.3642084077000618, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.018966381903737783, "eval_signal/batch_coverage_0/weight": 0.014299999922513962, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.004290755605325103, "eval_signal/batch_coverage_1/centered_abs_mean": 0.30005283653736115, "eval_signal/batch_coverage_1/group_std_mean": 0.3642084077000618, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.018966381903737783, "eval_signal/batch_coverage_1/weight": 0.014299999922513962, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.004290755605325103, "eval_signal/batch_coverage_10/centered_abs_mean": 0.2983648404479027, "eval_signal/batch_coverage_10/group_std_mean": 0.36220937967300415, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.018857899587601423, "eval_signal/batch_coverage_10/weight": 0.014299999922513962, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.004266617470420897, "eval_signal/batch_coverage_15/centered_abs_mean": 0.2660781927406788, "eval_signal/batch_coverage_15/group_std_mean": 0.3240368664264679, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.016829160042107105, "eval_signal/batch_coverage_15/weight": 0.014299999922513962, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.003804918087553233, "eval_signal/batch_coverage_20/centered_abs_mean": 0.2327834814786911, "eval_signal/batch_coverage_20/group_std_mean": 0.28654681891202927, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.014746756991371512, "eval_signal/batch_coverage_20/weight": 0.014299999922513962, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.0033288037520833313, "eval_signal/batch_coverage_25/centered_abs_mean": 0.19945495203137398, "eval_signal/batch_coverage_25/group_std_mean": 0.248090460896492, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.012607906712219119, "eval_signal/batch_coverage_25/weight": 0.014299999922513962, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.0028522057691589, "eval_signal/batch_coverage_5/centered_abs_mean": 0.30005283653736115, "eval_signal/batch_coverage_5/group_std_mean": 0.3642084077000618, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.018966381903737783, "eval_signal/batch_coverage_5/weight": 0.014299999922513962, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.004290755605325103, "eval_signal/brier_reward/centered_abs_mean": 0.19953547045588493, "eval_signal/brier_reward/group_std_mean": 0.24987618252635002, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08829442970454693, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019953548442572355, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.040130615234375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04819970764219761, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.017783273942768574, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0040130615234375, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.175, "step": 100 }, { "calibration/aurc": 0.2685604926137561, "calibration/batch_distribution_entropy": 0.9600400894076688, "calibration/buffer_distribution_entropy": 0.9896598366446046, "calibration/confidence_entropy": 0.47332780847956474, "calibration/coverage@0%": 0.029347411481715973, "calibration/coverage@1%": 0.029347411481715973, "calibration/coverage@10%": 0.09506503371896705, "calibration/coverage@15%": 0.1670885644497525, "calibration/coverage@20%": 0.29349015890602814, "calibration/coverage@25%": 0.5120462328767122, "calibration/coverage@30%": 0.6164479640315029, "calibration/coverage@5%": 0.046541038932696364, "calibration/ece": 0.12776021875721216, "calibration/mean_confidence": 0.5172638957764836, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 835.2, "completions/max_terminated_length": 835.2, "completions/mean_length": 219.88203125, "completions/mean_terminated_length": 220.11773986816405, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.336, "grad_norm": 0.006541461683809757, "learning_rate": 1e-06, "loss": -0.0028, "num_tokens": 354838575.0, "reward": 0.9650772452354431, "reward_std": 0.09242034554481507, "rewards/accuracy_reward": 0.5484375, "rewards/batch_coverage_0": 0.3875566303730011, "rewards/batch_coverage_1": 0.3875566303730011, "rewards/batch_coverage_10": 0.4233329236507416, "rewards/batch_coverage_15": 0.43131263852119445, "rewards/batch_coverage_20": 0.43660197257995603, "rewards/batch_coverage_25": 0.43793233633041384, "rewards/batch_coverage_5": 0.4037653625011444, "rewards/brier_reward": 0.7984023332595825, "rewards/confidence_uniqueness_reward": 0.9482085943222046, "rewards/format_reward": 0.9986328125, "rewards/frontier_entropy_batch_reward": -0.24704246819019318, "signal/accuracy_reward/centered_abs_mean": 0.088916015625, "signal/accuracy_reward/group_std_mean": 0.12266767919063568, "signal/accuracy_reward/group_zero_std_frac": 0.6375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7738893389701843, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0444580078125, "signal/advantage_abs_mean": 0.7501138925552369, "signal/advantage_pre_scale_abs_mean": 0.06785393953323364, "signal/advantage_pre_scale_std": 0.11415418684482574, "signal/advantage_std": 0.9827425599098205, "signal/batch_coverage_0/centered_abs_mean": 0.1362660273909569, "signal/batch_coverage_0/group_std_mean": 0.17492769360542298, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03478074930608273, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019486041273921728, "signal/batch_coverage_1/centered_abs_mean": 0.1362660273909569, "signal/batch_coverage_1/group_std_mean": 0.17492769360542298, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03478074930608273, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019486041273921728, "signal/batch_coverage_10/centered_abs_mean": 0.14739642143249512, "signal/batch_coverage_10/group_std_mean": 0.19052909314632416, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03750689923763275, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021077687153592704, "signal/batch_coverage_15/centered_abs_mean": 0.14911286830902098, "signal/batch_coverage_15/group_std_mean": 0.19245946705341338, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03794231489300728, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002132314071059227, "signal/batch_coverage_20/centered_abs_mean": 0.153034570813179, "signal/batch_coverage_20/group_std_mean": 0.1976184368133545, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03890108093619347, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002188394358381629, "signal/batch_coverage_25/centered_abs_mean": 0.15436229705810547, "signal/batch_coverage_25/group_std_mean": 0.19924021661281585, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03921979740262031, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002207380859181285, "signal/batch_coverage_5/centered_abs_mean": 0.14039334505796433, "signal/batch_coverage_5/group_std_mean": 0.18035671412944793, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03583626076579094, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020076247630640863, "signal/brier_reward/centered_abs_mean": 0.11269704401493072, "signal/brier_reward/group_std_mean": 0.14758805930614471, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1994762033224106, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011269704438745975, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015488607808947563, "signal/confidence_uniqueness_reward/group_std_mean": 0.023617172613739967, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0275675717741251, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015488607343286276, "signal/format_reward/centered_abs_mean": 0.00264892578125, "signal/format_reward/group_std_mean": 0.007733980286866426, "signal/format_reward/group_zero_std_frac": 0.95625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.023863587900996207, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001324462890625, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28287087082862855, "signal/frontier_entropy_batch_reward/group_std_mean": 0.357461279630661, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5021488547325135, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028287087753415107, "step": 105 }, { "calibration/aurc": 0.27019286399420744, "calibration/batch_distribution_entropy": 0.9298135335293258, "calibration/buffer_distribution_entropy": 0.9932549531185441, "calibration/confidence_entropy": 0.4141606750385892, "calibration/coverage@0%": 0.07255500783934542, "calibration/coverage@1%": 0.07255500783934542, "calibration/coverage@10%": 0.31170399103579516, "calibration/coverage@15%": 0.39551763876797075, "calibration/coverage@20%": 0.44607006463126186, "calibration/coverage@25%": 0.5083359075246159, "calibration/coverage@30%": 0.5835207135923078, "calibration/coverage@5%": 0.1879378452586406, "calibration/ece": 0.11614041938554136, "calibration/mean_confidence": 0.4979168544314918, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00166015625, "completions/max_length": 948.2, "completions/max_terminated_length": 948.2, "completions/mean_length": 233.632421875, "completions/mean_terminated_length": 234.02719421386718, "completions/min_length": 21.8, "completions/min_terminated_length": 111.0, "epoch": 0.352, "grad_norm": 0.00914387870579958, "learning_rate": 1e-06, "loss": -0.0094, "num_tokens": 372491387.0, "reward": 0.9311908602714538, "reward_std": 0.09648803919553757, "rewards/accuracy_reward": 0.48447265625, "rewards/batch_coverage_0": 0.38543524742126467, "rewards/batch_coverage_1": 0.38543524742126467, "rewards/batch_coverage_10": 0.4238883852958679, "rewards/batch_coverage_15": 0.42850934267044066, "rewards/batch_coverage_20": 0.43640496134757994, "rewards/batch_coverage_25": 0.44086662530899046, "rewards/batch_coverage_5": 0.41723122596740725, "rewards/brier_reward": 0.7983362555503846, "rewards/confidence_uniqueness_reward": 0.9474457859992981, "rewards/format_reward": 0.99814453125, "rewards/frontier_entropy_batch_reward": -0.2642006158828735, "signal/accuracy_reward/centered_abs_mean": 0.099725341796875, "signal/accuracy_reward/group_std_mean": 0.1285892456769943, "signal/accuracy_reward/group_zero_std_frac": 0.6375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8679127812385559, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0498626708984375, "signal/advantage_abs_mean": 0.7641012310981751, "signal/advantage_pre_scale_abs_mean": 0.0740143045783043, "signal/advantage_pre_scale_std": 0.11975871622562409, "signal/advantage_std": 0.9827718019485474, "signal/batch_coverage_0/centered_abs_mean": 0.14283648133277893, "signal/batch_coverage_0/group_std_mean": 0.18093341290950776, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03563266433775425, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002042561722919345, "signal/batch_coverage_1/centered_abs_mean": 0.14283648133277893, "signal/batch_coverage_1/group_std_mean": 0.18093341290950776, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03563266433775425, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002042561722919345, "signal/batch_coverage_10/centered_abs_mean": 0.15249529480934143, "signal/batch_coverage_10/group_std_mean": 0.19459120631217958, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.038044761121273044, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002180682681500912, "signal/batch_coverage_15/centered_abs_mean": 0.15441205203533173, "signal/batch_coverage_15/group_std_mean": 0.19762863516807555, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03852261155843735, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002208092389628291, "signal/batch_coverage_20/centered_abs_mean": 0.1582718998193741, "signal/batch_coverage_20/group_std_mean": 0.2032813996076584, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03947751969099045, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002263288199901581, "signal/batch_coverage_25/centered_abs_mean": 0.1593154788017273, "signal/batch_coverage_25/group_std_mean": 0.20504648387432098, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.039732877910137174, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0022782113403081892, "signal/batch_coverage_5/centered_abs_mean": 0.15034010708332063, "signal/batch_coverage_5/group_std_mean": 0.19178448617458344, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03750232979655266, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002149863541126251, "signal/brier_reward/centered_abs_mean": 0.1160271480679512, "signal/brier_reward/group_std_mean": 0.14954084753990174, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20231397449970245, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011602715216577053, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.017225971072912218, "signal/confidence_uniqueness_reward/group_std_mean": 0.02534388713538647, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030038028210401534, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001722597167827189, "signal/format_reward/centered_abs_mean": 0.003497314453125, "signal/format_reward/group_std_mean": 0.008663824107497931, "signal/format_reward/group_zero_std_frac": 0.95625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.030156026408076287, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0017486572265625, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30242173075675965, "signal/frontier_entropy_batch_reward/group_std_mean": 0.37537208795547483, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5284075975418091, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03024217374622822, "step": 110 }, { "calibration/aurc": 0.32133141547128696, "calibration/batch_distribution_entropy": 0.9596077713361109, "calibration/buffer_distribution_entropy": 0.9951584201390368, "calibration/confidence_entropy": 0.4573241392343905, "calibration/coverage@0%": 0.030078125, "calibration/coverage@1%": 0.030078125, "calibration/coverage@10%": 0.11796875, "calibration/coverage@15%": 0.183984375, "calibration/coverage@20%": 0.3625, "calibration/coverage@25%": 0.406640625, "calibration/coverage@30%": 0.462109375, "calibration/coverage@5%": 0.0546875, "calibration/ece": 0.131855792371392, "calibration/mean_confidence": 0.49068173815499366, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 929.6, "completions/max_terminated_length": 929.6, "completions/mean_length": 238.81806640625, "completions/mean_terminated_length": 239.05218811035155, "completions/min_length": 0.0, "completions/min_terminated_length": 105.2, "epoch": 0.368, "grad_norm": 0.007342960219830275, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 390002356.0, "reward": 0.9485709071159363, "reward_std": 0.08474339246749878, "rewards/accuracy_reward": 0.51044921875, "rewards/batch_coverage_0": 0.39238272309303285, "rewards/batch_coverage_1": 0.39238272309303285, "rewards/batch_coverage_10": 0.4261459052562714, "rewards/batch_coverage_15": 0.4329572141170502, "rewards/batch_coverage_20": 0.43899570107460023, "rewards/batch_coverage_25": 0.440056574344635, "rewards/batch_coverage_5": 0.41612568497657776, "rewards/brier_reward": 0.8009498476982116, "rewards/confidence_uniqueness_reward": 0.9502113699913025, "rewards/format_reward": 0.9990234375, "rewards/frontier_entropy_batch_reward": -0.23309923112392425, "signal/accuracy_reward/centered_abs_mean": 0.082708740234375, "signal/accuracy_reward/group_std_mean": 0.11486676782369613, "signal/accuracy_reward/group_zero_std_frac": 0.65, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8056630492210388, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0413543701171875, "signal/advantage_abs_mean": 0.7527588725090026, "signal/advantage_pre_scale_abs_mean": 0.06306558772921562, "signal/advantage_pre_scale_std": 0.10630078315734863, "signal/advantage_std": 0.9825676798820495, "signal/batch_coverage_0/centered_abs_mean": 0.139876489341259, "signal/batch_coverage_0/group_std_mean": 0.17783224880695342, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03907948359847069, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0020002338802441954, "signal/batch_coverage_1/centered_abs_mean": 0.139876489341259, "signal/batch_coverage_1/group_std_mean": 0.17783224880695342, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03907948359847069, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0020002338802441954, "signal/batch_coverage_10/centered_abs_mean": 0.14939309060573577, "signal/batch_coverage_10/group_std_mean": 0.19122098684310912, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04172439575195312, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021363211795687675, "signal/batch_coverage_15/centered_abs_mean": 0.15046306550502778, "signal/batch_coverage_15/group_std_mean": 0.1928224891424179, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04200417771935463, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0021516217617318033, "signal/batch_coverage_20/centered_abs_mean": 0.15322479605674744, "signal/batch_coverage_20/group_std_mean": 0.1975228577852249, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04276901260018349, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021911146119236945, "signal/batch_coverage_25/centered_abs_mean": 0.1543430656194687, "signal/batch_coverage_25/group_std_mean": 0.19888520240783691, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04310151115059853, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002207105793058872, "signal/batch_coverage_5/centered_abs_mean": 0.14643795192241668, "signal/batch_coverage_5/group_std_mean": 0.18699788451194763, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.040923018008470535, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002094062673859298, "signal/brier_reward/centered_abs_mean": 0.10724746435880661, "signal/brier_reward/group_std_mean": 0.140754859149456, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20941689908504485, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010724746435880662, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014008820429444313, "signal/confidence_uniqueness_reward/group_std_mean": 0.02020746245980263, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027339933440089226, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014008820755407215, "signal/format_reward/centered_abs_mean": 0.0018798828125, "signal/format_reward/group_std_mean": 0.0051879632286727425, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01840692777186632, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00093994140625, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2798162639141083, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35085987448692324, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5465050220489502, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027981626987457275, "step": 115 }, { "calibration/aurc": 0.30339147131241734, "calibration/batch_distribution_entropy": 0.9563765817949786, "calibration/buffer_distribution_entropy": 0.9954902797933238, "calibration/confidence_entropy": 0.44445111541300547, "calibration/coverage@0%": 0.08164674947087455, "calibration/coverage@1%": 0.13672487447087453, "calibration/coverage@10%": 0.27734987447087456, "calibration/coverage@15%": 0.30899049947087454, "calibration/coverage@20%": 0.3421974466333012, "calibration/coverage@25%": 0.3824318216333012, "calibration/coverage@30%": 0.46457662214675943, "calibration/coverage@5%": 0.22500612447087454, "calibration/ece": 0.15227414567768388, "calibration/mean_confidence": 0.4745961122709959, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 861.6, "completions/max_terminated_length": 861.6, "completions/mean_length": 231.5802734375, "completions/mean_terminated_length": 231.82743225097656, "completions/min_length": 19.8, "completions/min_terminated_length": 105.0, "epoch": 0.384, "grad_norm": 0.006414992269128561, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 407230250.0, "reward": 0.9628369092941285, "reward_std": 0.08602556586265564, "rewards/accuracy_reward": 0.540234375, "rewards/batch_coverage_0": 0.3981931030750275, "rewards/batch_coverage_1": 0.3981931030750275, "rewards/batch_coverage_10": 0.43429911732673643, "rewards/batch_coverage_15": 0.4370169997215271, "rewards/batch_coverage_20": 0.4419554710388184, "rewards/batch_coverage_25": 0.44801422357559206, "rewards/batch_coverage_5": 0.4173740684986115, "rewards/brier_reward": 0.8051711320877075, "rewards/confidence_uniqueness_reward": 0.9488577246665955, "rewards/format_reward": 0.998828125, "rewards/frontier_entropy_batch_reward": -0.24640387594699859, "signal/accuracy_reward/centered_abs_mean": 0.090576171875, "signal/accuracy_reward/group_std_mean": 0.12041936963796615, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8770391941070557, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0452880859375, "signal/advantage_abs_mean": 0.7650493621826172, "signal/advantage_pre_scale_abs_mean": 0.06524530351161957, "signal/advantage_pre_scale_std": 0.10862657576799392, "signal/advantage_std": 0.982580029964447, "signal/batch_coverage_0/centered_abs_mean": 0.14904703795909882, "signal/batch_coverage_0/group_std_mean": 0.1901983439922333, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.041442494839429855, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0021313726902008057, "signal/batch_coverage_1/centered_abs_mean": 0.14904703795909882, "signal/batch_coverage_1/group_std_mean": 0.1901983439922333, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.041442494839429855, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0021313726902008057, "signal/batch_coverage_10/centered_abs_mean": 0.1577294200658798, "signal/batch_coverage_10/group_std_mean": 0.20299181640148162, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04381066411733627, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0022555307485163214, "signal/batch_coverage_15/centered_abs_mean": 0.1570695459842682, "signal/batch_coverage_15/group_std_mean": 0.20258375704288484, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04360567554831505, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0022460945881903173, "signal/batch_coverage_20/centered_abs_mean": 0.1576721489429474, "signal/batch_coverage_20/group_std_mean": 0.20356932580471038, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.043791229277849196, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002254711743444204, "signal/batch_coverage_25/centered_abs_mean": 0.16238428950309752, "signal/batch_coverage_25/group_std_mean": 0.209498855471611, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04516012445092201, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0023220953065901996, "signal/batch_coverage_5/centered_abs_mean": 0.15410350263118744, "signal/batch_coverage_5/group_std_mean": 0.19714944660663605, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04280069917440414, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002203680109232664, "signal/brier_reward/centered_abs_mean": 0.1032984122633934, "signal/brier_reward/group_std_mean": 0.1353047162294388, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20016059279441833, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010329841263592243, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015139190666377544, "signal/confidence_uniqueness_reward/group_std_mean": 0.022230926714837552, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029904866591095924, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001513919117860496, "signal/format_reward/centered_abs_mean": 0.00225830078125, "signal/format_reward/group_std_mean": 0.0062928176019340755, "signal/format_reward/group_zero_std_frac": 0.965625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.02292755376547575, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001129150390625, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28261598348617556, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3594805419445038, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5510689675807953, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02826159931719303, "step": 120 }, { "calibration/aurc": 0.3887606816696102, "calibration/batch_distribution_entropy": 0.9813992911999595, "calibration/buffer_distribution_entropy": 0.9948418303817892, "calibration/confidence_entropy": 0.492348453496467, "calibration/coverage@0%": 0.005473354616895874, "calibration/coverage@1%": 0.005473354616895874, "calibration/coverage@10%": 0.007426479616895874, "calibration/coverage@15%": 0.014848354616895876, "calibration/coverage@20%": 0.0964782355108055, "calibration/coverage@25%": 0.22515502210216107, "calibration/coverage@30%": 0.3283652075147348, "calibration/coverage@5%": 0.005473354616895874, "calibration/ece": 0.14469285404897078, "calibration/mean_confidence": 0.5267769005041314, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 673.0, "completions/max_terminated_length": 673.0, "completions/mean_length": 213.291015625, "completions/mean_terminated_length": 213.37777099609374, "completions/min_length": 86.0, "completions/min_terminated_length": 106.4, "epoch": 0.4, "grad_norm": 0.007502261083573103, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 424450798.0, "reward": 0.9512521266937256, "reward_std": 0.09306368678808212, "rewards/accuracy_reward": 0.523046875, "rewards/batch_coverage_0": 0.3664769411087036, "rewards/batch_coverage_1": 0.3664769411087036, "rewards/batch_coverage_10": 0.39776320457458497, "rewards/batch_coverage_15": 0.4013418197631836, "rewards/batch_coverage_20": 0.41172993183135986, "rewards/batch_coverage_25": 0.4154829740524292, "rewards/batch_coverage_5": 0.3841669142246246, "rewards/brier_reward": 0.792135500907898, "rewards/confidence_uniqueness_reward": 0.9506557941436767, "rewards/format_reward": 0.999609375, "rewards/frontier_entropy_batch_reward": -0.23586300015449524, "signal/accuracy_reward/centered_abs_mean": 0.10125732421875, "signal/accuracy_reward/group_std_mean": 0.13127839118242263, "signal/accuracy_reward/group_zero_std_frac": 0.634375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9214963674545288, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.050628662109375, "signal/advantage_abs_mean": 0.7701701760292053, "signal/advantage_pre_scale_abs_mean": 0.07243436127901078, "signal/advantage_pre_scale_std": 0.11784504354000092, "signal/advantage_std": 0.9826903700828552, "signal/batch_coverage_0/centered_abs_mean": 0.13559473752975465, "signal/batch_coverage_0/group_std_mean": 0.1722516745328903, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.035601938143372536, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.001939004845917225, "signal/batch_coverage_1/centered_abs_mean": 0.13559473752975465, "signal/batch_coverage_1/group_std_mean": 0.1722516745328903, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.035601938143372536, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.001939004845917225, "signal/batch_coverage_10/centered_abs_mean": 0.14347952902317046, "signal/batch_coverage_10/group_std_mean": 0.18395382463932036, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03765551820397377, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002051757252775133, "signal/batch_coverage_15/centered_abs_mean": 0.1431223601102829, "signal/batch_coverage_15/group_std_mean": 0.18346399664878846, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.037582477927207945, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002046649740077555, "signal/batch_coverage_20/centered_abs_mean": 0.14731760025024415, "signal/batch_coverage_20/group_std_mean": 0.18973099887371064, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.038676262646913526, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021066416520625353, "signal/batch_coverage_25/centered_abs_mean": 0.14638057053089143, "signal/batch_coverage_25/group_std_mean": 0.18917132019996644, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.038373632729053496, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0020932421321049333, "signal/batch_coverage_5/centered_abs_mean": 0.13988438844680787, "signal/batch_coverage_5/group_std_mean": 0.17847622334957122, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.036685329675674436, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020003467332571747, "signal/brier_reward/centered_abs_mean": 0.11065952330827714, "signal/brier_reward/group_std_mean": 0.14248354136943817, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20170999467372894, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01106595303863287, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013073159195482732, "signal/confidence_uniqueness_reward/group_std_mean": 0.017292667552828787, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02388041839003563, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013073159847408534, "signal/format_reward/centered_abs_mean": 0.00074462890625, "signal/format_reward/group_std_mean": 0.0018734002485871315, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006573101878166199, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000372314453125, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27541821002960204, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3459975838661194, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5035546779632568, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027541821449995042, "step": 125 }, { "calibration/aurc": 0.27751930971271366, "calibration/batch_distribution_entropy": 0.9501913731130441, "calibration/buffer_distribution_entropy": 0.9958043564857535, "calibration/confidence_entropy": 0.4522949540641205, "calibration/coverage@0%": 0.021115918542074362, "calibration/coverage@1%": 0.021115918542074362, "calibration/coverage@10%": 0.1340845156555773, "calibration/coverage@15%": 0.1720087756849315, "calibration/coverage@20%": 0.22007705479452055, "calibration/coverage@25%": 0.42624067392367904, "calibration/coverage@30%": 0.6247683769569472, "calibration/coverage@5%": 0.0973359527886497, "calibration/ece": 0.10802325633813177, "calibration/mean_confidence": 0.5223946163504338, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 754.0, "completions/max_terminated_length": 754.0, "completions/mean_length": 209.23974609375, "completions/mean_terminated_length": 209.38225402832032, "completions/min_length": 39.6, "completions/min_terminated_length": 100.8, "epoch": 0.416, "grad_norm": 0.006731382571160793, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 441474597.0, "reward": 0.954073143005371, "reward_std": 0.08691072165966034, "rewards/accuracy_reward": 0.5236328125, "rewards/batch_coverage_0": 0.40355276465415957, "rewards/batch_coverage_1": 0.40355276465415957, "rewards/batch_coverage_10": 0.4401559591293335, "rewards/batch_coverage_15": 0.4462449550628662, "rewards/batch_coverage_20": 0.44900824427604674, "rewards/batch_coverage_25": 0.45447943210601804, "rewards/batch_coverage_5": 0.42884148359298707, "rewards/brier_reward": 0.8059401035308837, "rewards/confidence_uniqueness_reward": 0.9467169284820557, "rewards/format_reward": 0.99931640625, "rewards/frontier_entropy_batch_reward": -0.25936628580093385, "signal/accuracy_reward/centered_abs_mean": 0.0849365234375, "signal/accuracy_reward/group_std_mean": 0.11482858657836914, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.780875825881958, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04246826171875, "signal/advantage_abs_mean": 0.7536841630935669, "signal/advantage_pre_scale_abs_mean": 0.0655998557806015, "signal/advantage_pre_scale_std": 0.1087318018078804, "signal/advantage_std": 0.9826650023460388, "signal/batch_coverage_0/centered_abs_mean": 0.1364029973745346, "signal/batch_coverage_0/group_std_mean": 0.1733390212059021, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.036052515357732774, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019505629083141685, "signal/batch_coverage_1/centered_abs_mean": 0.1364029973745346, "signal/batch_coverage_1/group_std_mean": 0.1733390212059021, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.036052515357732774, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019505629083141685, "signal/batch_coverage_10/centered_abs_mean": 0.14312728643417358, "signal/batch_coverage_10/group_std_mean": 0.18380964994430543, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03788130059838295, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020467202179133893, "signal/batch_coverage_15/centered_abs_mean": 0.14330851435661315, "signal/batch_coverage_15/group_std_mean": 0.18487076759338378, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.037964475154876706, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020493117161095144, "signal/batch_coverage_20/centered_abs_mean": 0.1435598075389862, "signal/batch_coverage_20/group_std_mean": 0.18555756211280822, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03806317374110222, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002052905270829797, "signal/batch_coverage_25/centered_abs_mean": 0.1446547716856003, "signal/batch_coverage_25/group_std_mean": 0.1876140534877777, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.038419923186302184, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002068563178181648, "signal/batch_coverage_5/centered_abs_mean": 0.14238941073417663, "signal/batch_coverage_5/group_std_mean": 0.18227325677871703, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03768376782536507, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020361685194075106, "signal/brier_reward/centered_abs_mean": 0.10666648000478744, "signal/brier_reward/group_std_mean": 0.13855001628398894, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19812132120132447, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010666648298501969, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01669100560247898, "signal/confidence_uniqueness_reward/group_std_mean": 0.022906759381294252, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03130178637802601, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016691005090251564, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.0038669900968670845, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01281973384320736, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2834647178649902, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3607568025588989, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5266608476638794, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02834647297859192, "step": 130 }, { "calibration/aurc": 0.2348983777396676, "calibration/batch_distribution_entropy": 0.960256795358017, "calibration/buffer_distribution_entropy": 0.9943258434526356, "calibration/confidence_entropy": 0.4422803777578306, "calibration/coverage@0%": 0.0546875, "calibration/coverage@1%": 0.081640625, "calibration/coverage@10%": 0.253515625, "calibration/coverage@15%": 0.32229161570450093, "calibration/coverage@20%": 0.4437775195694716, "calibration/coverage@25%": 0.5684663955479452, "calibration/coverage@30%": 0.6654231898238747, "calibration/coverage@5%": 0.166796875, "calibration/ece": 0.1278217715909456, "calibration/mean_confidence": 0.5304076012280948, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 795.4, "completions/max_terminated_length": 795.4, "completions/mean_length": 215.02705078125, "completions/mean_terminated_length": 215.13265686035157, "completions/min_length": 21.2, "completions/min_terminated_length": 101.8, "epoch": 0.432, "grad_norm": 0.007011502515524626, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 458690810.0, "reward": 0.9772215843200683, "reward_std": 0.08537227213382721, "rewards/accuracy_reward": 0.56171875, "rewards/batch_coverage_0": 0.429417085647583, "rewards/batch_coverage_1": 0.429417085647583, "rewards/batch_coverage_10": 0.45698114633560183, "rewards/batch_coverage_15": 0.4633796989917755, "rewards/batch_coverage_20": 0.46579416990280154, "rewards/batch_coverage_25": 0.46876177191734314, "rewards/batch_coverage_5": 0.448213255405426, "rewards/brier_reward": 0.8230858683586121, "rewards/confidence_uniqueness_reward": 0.9489996194839477, "rewards/format_reward": 0.99951171875, "rewards/frontier_entropy_batch_reward": -0.2581829369068146, "signal/accuracy_reward/centered_abs_mean": 0.08834228515625, "signal/accuracy_reward/group_std_mean": 0.11721137315034866, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8492055177688599, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.044171142578125, "signal/advantage_abs_mean": 0.7724653244018554, "signal/advantage_pre_scale_abs_mean": 0.06635667979717255, "signal/advantage_pre_scale_std": 0.10899174213409424, "signal/advantage_std": 0.982582688331604, "signal/batch_coverage_0/centered_abs_mean": 0.1260914087295532, "signal/batch_coverage_0/group_std_mean": 0.16242744624614716, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03515940457582474, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0018031070707365871, "signal/batch_coverage_1/centered_abs_mean": 0.1260914087295532, "signal/batch_coverage_1/group_std_mean": 0.16242744624614716, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03515940457582474, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0018031070707365871, "signal/batch_coverage_10/centered_abs_mean": 0.13288309574127197, "signal/batch_coverage_10/group_std_mean": 0.17225308418273927, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03706081435084343, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0019002282759174704, "signal/batch_coverage_15/centered_abs_mean": 0.13253578245639802, "signal/batch_coverage_15/group_std_mean": 0.17227967381477355, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03687136918306351, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0018952616723254324, "signal/batch_coverage_20/centered_abs_mean": 0.1341342270374298, "signal/batch_coverage_20/group_std_mean": 0.17456189095973967, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03733382299542427, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.001918119378387928, "signal/batch_coverage_25/centered_abs_mean": 0.13528763949871064, "signal/batch_coverage_25/group_std_mean": 0.17607759833335876, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03762383908033371, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0019346131943166257, "signal/batch_coverage_5/centered_abs_mean": 0.1318855404853821, "signal/batch_coverage_5/group_std_mean": 0.17036759853363037, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03678369112312794, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0018859632313251496, "signal/brier_reward/centered_abs_mean": 0.09676974564790726, "signal/brier_reward/group_std_mean": 0.1267983391880989, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1873602271080017, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009676975198090076, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014930117689073086, "signal/confidence_uniqueness_reward/group_std_mean": 0.019978737458586693, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028808726742863656, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014930118340998888, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009044526517391205, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29253311157226564, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3665455937385559, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5665589928627014, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029253310710191726, "step": 135 }, { "calibration/aurc": 0.24297186329614986, "calibration/batch_distribution_entropy": 0.9644706631929182, "calibration/buffer_distribution_entropy": 0.9927333410589829, "calibration/confidence_entropy": 0.4905251186083281, "calibration/coverage@0%": 0.031253822162426614, "calibration/coverage@1%": 0.031253822162426614, "calibration/coverage@10%": 0.1747102800880626, "calibration/coverage@15%": 0.26385380993150687, "calibration/coverage@20%": 0.3224850171232877, "calibration/coverage@25%": 0.4940076137475538, "calibration/coverage@30%": 0.704204378669276, "calibration/coverage@5%": 0.08285989481409002, "calibration/ece": 0.10222825601300542, "calibration/mean_confidence": 0.5681869079373103, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 871.4, "completions/max_terminated_length": 871.4, "completions/mean_length": 240.14736328125, "completions/mean_terminated_length": 240.3585632324219, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.448, "grad_norm": 0.006543621886521578, "learning_rate": 1e-06, "loss": -0.0029, "num_tokens": 476102719.0, "reward": 0.9629115462303162, "reward_std": 0.0858668938279152, "rewards/accuracy_reward": 0.53369140625, "rewards/batch_coverage_0": 0.39791461229324343, "rewards/batch_coverage_1": 0.39791461229324343, "rewards/batch_coverage_10": 0.42843729853630064, "rewards/batch_coverage_15": 0.43190144896507265, "rewards/batch_coverage_20": 0.43552638292312623, "rewards/batch_coverage_25": 0.4384331822395325, "rewards/batch_coverage_5": 0.41683014035224913, "rewards/brier_reward": 0.8179983615875244, "rewards/confidence_uniqueness_reward": 0.9507858991622925, "rewards/format_reward": 0.99912109375, "rewards/frontier_entropy_batch_reward": -0.2251463621854782, "signal/accuracy_reward/centered_abs_mean": 0.086688232421875, "signal/accuracy_reward/group_std_mean": 0.11191904991865158, "signal/accuracy_reward/group_zero_std_frac": 0.6875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8156797289848328, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0433441162109375, "signal/advantage_abs_mean": 0.770751690864563, "signal/advantage_pre_scale_abs_mean": 0.06612677872180939, "signal/advantage_pre_scale_std": 0.10868151634931564, "signal/advantage_std": 0.9826421141624451, "signal/batch_coverage_0/centered_abs_mean": 0.13506021797657014, "signal/batch_coverage_0/group_std_mean": 0.16939607560634612, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03622937873005867, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019313612021505833, "signal/batch_coverage_1/centered_abs_mean": 0.13506021797657014, "signal/batch_coverage_1/group_std_mean": 0.16939607560634612, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03622937873005867, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019313612021505833, "signal/batch_coverage_10/centered_abs_mean": 0.14403230249881743, "signal/batch_coverage_10/group_std_mean": 0.18216087818145751, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03862129971385002, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020596618996933103, "signal/batch_coverage_15/centered_abs_mean": 0.1450017899274826, "signal/batch_coverage_15/group_std_mean": 0.18356646001338958, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03888870552182198, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002073525660671294, "signal/batch_coverage_20/centered_abs_mean": 0.14507793486118317, "signal/batch_coverage_20/group_std_mean": 0.18390361666679383, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.0389101043343544, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002074614446610212, "signal/batch_coverage_25/centered_abs_mean": 0.1421796977519989, "signal/batch_coverage_25/group_std_mean": 0.18051582276821138, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03810642510652542, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0020331696607172487, "signal/batch_coverage_5/centered_abs_mean": 0.1403295874595642, "signal/batch_coverage_5/group_std_mean": 0.17662979066371917, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.037645730376243594, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020067131146788595, "signal/brier_reward/centered_abs_mean": 0.09719461649656295, "signal/brier_reward/group_std_mean": 0.12743473201990127, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18190354406833648, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009719461761415004, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013948087580502033, "signal/confidence_uniqueness_reward/group_std_mean": 0.02008185051381588, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02622562162578106, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013948087580502033, "signal/format_reward/centered_abs_mean": 0.001702880859375, "signal/format_reward/group_std_mean": 0.004971844423562288, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.016198099590837955, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28304690420627593, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3540629267692566, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5298369646072387, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02830469198524952, "step": 140 }, { "calibration/aurc": 0.3804163715227877, "calibration/batch_distribution_entropy": 0.9761867561052131, "calibration/buffer_distribution_entropy": 0.9923661442430166, "calibration/confidence_entropy": 0.4707709836599577, "calibration/coverage@0%": 0.0023452788649706456, "calibration/coverage@1%": 0.0023452788649706456, "calibration/coverage@10%": 0.006251528864970646, "calibration/coverage@15%": 0.026173403864970645, "calibration/coverage@20%": 0.10903406311154598, "calibration/coverage@25%": 0.30165193860078277, "calibration/coverage@30%": 0.3919398238747554, "calibration/coverage@5%": 0.0023452788649706456, "calibration/ece": 0.12226193847865793, "calibration/mean_confidence": 0.49554941778696, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0015625, "completions/max_length": 1013.2, "completions/max_terminated_length": 1013.2, "completions/mean_length": 261.70986328125, "completions/mean_terminated_length": 262.12346801757815, "completions/min_length": 23.0, "completions/min_terminated_length": 121.0, "epoch": 0.464, "grad_norm": 0.007262748200446367, "learning_rate": 1e-06, "loss": -0.0074, "num_tokens": 493953444.0, "reward": 0.9251135468482972, "reward_std": 0.08157364279031754, "rewards/accuracy_reward": 0.46767578125, "rewards/batch_coverage_0": 0.4047118484973907, "rewards/batch_coverage_1": 0.4047118484973907, "rewards/batch_coverage_10": 0.4350186824798584, "rewards/batch_coverage_15": 0.44130164980888364, "rewards/batch_coverage_20": 0.4450684428215027, "rewards/batch_coverage_25": 0.4465096712112427, "rewards/batch_coverage_5": 0.4212812721729279, "rewards/brier_reward": 0.7872581958770752, "rewards/confidence_uniqueness_reward": 0.9485163807868957, "rewards/format_reward": 0.99833984375, "rewards/frontier_entropy_batch_reward": -0.24351753890514374, "signal/accuracy_reward/centered_abs_mean": 0.067730712890625, "signal/accuracy_reward/group_std_mean": 0.0957780659198761, "signal/accuracy_reward/group_zero_std_frac": 0.703125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.6515132248401642, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0338653564453125, "signal/advantage_abs_mean": 0.7560706853866577, "signal/advantage_pre_scale_abs_mean": 0.05970721915364265, "signal/advantage_pre_scale_std": 0.10258275270462036, "signal/advantage_std": 0.9825798630714416, "signal/batch_coverage_0/centered_abs_mean": 0.13802915811538696, "signal/batch_coverage_0/group_std_mean": 0.17626523077487946, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0383618026971817, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.001973817031830549, "signal/batch_coverage_1/centered_abs_mean": 0.13802915811538696, "signal/batch_coverage_1/group_std_mean": 0.17626523077487946, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0383618026971817, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.001973817031830549, "signal/batch_coverage_10/centered_abs_mean": 0.14598531723022462, "signal/batch_coverage_10/group_std_mean": 0.18837699592113494, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.040507327765226364, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020875899121165276, "signal/batch_coverage_15/centered_abs_mean": 0.14799394309520722, "signal/batch_coverage_15/group_std_mean": 0.1916155368089676, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.041080842912197116, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0021163133904337885, "signal/batch_coverage_20/centered_abs_mean": 0.14927698969841002, "signal/batch_coverage_20/group_std_mean": 0.19369731545448304, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04144330024719238, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002134660817682743, "signal/batch_coverage_25/centered_abs_mean": 0.14754628241062165, "signal/batch_coverage_25/group_std_mean": 0.19175436496734619, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04096244126558304, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021099118515849114, "signal/batch_coverage_5/centered_abs_mean": 0.14244378805160524, "signal/batch_coverage_5/group_std_mean": 0.18289848864078523, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.039581865072250366, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002036946127191186, "signal/brier_reward/centered_abs_mean": 0.11198951303958893, "signal/brier_reward/group_std_mean": 0.1472228556871414, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21724056005477904, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011198951117694377, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01595969293266535, "signal/confidence_uniqueness_reward/group_std_mean": 0.024591311067342757, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03095446974039078, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001595969288609922, "signal/format_reward/centered_abs_mean": 0.003204345703125, "signal/format_reward/group_std_mean": 0.009054953465238214, "signal/format_reward/group_zero_std_frac": 0.95, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.03110705818980932, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0016021728515625, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28415713310241697, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3556749701499939, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5508628249168396, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02841571271419525, "step": 145 }, { "calibration/aurc": 0.2458841873288713, "calibration/batch_distribution_entropy": 0.9603942966500225, "calibration/buffer_distribution_entropy": 0.9914970698215189, "calibration/confidence_entropy": 0.4505998554101692, "calibration/coverage@0%": 0.04422625942500288, "calibration/coverage@1%": 0.04422625942500288, "calibration/coverage@10%": 0.2567775813236254, "calibration/coverage@15%": 0.3147055226200069, "calibration/coverage@20%": 0.3691025862399754, "calibration/coverage@25%": 0.4325483541019147, "calibration/coverage@30%": 0.6278100598115959, "calibration/coverage@5%": 0.11351677014984074, "calibration/ece": 0.1281255298427196, "calibration/mean_confidence": 0.5108871765777298, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003125, "completions/max_length": 1096.2, "completions/max_terminated_length": 1096.2, "completions/mean_length": 270.9138671875, "completions/mean_terminated_length": 271.7705505371094, "completions/min_length": 0.0, "completions/min_terminated_length": 126.2, "epoch": 0.48, "grad_norm": 0.006920052692294121, "learning_rate": 1e-06, "loss": -0.0053, "num_tokens": 511775634.0, "reward": 0.9541318893432618, "reward_std": 0.09775560349225998, "rewards/accuracy_reward": 0.53134765625, "rewards/batch_coverage_0": 0.3797847807407379, "rewards/batch_coverage_1": 0.3797847807407379, "rewards/batch_coverage_10": 0.4200979769229889, "rewards/batch_coverage_15": 0.4258685171604156, "rewards/batch_coverage_20": 0.432542085647583, "rewards/batch_coverage_25": 0.43280801773071287, "rewards/batch_coverage_5": 0.40181149244308473, "rewards/brier_reward": 0.7927872776985169, "rewards/confidence_uniqueness_reward": 0.9472802758216858, "rewards/format_reward": 0.996875, "rewards/frontier_entropy_batch_reward": -0.2506578862667084, "signal/accuracy_reward/centered_abs_mean": 0.108282470703125, "signal/accuracy_reward/group_std_mean": 0.14222493767738342, "signal/accuracy_reward/group_zero_std_frac": 0.59375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9324786543846131, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0541412353515625, "signal/advantage_abs_mean": 0.7579309821128846, "signal/advantage_pre_scale_abs_mean": 0.07357524037361145, "signal/advantage_pre_scale_std": 0.1220558226108551, "signal/advantage_std": 0.9827972650527954, "signal/batch_coverage_0/centered_abs_mean": 0.15051166117191314, "signal/batch_coverage_0/group_std_mean": 0.1916230082511902, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03701958805322647, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0021523167844861744, "signal/batch_coverage_1/centered_abs_mean": 0.15051166117191314, "signal/batch_coverage_1/group_std_mean": 0.1916230082511902, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03701958805322647, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0021523167844861744, "signal/batch_coverage_10/centered_abs_mean": 0.15894248485565185, "signal/batch_coverage_10/group_std_mean": 0.20470293164253234, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03905408829450607, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0022728775627911093, "signal/batch_coverage_15/centered_abs_mean": 0.15886342227458955, "signal/batch_coverage_15/group_std_mean": 0.20532366633415222, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03902169317007065, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002271746890619397, "signal/batch_coverage_20/centered_abs_mean": 0.16236689388751985, "signal/batch_coverage_20/group_std_mean": 0.21032328605651857, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03986440747976303, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0023218465503305197, "signal/batch_coverage_25/centered_abs_mean": 0.16170798540115355, "signal/batch_coverage_25/group_std_mean": 0.2097803145647049, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03969116657972336, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0023124242201447488, "signal/batch_coverage_5/centered_abs_mean": 0.15605857968330383, "signal/batch_coverage_5/group_std_mean": 0.20000146925449372, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03839111030101776, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.00223163771443069, "signal/brier_reward/centered_abs_mean": 0.11516801714897155, "signal/brier_reward/group_std_mean": 0.150071182847023, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19804134964942932, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01151680201292038, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.018391324393451213, "signal/confidence_uniqueness_reward/group_std_mean": 0.02773902639746666, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03177299872040749, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001839132490567863, "signal/format_reward/centered_abs_mean": 0.00570068359375, "signal/format_reward/group_std_mean": 0.012306397967040538, "signal/format_reward/group_zero_std_frac": 0.94375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.04965853579342365, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002850341796875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29401772022247313, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3682851493358612, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5058897852897644, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02940177321434021, "step": 150 }, { "epoch": 0.48, "eval_calibration/aurc": 0.42659577713684294, "eval_calibration/batch_distribution_entropy": 0.9184063160188691, "eval_calibration/buffer_distribution_entropy": 0.9908475784106092, "eval_calibration/confidence_entropy": 0.43509464264808095, "eval_calibration/coverage@0%": 0.1171875, "eval_calibration/coverage@1%": 0.1171875, "eval_calibration/coverage@10%": 0.15625, "eval_calibration/coverage@15%": 0.15625, "eval_calibration/coverage@20%": 0.234375, "eval_calibration/coverage@25%": 0.2578125, "eval_calibration/coverage@30%": 0.2734375, "eval_calibration/coverage@5%": 0.1171875, "eval_calibration/ece": 0.22639856153688892, "eval_calibration/mean_confidence": 0.5023909767807211, "eval_completions/clipped_ratio": 0.004108297413793094, "eval_completions/max_length": 947.25, "eval_completions/max_terminated_length": 947.25, "eval_completions/mean_length": 272.63745880126953, "eval_completions/mean_terminated_length": 273.78321838378906, "eval_completions/min_length": 68.75, "eval_completions/min_terminated_length": 135.0, "eval_loss": 0.0, "eval_num_tokens": 511775634.0, "eval_reward": 0.8025836795568466, "eval_reward_std": 0.2362028956413269, "eval_rewards/accuracy_reward": 0.439453125, "eval_rewards/batch_coverage_0": 0.16702717542648315, "eval_rewards/batch_coverage_1": 0.16702717542648315, "eval_rewards/batch_coverage_10": 0.16157682612538338, "eval_rewards/batch_coverage_15": 0.15290211886167526, "eval_rewards/batch_coverage_20": 0.13275382481515408, "eval_rewards/batch_coverage_25": 0.12572277709841728, "eval_rewards/batch_coverage_5": 0.16702717542648315, "eval_rewards/brier_reward": 0.7974715679883957, "eval_rewards/confidence_uniqueness_reward": 0.8931373059749603, "eval_rewards/format_reward": 0.99609375, "eval_rewards/frontier_entropy_batch_reward": -0.99609375, "eval_runtime": 49.7568, "eval_samples_per_second": 10.049, "eval_signal/accuracy_reward/centered_abs_mean": 0.4735107421875, "eval_signal/accuracy_reward/group_std_mean": 0.4941246137022972, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0077248513698578, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23675537109375, "eval_signal/advantage_abs_mean": 0.9267684817314148, "eval_signal/advantage_pre_scale_abs_mean": 0.21922592446208, "eval_signal/advantage_pre_scale_std": 0.23409972339868546, "eval_signal/advantage_std": 0.9876974821090698, "eval_signal/batch_coverage_0/centered_abs_mean": 0.2995058670639992, "eval_signal/batch_coverage_0/group_std_mean": 0.3732306435704231, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.018284518970176578, "eval_signal/batch_coverage_0/weight": 0.014299999922513962, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.004282933892682195, "eval_signal/batch_coverage_1/centered_abs_mean": 0.2995058670639992, "eval_signal/batch_coverage_1/group_std_mean": 0.3732306435704231, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.018284518970176578, "eval_signal/batch_coverage_1/weight": 0.014299999922513962, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.004282933892682195, "eval_signal/batch_coverage_10/centered_abs_mean": 0.2808452136814594, "eval_signal/batch_coverage_10/group_std_mean": 0.34837885946035385, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.017164529534056783, "eval_signal/batch_coverage_10/weight": 0.014299999922513962, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.004016086459159851, "eval_signal/batch_coverage_15/centered_abs_mean": 0.2556031718850136, "eval_signal/batch_coverage_15/group_std_mean": 0.31662533432245255, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.015653746901080012, "eval_signal/batch_coverage_15/weight": 0.014299999922513962, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.0036551255034282804, "eval_signal/batch_coverage_20/centered_abs_mean": 0.21076411753892899, "eval_signal/batch_coverage_20/group_std_mean": 0.26321645826101303, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012889966601505876, "eval_signal/batch_coverage_20/weight": 0.014299999922513962, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.00301392690744251, "eval_signal/batch_coverage_25/centered_abs_mean": 0.19837579876184464, "eval_signal/batch_coverage_25/group_std_mean": 0.2488715946674347, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.012128992471843958, "eval_signal/batch_coverage_25/weight": 0.014299999922513962, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.002836774045135826, "eval_signal/batch_coverage_5/centered_abs_mean": 0.2995058670639992, "eval_signal/batch_coverage_5/group_std_mean": 0.3732306435704231, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.018284518970176578, "eval_signal/batch_coverage_5/weight": 0.014299999922513962, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.004282933892682195, "eval_signal/brier_reward/centered_abs_mean": 0.20047112554311752, "eval_signal/brier_reward/group_std_mean": 0.2557060122489929, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08551956340670586, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.020047113299369812, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04430906008929014, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.061461527831852436, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01891739433631301, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004430905915796757, "eval_signal/format_reward/centered_abs_mean": 0.007568359375, "eval_signal/format_reward/group_std_mean": 0.022097086533904076, "eval_signal/format_reward/group_zero_std_frac": 0.875, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.016403171233832836, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0037841796875, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.007568359375, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.022097086533904076, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.875, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0032806345261633396, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0007568359724245965, "eval_steps_per_second": 0.08, "step": 150 }, { "calibration/aurc": 0.3286843099258942, "calibration/batch_distribution_entropy": 0.9648263201622242, "calibration/buffer_distribution_entropy": 0.9903971698614772, "calibration/confidence_entropy": 0.4572479206610348, "calibration/coverage@0%": 0.046269167911196556, "calibration/coverage@1%": 0.05019073653864754, "calibration/coverage@10%": 0.18040620276119976, "calibration/coverage@15%": 0.23105071677694142, "calibration/coverage@20%": 0.32822291412780497, "calibration/coverage@25%": 0.3756455780018698, "calibration/coverage@30%": 0.4696543494734263, "calibration/coverage@5%": 0.11375486057618409, "calibration/ece": 0.13474628859908178, "calibration/mean_confidence": 0.5375935124565585, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00205078125, "completions/max_length": 1224.8, "completions/max_terminated_length": 1224.8, "completions/mean_length": 266.94423828125, "completions/mean_terminated_length": 267.50035400390624, "completions/min_length": 26.8, "completions/min_terminated_length": 121.0, "epoch": 0.496, "grad_norm": 0.0065423352643847466, "learning_rate": 1e-06, "loss": -0.0124, "num_tokens": 529816983.0, "reward": 0.9738301515579224, "reward_std": 0.0878347024321556, "rewards/accuracy_reward": 0.56259765625, "rewards/batch_coverage_0": 0.39764232039451597, "rewards/batch_coverage_1": 0.39764232039451597, "rewards/batch_coverage_10": 0.4279240250587463, "rewards/batch_coverage_15": 0.4341652512550354, "rewards/batch_coverage_20": 0.4424472451210022, "rewards/batch_coverage_25": 0.4455852508544922, "rewards/batch_coverage_5": 0.4148296773433685, "rewards/brier_reward": 0.8039668917655944, "rewards/confidence_uniqueness_reward": 0.9486234784126282, "rewards/format_reward": 0.99794921875, "rewards/frontier_entropy_batch_reward": -0.24033711552619935, "signal/accuracy_reward/centered_abs_mean": 0.079705810546875, "signal/accuracy_reward/group_std_mean": 0.10795025080442429, "signal/accuracy_reward/group_zero_std_frac": 0.678125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.73244309425354, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0398529052734375, "signal/advantage_abs_mean": 0.7574346184730529, "signal/advantage_pre_scale_abs_mean": 0.06559118181467057, "signal/advantage_pre_scale_std": 0.11137249171733857, "signal/advantage_std": 0.9826851725578308, "signal/batch_coverage_0/centered_abs_mean": 0.13030258417129517, "signal/batch_coverage_0/group_std_mean": 0.1680112361907959, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.034121598303318026, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0018633269704878331, "signal/batch_coverage_1/centered_abs_mean": 0.13030258417129517, "signal/batch_coverage_1/group_std_mean": 0.1680112361907959, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.034121598303318026, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0018633269704878331, "signal/batch_coverage_10/centered_abs_mean": 0.13791993260383606, "signal/batch_coverage_10/group_std_mean": 0.17841829657554625, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03606404885649681, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0019722550408914686, "signal/batch_coverage_15/centered_abs_mean": 0.13972941935062408, "signal/batch_coverage_15/group_std_mean": 0.18158538937568663, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.036530570685863496, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0019981306744739414, "signal/batch_coverage_20/centered_abs_mean": 0.1417074352502823, "signal/batch_coverage_20/group_std_mean": 0.18445950746536255, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03704546689987183, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002026416314765811, "signal/batch_coverage_25/centered_abs_mean": 0.1412410706281662, "signal/batch_coverage_25/group_std_mean": 0.18436720073223115, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.036921939253807066, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.00201974727679044, "signal/batch_coverage_5/centered_abs_mean": 0.13376755714416505, "signal/batch_coverage_5/group_std_mean": 0.1726322054862976, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03502202108502388, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0019128760090097786, "signal/brier_reward/centered_abs_mean": 0.10100103318691253, "signal/brier_reward/group_std_mean": 0.13356745839118958, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18487447798252105, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010100103542208671, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01650053486227989, "signal/confidence_uniqueness_reward/group_std_mean": 0.026250819489359856, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030377379804849624, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016500535421073437, "signal/format_reward/centered_abs_mean": 0.003948974609375, "signal/format_reward/group_std_mean": 0.010928353667259217, "signal/format_reward/group_zero_std_frac": 0.940625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.03621824383735657, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0019744873046875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28486855030059816, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35629957914352417, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5223815381526947, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028486854583024978, "step": 155 }, { "calibration/aurc": 0.250762394647414, "calibration/batch_distribution_entropy": 0.9787780953518853, "calibration/buffer_distribution_entropy": 0.9903697964720649, "calibration/confidence_entropy": 0.49523146648508537, "calibration/coverage@0%": 0.05824618602362205, "calibration/coverage@1%": 0.07777743602362205, "calibration/coverage@10%": 0.34116941437007875, "calibration/coverage@15%": 0.4302688238188976, "calibration/coverage@20%": 0.48497170275590556, "calibration/coverage@25%": 0.5420613927165354, "calibration/coverage@30%": 0.6202202263779528, "calibration/coverage@5%": 0.19384227362204726, "calibration/ece": 0.1276741663789216, "calibration/mean_confidence": 0.5013257373811151, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00224609375, "completions/max_length": 1093.2, "completions/max_terminated_length": 1093.2, "completions/mean_length": 256.78408203125, "completions/mean_terminated_length": 257.3715850830078, "completions/min_length": 22.6, "completions/min_terminated_length": 120.8, "epoch": 0.512, "grad_norm": 0.006810260470956564, "learning_rate": 1e-06, "loss": -0.0089, "num_tokens": 547592116.0, "reward": 0.9719028115272522, "reward_std": 0.08941576033830642, "rewards/accuracy_reward": 0.55048828125, "rewards/batch_coverage_0": 0.3955975353717804, "rewards/batch_coverage_1": 0.3955975353717804, "rewards/batch_coverage_10": 0.43007825016975404, "rewards/batch_coverage_15": 0.43892077207565305, "rewards/batch_coverage_20": 0.4460327446460724, "rewards/batch_coverage_25": 0.4471981167793274, "rewards/batch_coverage_5": 0.41255890727043154, "rewards/brier_reward": 0.8169501304626465, "rewards/confidence_uniqueness_reward": 0.9497070074081421, "rewards/format_reward": 0.99775390625, "rewards/frontier_entropy_batch_reward": -0.21297547519207, "signal/accuracy_reward/centered_abs_mean": 0.089019775390625, "signal/accuracy_reward/group_std_mean": 0.12106681168079376, "signal/accuracy_reward/group_zero_std_frac": 0.646875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.841943883895874, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0445098876953125, "signal/advantage_abs_mean": 0.7453524589538574, "signal/advantage_pre_scale_abs_mean": 0.06581479609012604, "signal/advantage_pre_scale_std": 0.11291303932666778, "signal/advantage_std": 0.9826244473457336, "signal/batch_coverage_0/centered_abs_mean": 0.13738665282726287, "signal/batch_coverage_0/group_std_mean": 0.17612952888011932, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.037176710367202756, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.001964629115536809, "signal/batch_coverage_1/centered_abs_mean": 0.13738665282726287, "signal/batch_coverage_1/group_std_mean": 0.17612952888011932, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.037176710367202756, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.001964629115536809, "signal/batch_coverage_10/centered_abs_mean": 0.1476728081703186, "signal/batch_coverage_10/group_std_mean": 0.1905215263366699, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03995952680706978, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021117211086675524, "signal/batch_coverage_15/centered_abs_mean": 0.14803148657083512, "signal/batch_coverage_15/group_std_mean": 0.19160535037517548, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04006317034363747, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0021168502746149896, "signal/batch_coverage_20/centered_abs_mean": 0.15196847915649414, "signal/batch_coverage_20/group_std_mean": 0.19699666202068328, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04112867340445518, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021731492131948473, "signal/batch_coverage_25/centered_abs_mean": 0.15217567086219788, "signal/batch_coverage_25/group_std_mean": 0.19736577272415162, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04118807390332222, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002176112146116793, "signal/batch_coverage_5/centered_abs_mean": 0.1416477769613266, "signal/batch_coverage_5/group_std_mean": 0.18204045295715332, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03832144886255264, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002025563293136656, "signal/brier_reward/centered_abs_mean": 0.1010772556066513, "signal/brier_reward/group_std_mean": 0.13320232033729554, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19146246314048768, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010107725858688354, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015872811153531074, "signal/confidence_uniqueness_reward/group_std_mean": 0.024924156628549098, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030068162456154825, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015872811432927846, "signal/format_reward/centered_abs_mean": 0.004254150390625, "signal/format_reward/group_std_mean": 0.010569548420608043, "signal/format_reward/group_zero_std_frac": 0.946875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.04026953727006912, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0021270751953125, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26627694964408877, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3385903060436249, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5046402394771576, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026627695932984353, "step": 160 }, { "calibration/aurc": 0.182792093122934, "calibration/batch_distribution_entropy": 0.9824451422595312, "calibration/buffer_distribution_entropy": 0.9911765451685219, "calibration/confidence_entropy": 0.47350543428585035, "calibration/coverage@0%": 0.08735411791189954, "calibration/coverage@1%": 0.10381101869775415, "calibration/coverage@10%": 0.33079101600119426, "calibration/coverage@15%": 0.46098051253900374, "calibration/coverage@20%": 0.5857192411591355, "calibration/coverage@25%": 0.6962948664239763, "calibration/coverage@30%": 0.7833417600832082, "calibration/coverage@5%": 0.2562663569234177, "calibration/ece": 0.11077890418773365, "calibration/mean_confidence": 0.5144188109520605, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003125, "completions/max_length": 1267.0, "completions/max_terminated_length": 1267.0, "completions/mean_length": 264.58388671875, "completions/mean_terminated_length": 265.4162841796875, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.528, "grad_norm": 0.008622488006949425, "learning_rate": 1e-06, "loss": -0.0128, "num_tokens": 565330991.0, "reward": 0.9707194685935974, "reward_std": 0.09202601760625839, "rewards/accuracy_reward": 0.54912109375, "rewards/batch_coverage_0": 0.42622668743133546, "rewards/batch_coverage_1": 0.42622668743133546, "rewards/batch_coverage_10": 0.45363242626190187, "rewards/batch_coverage_15": 0.46004220843315125, "rewards/batch_coverage_20": 0.46755346059799197, "rewards/batch_coverage_25": 0.4684960961341858, "rewards/batch_coverage_5": 0.4366525709629059, "rewards/brier_reward": 0.8200646162033081, "rewards/confidence_uniqueness_reward": 0.9470735311508178, "rewards/format_reward": 0.996875, "rewards/frontier_entropy_batch_reward": -0.23877668678760527, "signal/accuracy_reward/centered_abs_mean": 0.096038818359375, "signal/accuracy_reward/group_std_mean": 0.12631949186325073, "signal/accuracy_reward/group_zero_std_frac": 0.65, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9202286958694458, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0480194091796875, "signal/advantage_abs_mean": 0.7485239148139954, "signal/advantage_pre_scale_abs_mean": 0.06765339076519013, "signal/advantage_pre_scale_std": 0.1165284737944603, "signal/advantage_std": 0.9825997471809387, "signal/batch_coverage_0/centered_abs_mean": 0.13870301246643066, "signal/batch_coverage_0/group_std_mean": 0.17862937450408936, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03813367709517479, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019834531703963876, "signal/batch_coverage_1/centered_abs_mean": 0.13870301246643066, "signal/batch_coverage_1/group_std_mean": 0.17862937450408936, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03813367709517479, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019834531703963876, "signal/batch_coverage_10/centered_abs_mean": 0.14713135063648225, "signal/batch_coverage_10/group_std_mean": 0.19096679985523224, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04046852439641953, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021039783488959072, "signal/batch_coverage_15/centered_abs_mean": 0.14752067029476165, "signal/batch_coverage_15/group_std_mean": 0.19163157641887665, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04058904945850372, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.00210954574868083, "signal/batch_coverage_20/centered_abs_mean": 0.15036363005638123, "signal/batch_coverage_20/group_std_mean": 0.19588211476802825, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04134969413280487, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002150199841707945, "signal/batch_coverage_25/centered_abs_mean": 0.15014814138412474, "signal/batch_coverage_25/group_std_mean": 0.195842045545578, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04129090085625649, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021471184212714435, "signal/batch_coverage_5/centered_abs_mean": 0.1420408606529236, "signal/batch_coverage_5/group_std_mean": 0.18300524055957795, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03905327394604683, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020311842672526836, "signal/brier_reward/centered_abs_mean": 0.09753091931343079, "signal/brier_reward/group_std_mean": 0.13158592879772185, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18743386566638948, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009753092005848885, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.017668948322534562, "signal/confidence_uniqueness_reward/group_std_mean": 0.029466599225997925, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03409051336348057, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017668949207291008, "signal/format_reward/centered_abs_mean": 0.00595703125, "signal/format_reward/group_std_mean": 0.015264297416433693, "signal/format_reward/group_zero_std_frac": 0.921875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.057597226649522784, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002978515625, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27023516297340394, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3435254514217377, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5200891494750977, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027023518458008766, "step": 165 }, { "calibration/aurc": 0.20382781310413386, "calibration/batch_distribution_entropy": 0.9231029218947485, "calibration/buffer_distribution_entropy": 0.9907112369323047, "calibration/confidence_entropy": 0.4080044647499128, "calibration/coverage@0%": 0.022343892028416367, "calibration/coverage@1%": 0.022343892028416367, "calibration/coverage@10%": 0.20518062547057977, "calibration/coverage@15%": 0.3918224197914523, "calibration/coverage@20%": 0.5747048732359679, "calibration/coverage@25%": 0.7370451964020966, "calibration/coverage@30%": 0.804878027001573, "calibration/coverage@5%": 0.10079797326181579, "calibration/ece": 0.08342971454497072, "calibration/mean_confidence": 0.5725822810234342, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00478515625, "completions/max_length": 1112.0, "completions/max_terminated_length": 1112.0, "completions/mean_length": 272.848828125, "completions/mean_terminated_length": 274.163623046875, "completions/min_length": 0.0, "completions/min_terminated_length": 122.2, "epoch": 0.544, "grad_norm": 0.006315728649497032, "learning_rate": 1e-06, "loss": -0.0202, "num_tokens": 583288547.0, "reward": 0.9795204758644104, "reward_std": 0.10076274275779724, "rewards/accuracy_reward": 0.5869140625, "rewards/batch_coverage_0": 0.38468037247657777, "rewards/batch_coverage_1": 0.38468037247657777, "rewards/batch_coverage_10": 0.4374719798564911, "rewards/batch_coverage_15": 0.4450931191444397, "rewards/batch_coverage_20": 0.4514342784881592, "rewards/batch_coverage_25": 0.45464577078819274, "rewards/batch_coverage_5": 0.41293213367462156, "rewards/brier_reward": 0.8001878976821899, "rewards/confidence_uniqueness_reward": 0.9425620436668396, "rewards/format_reward": 0.99521484375, "rewards/frontier_entropy_batch_reward": -0.2830341339111328, "signal/accuracy_reward/centered_abs_mean": 0.0957275390625, "signal/accuracy_reward/group_std_mean": 0.13344258964061737, "signal/accuracy_reward/group_zero_std_frac": 0.596875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.831443476676941, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04786376953125, "signal/advantage_abs_mean": 0.734432065486908, "signal/advantage_pre_scale_abs_mean": 0.07164475619792939, "signal/advantage_pre_scale_std": 0.12459437847137451, "signal/advantage_std": 0.9827680230140686, "signal/batch_coverage_0/centered_abs_mean": 0.13583238422870636, "signal/batch_coverage_0/group_std_mean": 0.1736106514930725, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03401588536798954, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019424030557274818, "signal/batch_coverage_1/centered_abs_mean": 0.13583238422870636, "signal/batch_coverage_1/group_std_mean": 0.1736106514930725, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03401588536798954, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019424030557274818, "signal/batch_coverage_10/centered_abs_mean": 0.1505550354719162, "signal/batch_coverage_10/group_std_mean": 0.1952440172433853, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.037708821892738345, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002152937022037804, "signal/batch_coverage_15/centered_abs_mean": 0.15017966628074647, "signal/batch_coverage_15/group_std_mean": 0.1952642858028412, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03756738603115082, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002147569367662072, "signal/batch_coverage_20/centered_abs_mean": 0.15021539926528932, "signal/batch_coverage_20/group_std_mean": 0.19603228569030762, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03747752532362938, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021480801980942486, "signal/batch_coverage_25/centered_abs_mean": 0.15165410339832305, "signal/batch_coverage_25/group_std_mean": 0.19860296845436096, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03792189955711365, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021686537191271784, "signal/batch_coverage_5/centered_abs_mean": 0.14168917536735534, "signal/batch_coverage_5/group_std_mean": 0.1822911262512207, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03544792048633098, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020261551951989533, "signal/brier_reward/centered_abs_mean": 0.11252984702587128, "signal/brier_reward/group_std_mean": 0.14877772629261016, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19660184383392335, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011252984963357448, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02271880432963371, "signal/confidence_uniqueness_reward/group_std_mean": 0.037897758185863495, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04011071212589741, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002271880488842726, "signal/format_reward/centered_abs_mean": 0.008966064453125, "signal/format_reward/group_std_mean": 0.021273162961006165, "signal/format_reward/group_zero_std_frac": 0.896875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.07927814871072769, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0044830322265625, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29769091606140136, "signal/frontier_entropy_batch_reward/group_std_mean": 0.37242411971092226, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5192531406879425, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029769092053174972, "step": 170 }, { "calibration/aurc": 0.2208552602109703, "calibration/batch_distribution_entropy": 0.9657113588875912, "calibration/buffer_distribution_entropy": 0.9895090286189452, "calibration/confidence_entropy": 0.47121970246450573, "calibration/coverage@0%": 0.050205249012967815, "calibration/coverage@1%": 0.07447139382705784, "calibration/coverage@10%": 0.37735943223929025, "calibration/coverage@15%": 0.4405383529760919, "calibration/coverage@20%": 0.5207640236801371, "calibration/coverage@25%": 0.5850732988217104, "calibration/coverage@30%": 0.6418992854569161, "calibration/coverage@5%": 0.30050195244071853, "calibration/ece": 0.10489993015645589, "calibration/mean_confidence": 0.49981221301042106, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0044921875, "completions/max_length": 1259.0, "completions/max_terminated_length": 1259.0, "completions/mean_length": 276.05908203125, "completions/mean_terminated_length": 277.3196655273438, "completions/min_length": 0.0, "completions/min_terminated_length": 113.8, "epoch": 0.56, "grad_norm": 0.005405076779425144, "learning_rate": 1e-06, "loss": -0.02, "num_tokens": 600936800.0, "reward": 0.9639453887939453, "reward_std": 0.09017050564289093, "rewards/accuracy_reward": 0.5365234375, "rewards/batch_coverage_0": 0.41431847810745237, "rewards/batch_coverage_1": 0.41431847810745237, "rewards/batch_coverage_10": 0.45128042697906495, "rewards/batch_coverage_15": 0.45638718008995055, "rewards/batch_coverage_20": 0.4603978514671326, "rewards/batch_coverage_25": 0.46246368288993833, "rewards/batch_coverage_5": 0.44165197014808655, "rewards/brier_reward": 0.8196909427642822, "rewards/confidence_uniqueness_reward": 0.9461859464645386, "rewards/format_reward": 0.9955078125, "rewards/frontier_entropy_batch_reward": -0.22999619245529174, "signal/accuracy_reward/centered_abs_mean": 0.07750244140625, "signal/accuracy_reward/group_std_mean": 0.10900391191244126, "signal/accuracy_reward/group_zero_std_frac": 0.659375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.696437931060791, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.038751220703125, "signal/advantage_abs_mean": 0.7308009505271912, "signal/advantage_pre_scale_abs_mean": 0.06338043585419655, "signal/advantage_pre_scale_std": 0.1112355962395668, "signal/advantage_std": 0.9826737999916076, "signal/batch_coverage_0/centered_abs_mean": 0.13989091217517852, "signal/batch_coverage_0/group_std_mean": 0.1777060866355896, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03682319894433021, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002000440075062215, "signal/batch_coverage_1/centered_abs_mean": 0.13989091217517852, "signal/batch_coverage_1/group_std_mean": 0.1777060866355896, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03682319894433021, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002000440075062215, "signal/batch_coverage_10/centered_abs_mean": 0.15016445517539978, "signal/batch_coverage_10/group_std_mean": 0.19216148853302, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03952023386955261, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002147351624444127, "signal/batch_coverage_15/centered_abs_mean": 0.15080960988998413, "signal/batch_coverage_15/group_std_mean": 0.19309694170951844, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.039715195447206496, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002156577631831169, "signal/batch_coverage_20/centered_abs_mean": 0.1491364985704422, "signal/batch_coverage_20/group_std_mean": 0.19163033962249756, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03939807564020157, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021326518384739757, "signal/batch_coverage_25/centered_abs_mean": 0.1505493015050888, "signal/batch_coverage_25/group_std_mean": 0.19344808757305146, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.039786546677351, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021528550423681736, "signal/batch_coverage_5/centered_abs_mean": 0.1475885719060898, "signal/batch_coverage_5/group_std_mean": 0.18809866905212402, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03886888325214386, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0021105165826156734, "signal/brier_reward/centered_abs_mean": 0.10162670761346818, "signal/brier_reward/group_std_mean": 0.13524822592735292, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18657733201980592, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010162671282887458, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.019791874662041666, "signal/confidence_uniqueness_reward/group_std_mean": 0.03511152528226376, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03647754043340683, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019791874103248118, "signal/format_reward/centered_abs_mean": 0.008544921875, "signal/format_reward/group_std_mean": 0.021593831665813922, "signal/format_reward/group_zero_std_frac": 0.890625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.07788022682070732, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0042724609375, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27670013904571533, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3490628838539124, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5116437554359436, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027670014277100564, "step": 175 }, { "calibration/aurc": 0.24882780044253244, "calibration/batch_distribution_entropy": 0.9585615809891646, "calibration/buffer_distribution_entropy": 0.9896047917697167, "calibration/confidence_entropy": 0.4563466045343387, "calibration/coverage@0%": 0.06457074681948144, "calibration/coverage@1%": 0.06614865608969842, "calibration/coverage@10%": 0.2500396689641059, "calibration/coverage@15%": 0.3361376770331115, "calibration/coverage@20%": 0.45370766312894306, "calibration/coverage@25%": 0.5531476208199892, "calibration/coverage@30%": 0.6381766550021126, "calibration/coverage@5%": 0.18364292817642075, "calibration/ece": 0.09905783741164291, "calibration/mean_confidence": 0.5324343582562876, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008984375, "completions/max_length": 1335.6, "completions/max_terminated_length": 1335.6, "completions/mean_length": 276.25419921875, "completions/mean_terminated_length": 278.7879333496094, "completions/min_length": 0.0, "completions/min_terminated_length": 119.4, "epoch": 0.576, "grad_norm": 0.005157508887350559, "learning_rate": 1e-06, "loss": -0.0411, "num_tokens": 618952267.0, "reward": 0.9553242683410644, "reward_std": 0.09966547191143035, "rewards/accuracy_reward": 0.53291015625, "rewards/batch_coverage_0": 0.41381676197052003, "rewards/batch_coverage_1": 0.41381676197052003, "rewards/batch_coverage_10": 0.44667200446128846, "rewards/batch_coverage_15": 0.4532722055912018, "rewards/batch_coverage_20": 0.45751644372940065, "rewards/batch_coverage_25": 0.4595360100269318, "rewards/batch_coverage_5": 0.4303095579147339, "rewards/brier_reward": 0.8042745471000672, "rewards/confidence_uniqueness_reward": 0.9406708955764771, "rewards/format_reward": 0.991015625, "rewards/frontier_entropy_batch_reward": -0.25104811489582063, "signal/accuracy_reward/centered_abs_mean": 0.077618408203125, "signal/accuracy_reward/group_std_mean": 0.10832233279943466, "signal/accuracy_reward/group_zero_std_frac": 0.671875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.676012110710144, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0388092041015625, "signal/advantage_abs_mean": 0.7248594284057617, "signal/advantage_pre_scale_abs_mean": 0.06860132440924645, "signal/advantage_pre_scale_std": 0.12584017515182494, "signal/advantage_std": 0.9827669262886047, "signal/batch_coverage_0/centered_abs_mean": 0.12729153782129288, "signal/batch_coverage_0/group_std_mean": 0.1640220195055008, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03196005895733833, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0018202689243480563, "signal/batch_coverage_1/centered_abs_mean": 0.12729153782129288, "signal/batch_coverage_1/group_std_mean": 0.1640220195055008, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03196005895733833, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0018202689243480563, "signal/batch_coverage_10/centered_abs_mean": 0.13694520890712739, "signal/batch_coverage_10/group_std_mean": 0.17799520790576934, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03435723595321179, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.001958316517993808, "signal/batch_coverage_15/centered_abs_mean": 0.1368080973625183, "signal/batch_coverage_15/group_std_mean": 0.17819553017616271, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.0343306839466095, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0019563557812944054, "signal/batch_coverage_20/centered_abs_mean": 0.13851545453071595, "signal/batch_coverage_20/group_std_mean": 0.1808491289615631, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.034758536517620085, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0019807710079476236, "signal/batch_coverage_25/centered_abs_mean": 0.1401458889245987, "signal/batch_coverage_25/group_std_mean": 0.18298054337501526, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.035132177919149396, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002004086133092642, "signal/batch_coverage_5/centered_abs_mean": 0.13248585164546967, "signal/batch_coverage_5/group_std_mean": 0.17141394913196564, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.033234558254480365, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0018945475341752172, "signal/brier_reward/centered_abs_mean": 0.10222317129373551, "signal/brier_reward/group_std_mean": 0.13865281045436859, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17827674746513367, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010222317650914192, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027541452273726463, "signal/confidence_uniqueness_reward/group_std_mean": 0.049568860232830046, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0479873813688755, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002754145348444581, "signal/format_reward/centered_abs_mean": 0.0166748046875, "signal/format_reward/group_std_mean": 0.036608771234750745, "signal/format_reward/group_zero_std_frac": 0.834375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.14411495625972748, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00833740234375, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2816839575767517, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3570773422718048, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.49249241352081297, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028168396279215812, "step": 180 }, { "calibration/aurc": 0.26415483037912674, "calibration/batch_distribution_entropy": 0.9665828472427667, "calibration/buffer_distribution_entropy": 0.988880973119419, "calibration/confidence_entropy": 0.451765611193816, "calibration/coverage@0%": 0.0738741045005866, "calibration/coverage@1%": 0.14210939861823366, "calibration/coverage@10%": 0.2970794215182419, "calibration/coverage@15%": 0.3482093894656264, "calibration/coverage@20%": 0.5227152602100702, "calibration/coverage@25%": 0.5803462753799378, "calibration/coverage@30%": 0.6305694300226883, "calibration/coverage@5%": 0.21826030199905885, "calibration/ece": 0.15480829878368255, "calibration/mean_confidence": 0.48369991354676606, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00869140625, "completions/max_length": 1418.6, "completions/max_terminated_length": 1418.6, "completions/mean_length": 270.27890625, "completions/mean_terminated_length": 272.670263671875, "completions/min_length": 0.0, "completions/min_terminated_length": 118.4, "epoch": 0.592, "grad_norm": 0.005508663132786751, "learning_rate": 1e-06, "loss": -0.0399, "num_tokens": 636887635.0, "reward": 0.9581284046173095, "reward_std": 0.09956228137016296, "rewards/accuracy_reward": 0.53955078125, "rewards/batch_coverage_0": 0.4133278489112854, "rewards/batch_coverage_1": 0.4133278489112854, "rewards/batch_coverage_10": 0.44717544317245483, "rewards/batch_coverage_15": 0.4516117811203003, "rewards/batch_coverage_20": 0.4561953365802765, "rewards/batch_coverage_25": 0.4570241093635559, "rewards/batch_coverage_5": 0.43275184035301206, "rewards/brier_reward": 0.8061846494674683, "rewards/confidence_uniqueness_reward": 0.9413105726242066, "rewards/format_reward": 0.99130859375, "rewards/frontier_entropy_batch_reward": -0.25972045958042145, "signal/accuracy_reward/centered_abs_mean": 0.088555908203125, "signal/accuracy_reward/group_std_mean": 0.11966662853956223, "signal/accuracy_reward/group_zero_std_frac": 0.64375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7483019828796387, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0442779541015625, "signal/advantage_abs_mean": 0.730834710597992, "signal/advantage_pre_scale_abs_mean": 0.07057239040732384, "signal/advantage_pre_scale_std": 0.12372962981462479, "signal/advantage_std": 0.9827643036842346, "signal/batch_coverage_0/centered_abs_mean": 0.1381935030221939, "signal/batch_coverage_0/group_std_mean": 0.17919765114784242, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.034601961821317674, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.001976167061366141, "signal/batch_coverage_1/centered_abs_mean": 0.1381935030221939, "signal/batch_coverage_1/group_std_mean": 0.17919765114784242, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.034601961821317674, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.001976167061366141, "signal/batch_coverage_10/centered_abs_mean": 0.14485456943511962, "signal/batch_coverage_10/group_std_mean": 0.19011588096618653, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03615746423602104, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020714202895760537, "signal/batch_coverage_15/centered_abs_mean": 0.14329394698143005, "signal/batch_coverage_15/group_std_mean": 0.18823909163475036, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.035794655233621596, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002049103332683444, "signal/batch_coverage_20/centered_abs_mean": 0.14287793934345244, "signal/batch_coverage_20/group_std_mean": 0.1883653312921524, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.035708678513765336, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020431545563042164, "signal/batch_coverage_25/centered_abs_mean": 0.1435356080532074, "signal/batch_coverage_25/group_std_mean": 0.18936876356601715, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03586432859301567, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0020525591680780053, "signal/batch_coverage_5/centered_abs_mean": 0.14292764365673066, "signal/batch_coverage_5/group_std_mean": 0.18623073101043702, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03571743853390217, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020438652485609055, "signal/brier_reward/centered_abs_mean": 0.10219440907239914, "signal/brier_reward/group_std_mean": 0.13913445472717284, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17841624617576599, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010219440795481205, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026426216587424278, "signal/confidence_uniqueness_reward/group_std_mean": 0.046144616603851316, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04587032720446586, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026426217518746853, "signal/format_reward/centered_abs_mean": 0.015716552734375, "signal/format_reward/group_std_mean": 0.03357396759092808, "signal/format_reward/group_zero_std_frac": 0.846875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1341138780117035, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0078582763671875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2888515055179596, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3633566081523895, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.506644070148468, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02888515144586563, "step": 185 }, { "calibration/aurc": 0.19599936396909629, "calibration/batch_distribution_entropy": 0.9471727617725497, "calibration/buffer_distribution_entropy": 0.9885040321363311, "calibration/confidence_entropy": 0.4401101556912829, "calibration/coverage@0%": 0.12422711072406271, "calibration/coverage@1%": 0.13250435227854582, "calibration/coverage@10%": 0.387829161677371, "calibration/coverage@15%": 0.49070235172216375, "calibration/coverage@20%": 0.5777213832141951, "calibration/coverage@25%": 0.642066400167721, "calibration/coverage@30%": 0.7210643750176389, "calibration/coverage@5%": 0.2507637549338591, "calibration/ece": 0.08982427851874988, "calibration/mean_confidence": 0.47753607038301726, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0154296875, "completions/max_length": 1333.6, "completions/max_terminated_length": 1333.6, "completions/mean_length": 287.44697265625, "completions/mean_terminated_length": 291.9261962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 125.6, "epoch": 0.608, "grad_norm": 0.004835889209061861, "learning_rate": 1e-06, "loss": -0.059, "num_tokens": 654830580.0, "reward": 0.9553421139717102, "reward_std": 0.1128614827990532, "rewards/accuracy_reward": 0.54013671875, "rewards/batch_coverage_0": 0.43205046057701113, "rewards/batch_coverage_1": 0.43205046057701113, "rewards/batch_coverage_10": 0.45896238684654234, "rewards/batch_coverage_15": 0.46331515312194826, "rewards/batch_coverage_20": 0.46950970888137816, "rewards/batch_coverage_25": 0.4711323916912079, "rewards/batch_coverage_5": 0.44750791788101196, "rewards/brier_reward": 0.8163456797599793, "rewards/confidence_uniqueness_reward": 0.9338125109672546, "rewards/format_reward": 0.9845703125, "rewards/frontier_entropy_batch_reward": -0.27422977685928346, "signal/accuracy_reward/centered_abs_mean": 0.090240478515625, "signal/accuracy_reward/group_std_mean": 0.1210777685046196, "signal/accuracy_reward/group_zero_std_frac": 0.646875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7534727334976197, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0451202392578125, "signal/advantage_abs_mean": 0.7139726996421814, "signal/advantage_pre_scale_abs_mean": 0.07716420143842698, "signal/advantage_pre_scale_std": 0.14323937892913818, "signal/advantage_std": 0.9828421831130981, "signal/batch_coverage_0/centered_abs_mean": 0.13462951481342317, "signal/batch_coverage_0/group_std_mean": 0.17377310991287231, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03228494115173817, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019252020167186857, "signal/batch_coverage_1/centered_abs_mean": 0.13462951481342317, "signal/batch_coverage_1/group_std_mean": 0.17377310991287231, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03228494115173817, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019252020167186857, "signal/batch_coverage_10/centered_abs_mean": 0.1441013604402542, "signal/batch_coverage_10/group_std_mean": 0.18676916658878326, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03450990542769432, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020606494974344967, "signal/batch_coverage_15/centered_abs_mean": 0.1440788596868515, "signal/batch_coverage_15/group_std_mean": 0.18676540851593018, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03451910987496376, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020603276556357742, "signal/batch_coverage_20/centered_abs_mean": 0.14172255396842956, "signal/batch_coverage_20/group_std_mean": 0.18521577417850493, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03395959660410881, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020266325445845723, "signal/batch_coverage_25/centered_abs_mean": 0.1393471211194992, "signal/batch_coverage_25/group_std_mean": 0.182856085896492, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03335911333560944, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0019926637876778843, "signal/batch_coverage_5/centered_abs_mean": 0.1396337330341339, "signal/batch_coverage_5/group_std_mean": 0.18044842779636383, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03343283012509346, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0019967623986303805, "signal/brier_reward/centered_abs_mean": 0.10754630565643311, "signal/brier_reward/group_std_mean": 0.14926459193229674, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1797608643770218, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010754630714654923, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.036745325475931165, "signal/confidence_uniqueness_reward/group_std_mean": 0.06699450463056564, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.061678997427225116, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036745326593518256, "signal/format_reward/centered_abs_mean": 0.02781982421875, "signal/format_reward/group_std_mean": 0.05717283710837364, "signal/format_reward/group_zero_std_frac": 0.75, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2333482623100281, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013909912109375, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29573175609111785, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36904223561286925, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4965230643749237, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029573175683617593, "step": 190 }, { "calibration/aurc": 0.19647939724342872, "calibration/batch_distribution_entropy": 0.9693845430347352, "calibration/buffer_distribution_entropy": 0.9883685744303129, "calibration/confidence_entropy": 0.45914596746307695, "calibration/coverage@0%": 0.07503450741176323, "calibration/coverage@1%": 0.08932022169747753, "calibration/coverage@10%": 0.33458487664513836, "calibration/coverage@15%": 0.45889528752778574, "calibration/coverage@20%": 0.5579918217023472, "calibration/coverage@25%": 0.6432280912845998, "calibration/coverage@30%": 0.7236939216479017, "calibration/coverage@5%": 0.24648746194588217, "calibration/ece": 0.12530991598547006, "calibration/mean_confidence": 0.5254408385195928, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01689453125, "completions/max_length": 1328.0, "completions/max_terminated_length": 1328.0, "completions/mean_length": 301.00869140625, "completions/mean_terminated_length": 306.2028564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 126.8, "epoch": 0.624, "grad_norm": 0.0048108589835464954, "learning_rate": 1e-06, "loss": -0.0699, "num_tokens": 673256813.0, "reward": 0.9550261974334717, "reward_std": 0.11695955246686936, "rewards/accuracy_reward": 0.53720703125, "rewards/batch_coverage_0": 0.4179235279560089, "rewards/batch_coverage_1": 0.4179235279560089, "rewards/batch_coverage_10": 0.45245782732963563, "rewards/batch_coverage_15": 0.4575910210609436, "rewards/batch_coverage_20": 0.46224952340126035, "rewards/batch_coverage_25": 0.46388591527938844, "rewards/batch_coverage_5": 0.4407612144947052, "rewards/brier_reward": 0.8106015086174011, "rewards/confidence_uniqueness_reward": 0.9344212055206299, "rewards/format_reward": 0.98310546875, "rewards/frontier_entropy_batch_reward": -0.2414526730775833, "signal/accuracy_reward/centered_abs_mean": 0.092462158203125, "signal/accuracy_reward/group_std_mean": 0.1260865330696106, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7334414839744567, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0462310791015625, "signal/advantage_abs_mean": 0.7067392587661743, "signal/advantage_pre_scale_abs_mean": 0.0787854865193367, "signal/advantage_pre_scale_std": 0.14262742698192596, "signal/advantage_std": 0.9829223394393921, "signal/batch_coverage_0/centered_abs_mean": 0.15231256783008576, "signal/batch_coverage_0/group_std_mean": 0.19604605734348296, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03467583134770393, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002178069809451699, "signal/batch_coverage_1/centered_abs_mean": 0.15231256783008576, "signal/batch_coverage_1/group_std_mean": 0.19604605734348296, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03467583134770393, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002178069809451699, "signal/batch_coverage_10/centered_abs_mean": 0.16275534629821778, "signal/batch_coverage_10/group_std_mean": 0.20997639298439025, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.0370567686855793, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0023274014238268137, "signal/batch_coverage_15/centered_abs_mean": 0.16307214200496672, "signal/batch_coverage_15/group_std_mean": 0.2107671707868576, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03714829385280609, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0023319316562265156, "signal/batch_coverage_20/centered_abs_mean": 0.1629529505968094, "signal/batch_coverage_20/group_std_mean": 0.21109949946403503, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03712652400135994, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0023302271030843256, "signal/batch_coverage_25/centered_abs_mean": 0.16004838049411774, "signal/batch_coverage_25/group_std_mean": 0.20808916687965393, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03642952218651772, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002288691932335496, "signal/batch_coverage_5/centered_abs_mean": 0.15944549441337585, "signal/batch_coverage_5/group_std_mean": 0.20555467605590821, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.036301738768816, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0022800705395638944, "signal/brier_reward/centered_abs_mean": 0.11719284057617188, "signal/brier_reward/group_std_mean": 0.15894266068935395, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18694722950458526, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011719284206628799, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03853886350989342, "signal/confidence_uniqueness_reward/group_std_mean": 0.07142309993505477, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.061740058660507205, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003853886341676116, "signal/format_reward/centered_abs_mean": 0.030364990234375, "signal/format_reward/group_std_mean": 0.06245248168706894, "signal/format_reward/group_zero_std_frac": 0.728125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2433932214975357, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0151824951171875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2803094804286957, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35295377373695375, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4465214192867279, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02803094796836376, "step": 195 }, { "calibration/aurc": 0.22468700970935918, "calibration/batch_distribution_entropy": 0.9575049131293181, "calibration/buffer_distribution_entropy": 0.9880757439242596, "calibration/confidence_entropy": 0.4600728120540899, "calibration/coverage@0%": 0.09614797444733672, "calibration/coverage@1%": 0.1255714336918695, "calibration/coverage@10%": 0.3537883171019663, "calibration/coverage@15%": 0.43703556909152963, "calibration/coverage@20%": 0.5238639030046074, "calibration/coverage@25%": 0.6189128796513287, "calibration/coverage@30%": 0.6953550272097249, "calibration/coverage@5%": 0.23783649905851054, "calibration/ece": 0.13375213264156954, "calibration/mean_confidence": 0.5644211647402342, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00927734375, "completions/max_length": 1226.4, "completions/max_terminated_length": 1226.4, "completions/mean_length": 291.28251953125, "completions/mean_terminated_length": 294.0164733886719, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.64, "grad_norm": 0.005845364648848772, "learning_rate": 1e-06, "loss": -0.0409, "num_tokens": 691582234.0, "reward": 0.9772037029266357, "reward_std": 0.0971896544098854, "rewards/accuracy_reward": 0.5740234375, "rewards/batch_coverage_0": 0.44770985245704653, "rewards/batch_coverage_1": 0.44770985245704653, "rewards/batch_coverage_10": 0.47898629307746887, "rewards/batch_coverage_15": 0.48492227792739867, "rewards/batch_coverage_20": 0.48741101026535033, "rewards/batch_coverage_25": 0.48993545174598696, "rewards/batch_coverage_5": 0.46683679819107055, "rewards/brier_reward": 0.8279487013816833, "rewards/confidence_uniqueness_reward": 0.9387916803359986, "rewards/format_reward": 0.99072265625, "rewards/frontier_entropy_batch_reward": -0.29083598852157594, "signal/accuracy_reward/centered_abs_mean": 0.0724365234375, "signal/accuracy_reward/group_std_mean": 0.10086787045001984, "signal/accuracy_reward/group_zero_std_frac": 0.6875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.6660116136074066, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03621826171875, "signal/advantage_abs_mean": 0.7312237024307251, "signal/advantage_pre_scale_abs_mean": 0.0678968921303749, "signal/advantage_pre_scale_std": 0.1250871941447258, "signal/advantage_std": 0.9826941609382629, "signal/batch_coverage_0/centered_abs_mean": 0.1281747579574585, "signal/batch_coverage_0/group_std_mean": 0.16710792481899261, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03346509672701359, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0018328989623114466, "signal/batch_coverage_1/centered_abs_mean": 0.1281747579574585, "signal/batch_coverage_1/group_std_mean": 0.16710792481899261, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03346509672701359, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0018328989623114466, "signal/batch_coverage_10/centered_abs_mean": 0.13769169449806212, "signal/batch_coverage_10/group_std_mean": 0.18058500289916993, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.035960767045617105, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0019689911743625997, "signal/batch_coverage_15/centered_abs_mean": 0.13795506060123444, "signal/batch_coverage_15/group_std_mean": 0.1810959905385971, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03599607348442078, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.001972757396288216, "signal/batch_coverage_20/centered_abs_mean": 0.13890421986579896, "signal/batch_coverage_20/group_std_mean": 0.1826791375875473, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03625572361052036, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0019863303285092117, "signal/batch_coverage_25/centered_abs_mean": 0.13745348155498505, "signal/batch_coverage_25/group_std_mean": 0.18095198273658752, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.035954632610082624, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0019655847921967507, "signal/batch_coverage_5/centered_abs_mean": 0.1342063993215561, "signal/batch_coverage_5/group_std_mean": 0.17566960155963898, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03503052368760109, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.001919151423498988, "signal/brier_reward/centered_abs_mean": 0.09804150611162185, "signal/brier_reward/group_std_mean": 0.1333908811211586, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.177875754237175, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009804150648415088, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028178646042943, "signal/confidence_uniqueness_reward/group_std_mean": 0.04986298829317093, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05152831450104713, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028178644832223656, "signal/format_reward/centered_abs_mean": 0.016888427734375, "signal/format_reward/group_std_mean": 0.036415594071149825, "signal/format_reward/group_zero_std_frac": 0.834375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.15302720963954924, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0084442138671875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29316571950912473, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3644020974636078, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5366144418716431, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029316572844982146, "step": 200 }, { "epoch": 0.64, "eval_calibration/aurc": 0.44305710238719687, "eval_calibration/batch_distribution_entropy": 0.8833562952678735, "eval_calibration/buffer_distribution_entropy": 0.9873646700855458, "eval_calibration/confidence_entropy": 0.46500058783929477, "eval_calibration/coverage@0%": 0.1015625, "eval_calibration/coverage@1%": 0.1015625, "eval_calibration/coverage@10%": 0.1015625, "eval_calibration/coverage@15%": 0.125, "eval_calibration/coverage@20%": 0.1484375, "eval_calibration/coverage@25%": 0.2578125, "eval_calibration/coverage@30%": 0.328125, "eval_calibration/coverage@5%": 0.1015625, "eval_calibration/ece": 0.175218991086652, "eval_calibration/mean_confidence": 0.4452587010502278, "eval_completions/clipped_ratio": 0.004108297413793094, "eval_completions/max_length": 791.0, "eval_completions/max_terminated_length": 791.0, "eval_completions/mean_length": 302.0158920288086, "eval_completions/mean_terminated_length": 303.2757110595703, "eval_completions/min_length": 66.25, "eval_completions/min_terminated_length": 136.75, "eval_loss": 0.0, "eval_num_tokens": 691582234.0, "eval_reward": 0.7945444732904434, "eval_reward_std": 0.23430271446704865, "eval_rewards/accuracy_reward": 0.416015625, "eval_rewards/batch_coverage_0": 0.19991468638181686, "eval_rewards/batch_coverage_1": 0.19991468638181686, "eval_rewards/batch_coverage_10": 0.18121831491589546, "eval_rewards/batch_coverage_15": 0.16571493819355965, "eval_rewards/batch_coverage_20": 0.1504622232168913, "eval_rewards/batch_coverage_25": 0.12405495345592499, "eval_rewards/batch_coverage_5": 0.19991468638181686, "eval_rewards/brier_reward": 0.8158539831638336, "eval_rewards/confidence_uniqueness_reward": 0.8905068784952164, "eval_rewards/format_reward": 0.99609375, "eval_rewards/frontier_entropy_batch_reward": -0.99609375, "eval_runtime": 45.8336, "eval_samples_per_second": 10.909, "eval_signal/accuracy_reward/centered_abs_mean": 0.4730224609375, "eval_signal/accuracy_reward/group_std_mean": 0.49389340728521347, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0130815505981445, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23651123046875, "eval_signal/advantage_abs_mean": 0.9282233417034149, "eval_signal/advantage_pre_scale_abs_mean": 0.21765509992837906, "eval_signal/advantage_pre_scale_std": 0.23197273164987564, "eval_signal/advantage_std": 0.987695038318634, "eval_signal/batch_coverage_0/centered_abs_mean": 0.3202243745326996, "eval_signal/batch_coverage_0/group_std_mean": 0.3795798420906067, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.019652313785627484, "eval_signal/batch_coverage_0/weight": 0.014299999922513962, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.0045792084420099854, "eval_signal/batch_coverage_1/centered_abs_mean": 0.3202243745326996, "eval_signal/batch_coverage_1/group_std_mean": 0.3795798420906067, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.019652313785627484, "eval_signal/batch_coverage_1/weight": 0.014299999922513962, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.0045792084420099854, "eval_signal/batch_coverage_10/centered_abs_mean": 0.2864103727042675, "eval_signal/batch_coverage_10/group_std_mean": 0.3406292721629143, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01759169646538794, "eval_signal/batch_coverage_10/weight": 0.014299999922513962, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.004095668322406709, "eval_signal/batch_coverage_15/centered_abs_mean": 0.24582325667142868, "eval_signal/batch_coverage_15/group_std_mean": 0.2919539734721184, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.015123256016522646, "eval_signal/batch_coverage_15/weight": 0.014299999922513962, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.0035152725758962333, "eval_signal/batch_coverage_20/centered_abs_mean": 0.21553993970155716, "eval_signal/batch_coverage_20/group_std_mean": 0.2582678012549877, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013275267789140344, "eval_signal/batch_coverage_20/weight": 0.014299999922513962, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.0030822211410850286, "eval_signal/batch_coverage_25/centered_abs_mean": 0.17267810739576817, "eval_signal/batch_coverage_25/group_std_mean": 0.20795756578445435, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.010598070221021771, "eval_signal/batch_coverage_25/weight": 0.014299999922513962, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.002469296916387975, "eval_signal/batch_coverage_5/centered_abs_mean": 0.3202243745326996, "eval_signal/batch_coverage_5/group_std_mean": 0.3795798420906067, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.019652313785627484, "eval_signal/batch_coverage_5/weight": 0.014299999922513962, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.0045792084420099854, "eval_signal/brier_reward/centered_abs_mean": 0.18581411615014076, "eval_signal/brier_reward/group_std_mean": 0.23873290419578552, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07966925017535686, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01858141180127859, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04621936194598675, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06602886598557234, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019818986766040325, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004621936357580125, "eval_signal/format_reward/centered_abs_mean": 0.007568359375, "eval_signal/format_reward/group_std_mean": 0.022097086533904076, "eval_signal/format_reward/group_zero_std_frac": 0.875, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.016376281157135963, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0037841796875, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.007568359375, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.022097086533904076, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.875, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0032752566039562225, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0007568359724245965, "eval_steps_per_second": 0.087, "step": 200 }, { "calibration/aurc": 0.38683176341008096, "calibration/batch_distribution_entropy": 0.9805257817315436, "calibration/buffer_distribution_entropy": 0.9879561129306393, "calibration/confidence_entropy": 0.49477689589328566, "calibration/coverage@0%": 0.003923112534299723, "calibration/coverage@1%": 0.003923112534299723, "calibration/coverage@10%": 0.012533680048976825, "calibration/coverage@15%": 0.021561765810288375, "calibration/coverage@20%": 0.15010488087906798, "calibration/coverage@25%": 0.3062007384970836, "calibration/coverage@30%": 0.4057120030238863, "calibration/coverage@5%": 0.003923112534299723, "calibration/ece": 0.10678735465585007, "calibration/mean_confidence": 0.4535179735789959, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0029296875, "completions/max_length": 1202.4, "completions/max_terminated_length": 1202.4, "completions/mean_length": 291.9689453125, "completions/mean_terminated_length": 292.84205932617186, "completions/min_length": 0.0, "completions/min_terminated_length": 115.4, "epoch": 0.656, "grad_norm": 0.005559125449508429, "learning_rate": 1e-06, "loss": -0.0148, "num_tokens": 709428540.0, "reward": 0.9483369946479797, "reward_std": 0.08952396661043167, "rewards/accuracy_reward": 0.50654296875, "rewards/batch_coverage_0": 0.39867132902145386, "rewards/batch_coverage_1": 0.39867132902145386, "rewards/batch_coverage_10": 0.4268028914928436, "rewards/batch_coverage_15": 0.4317127227783203, "rewards/batch_coverage_20": 0.43587940335273745, "rewards/batch_coverage_25": 0.4377157986164093, "rewards/batch_coverage_5": 0.4093734323978424, "rewards/brier_reward": 0.8042103409767151, "rewards/confidence_uniqueness_reward": 0.9491950392723083, "rewards/format_reward": 0.99697265625, "rewards/frontier_entropy_batch_reward": -0.20786570012569427, "signal/accuracy_reward/centered_abs_mean": 0.083685302734375, "signal/accuracy_reward/group_std_mean": 0.1158929094672203, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7622359275817872, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0418426513671875, "signal/advantage_abs_mean": 0.7417587041854858, "signal/advantage_pre_scale_abs_mean": 0.0644775539636612, "signal/advantage_pre_scale_std": 0.11049062460660934, "signal/advantage_std": 0.9826884031295776, "signal/batch_coverage_0/centered_abs_mean": 0.1362083911895752, "signal/batch_coverage_0/group_std_mean": 0.17440303266048432, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.035590830445289615, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019477800466120242, "signal/batch_coverage_1/centered_abs_mean": 0.1362083911895752, "signal/batch_coverage_1/group_std_mean": 0.17440303266048432, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.035590830445289615, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019477800466120242, "signal/batch_coverage_10/centered_abs_mean": 0.14439732134342192, "signal/batch_coverage_10/group_std_mean": 0.1864805370569229, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03772576525807381, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002064881706610322, "signal/batch_coverage_15/centered_abs_mean": 0.14472835958004, "signal/batch_coverage_15/group_std_mean": 0.18738215863704683, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03775163665413857, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020696154795587065, "signal/batch_coverage_20/centered_abs_mean": 0.14556609988212585, "signal/batch_coverage_20/group_std_mean": 0.18835518658161163, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.037985429912805554, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020815952215343715, "signal/batch_coverage_25/centered_abs_mean": 0.14521074891090394, "signal/batch_coverage_25/group_std_mean": 0.18830261528491973, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03792016953229904, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0020765136461704968, "signal/batch_coverage_5/centered_abs_mean": 0.13912782669067383, "signal/batch_coverage_5/group_std_mean": 0.1785370737314224, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03636412620544434, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0019895279547199607, "signal/brier_reward/centered_abs_mean": 0.10540544837713242, "signal/brier_reward/group_std_mean": 0.13930064737796782, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1928122252225876, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010540544986724854, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0166949987411499, "signal/confidence_uniqueness_reward/group_std_mean": 0.02866690754890442, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030591477081179617, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001669499883428216, "signal/format_reward/centered_abs_mean": 0.005804443359375, "signal/format_reward/group_std_mean": 0.015443699806928635, "signal/format_reward/group_zero_std_frac": 0.91875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05277935266494751, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0029022216796875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2575319021940231, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3293066442012787, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.47052150368690493, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025753191113471983, "step": 205 }, { "calibration/aurc": 0.29186164006012527, "calibration/batch_distribution_entropy": 0.9697549633287637, "calibration/buffer_distribution_entropy": 0.9895672644051153, "calibration/confidence_entropy": 0.456708909674265, "calibration/coverage@0%": 0.03845063369159059, "calibration/coverage@1%": 0.06080357486806117, "calibration/coverage@10%": 0.19964626309757696, "calibration/coverage@15%": 0.24906825125678952, "calibration/coverage@20%": 0.2925871476414731, "calibration/coverage@25%": 0.3627023352931546, "calibration/coverage@30%": 0.4953006002253554, "calibration/coverage@5%": 0.14285450132901883, "calibration/ece": 0.17465116802533398, "calibration/mean_confidence": 0.5012703057618537, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 1274.4, "completions/max_terminated_length": 1274.4, "completions/mean_length": 283.65693359375, "completions/mean_terminated_length": 284.2161376953125, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.672, "grad_norm": 0.007196416612714529, "learning_rate": 1e-06, "loss": -0.0065, "num_tokens": 727246627.0, "reward": 0.9564376831054687, "reward_std": 0.08635174334049225, "rewards/accuracy_reward": 0.52783203125, "rewards/batch_coverage_0": 0.43465115427970885, "rewards/batch_coverage_1": 0.43465115427970885, "rewards/batch_coverage_10": 0.4558675825595856, "rewards/batch_coverage_15": 0.4573832511901855, "rewards/batch_coverage_20": 0.4624047577381134, "rewards/batch_coverage_25": 0.46600645780563354, "rewards/batch_coverage_5": 0.44994105100631715, "rewards/brier_reward": 0.8067228317260742, "rewards/confidence_uniqueness_reward": 0.9471780180931091, "rewards/format_reward": 0.998046875, "rewards/frontier_entropy_batch_reward": -0.27092793583869934, "signal/accuracy_reward/centered_abs_mean": 0.092340087890625, "signal/accuracy_reward/group_std_mean": 0.11898635029792785, "signal/accuracy_reward/group_zero_std_frac": 0.66875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8868414759635925, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0461700439453125, "signal/advantage_abs_mean": 0.7663137316703796, "signal/advantage_pre_scale_abs_mean": 0.06598134487867355, "signal/advantage_pre_scale_std": 0.10942787975072861, "signal/advantage_std": 0.9825973153114319, "signal/batch_coverage_0/centered_abs_mean": 0.14729402363300323, "signal/batch_coverage_0/group_std_mean": 0.18762222528457642, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0407107375562191, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.00210630449000746, "signal/batch_coverage_1/centered_abs_mean": 0.14729402363300323, "signal/batch_coverage_1/group_std_mean": 0.18762222528457642, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0407107375562191, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.00210630449000746, "signal/batch_coverage_10/centered_abs_mean": 0.152842777967453, "signal/batch_coverage_10/group_std_mean": 0.19522206783294677, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.042253092676401136, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021856517065316438, "signal/batch_coverage_15/centered_abs_mean": 0.15091572403907777, "signal/batch_coverage_15/group_std_mean": 0.19295818507671356, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04171362891793251, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002158094779588282, "signal/batch_coverage_20/centered_abs_mean": 0.14871154427528382, "signal/batch_coverage_20/group_std_mean": 0.190624138712883, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04108003005385399, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021265751449391244, "signal/batch_coverage_25/centered_abs_mean": 0.15191508829593658, "signal/batch_coverage_25/group_std_mean": 0.19481480419635772, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04194371327757836, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002172385808080435, "signal/batch_coverage_5/centered_abs_mean": 0.15180889368057252, "signal/batch_coverage_5/group_std_mean": 0.19379131197929383, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.041916343942284585, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002170867146924138, "signal/brier_reward/centered_abs_mean": 0.10807926654815674, "signal/brier_reward/group_std_mean": 0.1404752403497696, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.208378604054451, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010807927139103413, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.017506714165210723, "signal/confidence_uniqueness_reward/group_std_mean": 0.026788324862718583, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.033572429046034816, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001750671467743814, "signal/format_reward/centered_abs_mean": 0.00374755859375, "signal/format_reward/group_std_mean": 0.010039618238806725, "signal/format_reward/group_zero_std_frac": 0.946875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.03476648181676865, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001873779296875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.295850133895874, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3710406005382538, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5690179944038392, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029585013911128045, "step": 210 }, { "calibration/aurc": 0.27406756960213874, "calibration/batch_distribution_entropy": 0.9594633878497765, "calibration/buffer_distribution_entropy": 0.9895704882282012, "calibration/confidence_entropy": 0.48489940278812715, "calibration/coverage@0%": 0.006256115459882583, "calibration/coverage@1%": 0.006256115459882583, "calibration/coverage@10%": 0.21693447284735812, "calibration/coverage@15%": 0.36089163405088065, "calibration/coverage@20%": 0.46685420743639916, "calibration/coverage@25%": 0.5340982754403131, "calibration/coverage@30%": 0.6794879831213307, "calibration/coverage@5%": 0.05547486545988258, "calibration/ece": 0.13974260050395604, "calibration/mean_confidence": 0.5057844655791297, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0013671875, "completions/max_length": 1141.2, "completions/max_terminated_length": 1141.2, "completions/mean_length": 268.87275390625, "completions/mean_terminated_length": 269.24306030273436, "completions/min_length": 0.0, "completions/min_terminated_length": 109.8, "epoch": 0.688, "grad_norm": 0.006476765964180231, "learning_rate": 1e-06, "loss": -0.0067, "num_tokens": 744953804.0, "reward": 0.9682312369346618, "reward_std": 0.08881005644798279, "rewards/accuracy_reward": 0.54833984375, "rewards/batch_coverage_0": 0.39735434055328367, "rewards/batch_coverage_1": 0.39735434055328367, "rewards/batch_coverage_10": 0.4274145483970642, "rewards/batch_coverage_15": 0.43324387073516846, "rewards/batch_coverage_20": 0.4401055335998535, "rewards/batch_coverage_25": 0.44292500615119934, "rewards/batch_coverage_5": 0.41305392384529116, "rewards/brier_reward": 0.8091730356216431, "rewards/confidence_uniqueness_reward": 0.9507735729217529, "rewards/format_reward": 0.9986328125, "rewards/frontier_entropy_batch_reward": -0.23455523550510407, "signal/accuracy_reward/centered_abs_mean": 0.090570068359375, "signal/accuracy_reward/group_std_mean": 0.12094291895627976, "signal/accuracy_reward/group_zero_std_frac": 0.65, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8614253044128418, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0452850341796875, "signal/advantage_abs_mean": 0.7606715440750123, "signal/advantage_pre_scale_abs_mean": 0.06680728197097778, "signal/advantage_pre_scale_std": 0.11225859969854354, "signal/advantage_std": 0.9826042532920838, "signal/batch_coverage_0/centered_abs_mean": 0.12686864733695985, "signal/batch_coverage_0/group_std_mean": 0.1620851367712021, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03495507128536701, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0018142216606065631, "signal/batch_coverage_1/centered_abs_mean": 0.12686864733695985, "signal/batch_coverage_1/group_std_mean": 0.1620851367712021, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03495507128536701, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0018142216606065631, "signal/batch_coverage_10/centered_abs_mean": 0.13474982529878615, "signal/batch_coverage_10/group_std_mean": 0.17381620705127715, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.037191484868526456, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0019269224954769014, "signal/batch_coverage_15/centered_abs_mean": 0.1368393063545227, "signal/batch_coverage_15/group_std_mean": 0.17675977051258088, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03781359381973744, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.001956802140921354, "signal/batch_coverage_20/centered_abs_mean": 0.13707393407821655, "signal/batch_coverage_20/group_std_mean": 0.17823283076286317, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.037855605408549306, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0019601572304964064, "signal/batch_coverage_25/centered_abs_mean": 0.1342383250594139, "signal/batch_coverage_25/group_std_mean": 0.17535412609577178, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03705654367804527, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.001919607981108129, "signal/batch_coverage_5/centered_abs_mean": 0.13101311922073364, "signal/batch_coverage_5/group_std_mean": 0.1678726315498352, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.036108778417110445, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0018734877230599523, "signal/brier_reward/centered_abs_mean": 0.09857234209775925, "signal/brier_reward/group_std_mean": 0.12943488359451294, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18886724412441253, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009857234545052052, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015221798233687878, "signal/confidence_uniqueness_reward/group_std_mean": 0.023016730323433876, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029279665648937227, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001522179855965078, "signal/format_reward/centered_abs_mean": 0.00264892578125, "signal/format_reward/group_std_mean": 0.0077339802403002976, "signal/format_reward/group_zero_std_frac": 0.95625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.02610982470214367, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001324462890625, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28410218358039857, "signal/frontier_entropy_batch_reward/group_std_mean": 0.355901825428009, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5433173775672913, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02841021753847599, "step": 215 }, { "calibration/aurc": 0.19996440446534028, "calibration/batch_distribution_entropy": 0.9567731892139981, "calibration/buffer_distribution_entropy": 0.9900235492880984, "calibration/confidence_entropy": 0.4509047069588788, "calibration/coverage@0%": 0.02782164920762826, "calibration/coverage@1%": 0.02782164920762826, "calibration/coverage@10%": 0.30690071790702583, "calibration/coverage@15%": 0.422679509731975, "calibration/coverage@20%": 0.5450637356826291, "calibration/coverage@25%": 0.6725818932024864, "calibration/coverage@30%": 0.7527977329630483, "calibration/coverage@5%": 0.19183424225854726, "calibration/ece": 0.08909807422326481, "calibration/mean_confidence": 0.536424021286505, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 1099.4, "completions/max_terminated_length": 1099.4, "completions/mean_length": 258.80654296875, "completions/mean_terminated_length": 259.0838287353516, "completions/min_length": 0.0, "completions/min_terminated_length": 112.8, "epoch": 0.704, "grad_norm": 0.005805605091154575, "learning_rate": 1e-06, "loss": -0.0065, "num_tokens": 762470127.0, "reward": 0.9764017224311828, "reward_std": 0.08271686583757401, "rewards/accuracy_reward": 0.55458984375, "rewards/batch_coverage_0": 0.4308918535709381, "rewards/batch_coverage_1": 0.4308918535709381, "rewards/batch_coverage_10": 0.4526013910770416, "rewards/batch_coverage_15": 0.4595561683177948, "rewards/batch_coverage_20": 0.466141951084137, "rewards/batch_coverage_25": 0.46850630044937136, "rewards/batch_coverage_5": 0.44223862886428833, "rewards/brier_reward": 0.8262721180915833, "rewards/confidence_uniqueness_reward": 0.950434684753418, "rewards/format_reward": 0.99873046875, "rewards/frontier_entropy_batch_reward": -0.22985949814319612, "signal/accuracy_reward/centered_abs_mean": 0.080413818359375, "signal/accuracy_reward/group_std_mean": 0.10305112153291703, "signal/accuracy_reward/group_zero_std_frac": 0.71875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7740343332290649, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0402069091796875, "signal/advantage_abs_mean": 0.7796499013900757, "signal/advantage_pre_scale_abs_mean": 0.06386686488986015, "signal/advantage_pre_scale_std": 0.10629049390554428, "signal/advantage_std": 0.9825819253921508, "signal/batch_coverage_0/centered_abs_mean": 0.13991228342056275, "signal/batch_coverage_0/group_std_mean": 0.17689195573329924, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03878602460026741, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0020007457584142684, "signal/batch_coverage_1/centered_abs_mean": 0.13991228342056275, "signal/batch_coverage_1/group_std_mean": 0.17689195573329924, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03878602460026741, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0020007457584142684, "signal/batch_coverage_10/centered_abs_mean": 0.14665709733963012, "signal/batch_coverage_10/group_std_mean": 0.18625611662864686, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.040611236542463305, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020971963647753, "signal/batch_coverage_15/centered_abs_mean": 0.14724206030368805, "signal/batch_coverage_15/group_std_mean": 0.18765614330768585, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.040764973312616345, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0021055614575743674, "signal/batch_coverage_20/centered_abs_mean": 0.1504174143075943, "signal/batch_coverage_20/group_std_mean": 0.19247474074363707, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04158801585435867, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021509690210223197, "signal/batch_coverage_25/centered_abs_mean": 0.15163839161396026, "signal/batch_coverage_25/group_std_mean": 0.1941525250673294, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04197103381156921, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002168428944423795, "signal/batch_coverage_5/centered_abs_mean": 0.1431538850069046, "signal/batch_coverage_5/group_std_mean": 0.18127716183662415, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.039674467593431476, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020471004536375403, "signal/brier_reward/centered_abs_mean": 0.09896044880151748, "signal/brier_reward/group_std_mean": 0.1299522638320923, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19169933199882508, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009896045178174972, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014785858243703843, "signal/confidence_uniqueness_reward/group_std_mean": 0.02252316027879715, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028771713748574256, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014785858802497387, "signal/format_reward/centered_abs_mean": 0.002459716796875, "signal/format_reward/group_std_mean": 0.007181553030386567, "signal/format_reward/group_zero_std_frac": 0.959375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.023820652440190316, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012298583984375, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28561203479766845, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35598132014274597, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5556212723255157, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02856120392680168, "step": 220 }, { "calibration/aurc": 0.2123510723959514, "calibration/batch_distribution_entropy": 0.9698352011806625, "calibration/buffer_distribution_entropy": 0.9893924504398015, "calibration/confidence_entropy": 0.4456485289692901, "calibration/coverage@0%": 0.08373927396300987, "calibration/coverage@1%": 0.14006280337477456, "calibration/coverage@10%": 0.2483180117081079, "calibration/coverage@15%": 0.28428921268850005, "calibration/coverage@20%": 0.5425642662887074, "calibration/coverage@25%": 0.6289766227552664, "calibration/coverage@30%": 0.7533506844518629, "calibration/coverage@5%": 0.19556372249242163, "calibration/ece": 0.14619324149307394, "calibration/mean_confidence": 0.49175718656707834, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 1078.4, "completions/max_terminated_length": 1078.4, "completions/mean_length": 265.30068359375, "completions/mean_terminated_length": 265.55845336914064, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.72, "grad_norm": 0.006270843092352152, "learning_rate": 1e-06, "loss": -0.0032, "num_tokens": 780196662.0, "reward": 0.9827430009841919, "reward_std": 0.08292621672153473, "rewards/accuracy_reward": 0.5748046875, "rewards/batch_coverage_0": 0.4352779507637024, "rewards/batch_coverage_1": 0.4352779507637024, "rewards/batch_coverage_10": 0.45996217131614686, "rewards/batch_coverage_15": 0.4620618462562561, "rewards/batch_coverage_20": 0.46976361274719236, "rewards/batch_coverage_25": 0.47316410541534426, "rewards/batch_coverage_5": 0.45016440749168396, "rewards/brier_reward": 0.8190774083137512, "rewards/confidence_uniqueness_reward": 0.9467625141143798, "rewards/format_reward": 0.9990234375, "rewards/frontier_entropy_batch_reward": -0.26310152411460874, "signal/accuracy_reward/centered_abs_mean": 0.0859619140625, "signal/accuracy_reward/group_std_mean": 0.11375608295202255, "signal/accuracy_reward/group_zero_std_frac": 0.671875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8471159696578979, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04298095703125, "signal/advantage_abs_mean": 0.7652387022972107, "signal/advantage_pre_scale_abs_mean": 0.06312915831804275, "signal/advantage_pre_scale_std": 0.10508692562580109, "signal/advantage_std": 0.9825536012649536, "signal/batch_coverage_0/centered_abs_mean": 0.13881113529205322, "signal/batch_coverage_0/group_std_mean": 0.17654796242713927, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.039122577756643295, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019849991658702493, "signal/batch_coverage_1/centered_abs_mean": 0.13881113529205322, "signal/batch_coverage_1/group_std_mean": 0.17654796242713927, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.039122577756643295, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019849991658702493, "signal/batch_coverage_10/centered_abs_mean": 0.14516532719135283, "signal/batch_coverage_10/group_std_mean": 0.18465124368667601, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04085480272769928, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002075864188373089, "signal/batch_coverage_15/centered_abs_mean": 0.14553692936897278, "signal/batch_coverage_15/group_std_mean": 0.18506303429603577, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04095983579754829, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020811779890209437, "signal/batch_coverage_20/centered_abs_mean": 0.15031401813030243, "signal/batch_coverage_20/group_std_mean": 0.19162946045398713, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04231066554784775, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.00214949045330286, "signal/batch_coverage_25/centered_abs_mean": 0.15131179988384247, "signal/batch_coverage_25/group_std_mean": 0.1932460606098175, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04257652685046196, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021637586876749994, "signal/batch_coverage_5/centered_abs_mean": 0.14311102628707886, "signal/batch_coverage_5/group_std_mean": 0.18209123611450195, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04032421484589577, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002046487620100379, "signal/brier_reward/centered_abs_mean": 0.1019532933831215, "signal/brier_reward/group_std_mean": 0.1325368106365204, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2007545202970505, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010195329040288924, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.016168445721268652, "signal/confidence_uniqueness_reward/group_std_mean": 0.023263829201459883, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03172456994652748, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016168446512892843, "signal/format_reward/centered_abs_mean": 0.00189208984375, "signal/format_reward/group_std_mean": 0.00552427158690989, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01848184745758772, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000946044921875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2896900773048401, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36198447942733764, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5700592041015625, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02896900735795498, "step": 225 }, { "calibration/aurc": 0.19987491369889768, "calibration/batch_distribution_entropy": 0.9682028960945696, "calibration/buffer_distribution_entropy": 0.9892767039546803, "calibration/confidence_entropy": 0.4514352722551944, "calibration/coverage@0%": 0.033621269569471625, "calibration/coverage@1%": 0.033621269569471625, "calibration/coverage@10%": 0.19240918542074364, "calibration/coverage@15%": 0.43307546477495096, "calibration/coverage@20%": 0.5825441841976516, "calibration/coverage@25%": 0.7206694899706457, "calibration/coverage@30%": 0.8360804488747553, "calibration/coverage@5%": 0.07077574608610568, "calibration/ece": 0.1259782288823052, "calibration/mean_confidence": 0.5549969590745947, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00126953125, "completions/max_length": 987.0, "completions/max_terminated_length": 987.0, "completions/mean_length": 279.36240234375, "completions/mean_terminated_length": 279.71857299804685, "completions/min_length": 0.0, "completions/min_terminated_length": 117.2, "epoch": 0.736, "grad_norm": 0.006227577105164528, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 797996917.0, "reward": 0.990349292755127, "reward_std": 0.07996446937322617, "rewards/accuracy_reward": 0.584765625, "rewards/batch_coverage_0": 0.4305130660533905, "rewards/batch_coverage_1": 0.4305130660533905, "rewards/batch_coverage_10": 0.461780971288681, "rewards/batch_coverage_15": 0.4661433219909668, "rewards/batch_coverage_20": 0.47316175103187563, "rewards/batch_coverage_25": 0.4773706912994385, "rewards/batch_coverage_5": 0.4513450086116791, "rewards/brier_reward": 0.8201140403747559, "rewards/confidence_uniqueness_reward": 0.9489932417869568, "rewards/format_reward": 0.99873046875, "rewards/frontier_entropy_batch_reward": -0.23938325345516204, "signal/accuracy_reward/centered_abs_mean": 0.0771484375, "signal/accuracy_reward/group_std_mean": 0.1027161180973053, "signal/accuracy_reward/group_zero_std_frac": 0.703125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7959671258926392, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03857421875, "signal/advantage_abs_mean": 0.7633894324302674, "signal/advantage_pre_scale_abs_mean": 0.060272646695375444, "signal/advantage_pre_scale_std": 0.10310345590114593, "signal/advantage_std": 0.9824482798576355, "signal/batch_coverage_0/centered_abs_mean": 0.12894001603126526, "signal/batch_coverage_0/group_std_mean": 0.16431694328784943, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0384144626557827, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0018438422121107578, "signal/batch_coverage_1/centered_abs_mean": 0.12894001603126526, "signal/batch_coverage_1/group_std_mean": 0.16431694328784943, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0384144626557827, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0018438422121107578, "signal/batch_coverage_10/centered_abs_mean": 0.13949068188667296, "signal/batch_coverage_10/group_std_mean": 0.17866944074630736, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.041481245309114456, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0019947167951613665, "signal/batch_coverage_15/centered_abs_mean": 0.14124009311199187, "signal/batch_coverage_15/group_std_mean": 0.1812742084264755, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04199150204658508, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002019733446650207, "signal/batch_coverage_20/centered_abs_mean": 0.1427465260028839, "signal/batch_coverage_20/group_std_mean": 0.18400496244430542, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04241336733102798, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020412752870470285, "signal/batch_coverage_25/centered_abs_mean": 0.1435283601284027, "signal/batch_coverage_25/group_std_mean": 0.18493232727050782, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04260774925351143, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002052455488592386, "signal/batch_coverage_5/centered_abs_mean": 0.1360446184873581, "signal/batch_coverage_5/group_std_mean": 0.17352403700351715, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.040481310337781906, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0019454380497336387, "signal/brier_reward/centered_abs_mean": 0.09750867635011673, "signal/brier_reward/group_std_mean": 0.12824745923280717, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20277776420116425, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009750867821276188, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015282927080988884, "signal/confidence_uniqueness_reward/group_std_mean": 0.022277648746967315, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03166734613478184, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015282927313819527, "signal/format_reward/centered_abs_mean": 0.002423095703125, "signal/format_reward/group_std_mean": 0.006449723429977894, "signal/format_reward/group_zero_std_frac": 0.965625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.025392506457865237, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012115478515625, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2773844122886658, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3488776504993439, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5746127367019653, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02773844264447689, "step": 230 }, { "calibration/aurc": 0.23799171771509514, "calibration/batch_distribution_entropy": 0.980168539402238, "calibration/buffer_distribution_entropy": 0.9898585157062338, "calibration/confidence_entropy": 0.4811932019738524, "calibration/coverage@0%": 0.0625091911764706, "calibration/coverage@1%": 0.0675873161764706, "calibration/coverage@10%": 0.2684972426470588, "calibration/coverage@15%": 0.36816942401960784, "calibration/coverage@20%": 0.4830346200980392, "calibration/coverage@25%": 0.5702052696078431, "calibration/coverage@30%": 0.6597074142156862, "calibration/coverage@5%": 0.14571231617647057, "calibration/ece": 0.10425353764749187, "calibration/mean_confidence": 0.4810001862033529, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001171875, "completions/max_length": 926.4, "completions/max_terminated_length": 926.4, "completions/mean_length": 269.08896484375, "completions/mean_terminated_length": 269.4028564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 122.8, "epoch": 0.752, "grad_norm": 0.005794130731374025, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 815979588.0, "reward": 0.9808092594146729, "reward_std": 0.08298141807317734, "rewards/accuracy_reward": 0.5640625, "rewards/batch_coverage_0": 0.4113637089729309, "rewards/batch_coverage_1": 0.4113637089729309, "rewards/batch_coverage_10": 0.44650899767875674, "rewards/batch_coverage_15": 0.45241751074790953, "rewards/batch_coverage_20": 0.4543795883655548, "rewards/batch_coverage_25": 0.4559151649475098, "rewards/batch_coverage_5": 0.4355119466781616, "rewards/brier_reward": 0.8173823356628418, "rewards/confidence_uniqueness_reward": 0.950950539112091, "rewards/format_reward": 0.998828125, "rewards/frontier_entropy_batch_reward": -0.21333999633789064, "signal/accuracy_reward/centered_abs_mean": 0.0805908203125, "signal/accuracy_reward/group_std_mean": 0.10987765192985535, "signal/accuracy_reward/group_zero_std_frac": 0.671875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7611582159996033, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04029541015625, "signal/advantage_abs_mean": 0.7552126884460449, "signal/advantage_pre_scale_abs_mean": 0.061991773545742035, "signal/advantage_pre_scale_std": 0.10449737906455994, "signal/advantage_std": 0.982585608959198, "signal/batch_coverage_0/centered_abs_mean": 0.13574532866477967, "signal/batch_coverage_0/group_std_mean": 0.1734715759754181, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03727240189909935, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019411582965403796, "signal/batch_coverage_1/centered_abs_mean": 0.13574532866477967, "signal/batch_coverage_1/group_std_mean": 0.1734715759754181, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03727240189909935, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019411582965403796, "signal/batch_coverage_10/centered_abs_mean": 0.14469059854745864, "signal/batch_coverage_10/group_std_mean": 0.186430162191391, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03972938433289528, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.00206907547544688, "signal/batch_coverage_15/centered_abs_mean": 0.14610361456871032, "signal/batch_coverage_15/group_std_mean": 0.18892171382904052, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.040112358331680295, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020892817061394452, "signal/batch_coverage_20/centered_abs_mean": 0.14735167622566223, "signal/batch_coverage_20/group_std_mean": 0.19071140885353088, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04044778645038605, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002107128966599703, "signal/batch_coverage_25/centered_abs_mean": 0.1486743688583374, "signal/batch_coverage_25/group_std_mean": 0.19229290783405303, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04084197878837585, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002126043476164341, "signal/batch_coverage_5/centered_abs_mean": 0.14109062254428864, "signal/batch_coverage_5/group_std_mean": 0.1808608740568161, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03873511925339699, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020175960380584, "signal/brier_reward/centered_abs_mean": 0.09518031924962997, "signal/brier_reward/group_std_mean": 0.12545545548200607, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18371520936489105, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009518032148480415, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013876733742654324, "signal/confidence_uniqueness_reward/group_std_mean": 0.020745597779750824, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026762987300753594, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013876733602955938, "signal/format_reward/centered_abs_mean": 0.00225830078125, "signal/format_reward/group_std_mean": 0.0062928175088018175, "signal/format_reward/group_zero_std_frac": 0.965625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.021861393004655838, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001129150390625, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2675036698579788, "signal/frontier_entropy_batch_reward/group_std_mean": 0.341668963432312, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5147171020507812, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026750367507338522, "step": 235 }, { "calibration/aurc": 0.24226126920454516, "calibration/batch_distribution_entropy": 0.9618339604859422, "calibration/buffer_distribution_entropy": 0.990064646374362, "calibration/confidence_entropy": 0.4670138911337166, "calibration/coverage@0%": 0.09453125, "calibration/coverage@1%": 0.148046875, "calibration/coverage@10%": 0.29375, "calibration/coverage@15%": 0.328125, "calibration/coverage@20%": 0.46796875, "calibration/coverage@25%": 0.547265625, "calibration/coverage@30%": 0.644140625, "calibration/coverage@5%": 0.244921875, "calibration/ece": 0.13594110362016193, "calibration/mean_confidence": 0.5271706740493869, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1110.0, "completions/max_terminated_length": 1110.0, "completions/mean_length": 265.80791015625, "completions/mean_terminated_length": 265.9118988037109, "completions/min_length": 23.6, "completions/min_terminated_length": 119.2, "epoch": 0.768, "grad_norm": 0.006786343641579151, "learning_rate": 1e-06, "loss": -0.0046, "num_tokens": 833634165.0, "reward": 0.9572334289550781, "reward_std": 0.07809762060642242, "rewards/accuracy_reward": 0.5166015625, "rewards/batch_coverage_0": 0.4477431833744049, "rewards/batch_coverage_1": 0.4477431833744049, "rewards/batch_coverage_10": 0.47420247197151183, "rewards/batch_coverage_15": 0.47486208081245423, "rewards/batch_coverage_20": 0.4812659859657288, "rewards/batch_coverage_25": 0.481998997926712, "rewards/batch_coverage_5": 0.4603855133056641, "rewards/brier_reward": 0.8310378670692444, "rewards/confidence_uniqueness_reward": 0.9483831882476806, "rewards/format_reward": 0.999609375, "rewards/frontier_entropy_batch_reward": -0.2554940521717072, "signal/accuracy_reward/centered_abs_mean": 0.074072265625, "signal/accuracy_reward/group_std_mean": 0.09812594801187516, "signal/accuracy_reward/group_zero_std_frac": 0.721875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7703619718551635, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0370361328125, "signal/advantage_abs_mean": 0.7758382797241211, "signal/advantage_pre_scale_abs_mean": 0.060599684715270996, "signal/advantage_pre_scale_std": 0.10211093574762345, "signal/advantage_std": 0.9824242711067199, "signal/batch_coverage_0/centered_abs_mean": 0.13136267066001892, "signal/batch_coverage_0/group_std_mean": 0.16766657829284667, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03942425549030304, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0018784862011671066, "signal/batch_coverage_1/centered_abs_mean": 0.13136267066001892, "signal/batch_coverage_1/group_std_mean": 0.16766657829284667, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03942425549030304, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0018784862011671066, "signal/batch_coverage_10/centered_abs_mean": 0.13855439722537993, "signal/batch_coverage_10/group_std_mean": 0.17847085297107695, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.041663312911987306, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0019813278689980508, "signal/batch_coverage_15/centered_abs_mean": 0.13833429515361786, "signal/batch_coverage_15/group_std_mean": 0.17840952575206756, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04161200672388077, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0019781803712248802, "signal/batch_coverage_20/centered_abs_mean": 0.14173888862133027, "signal/batch_coverage_20/group_std_mean": 0.18329765796661376, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04259318187832832, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020268660970032214, "signal/batch_coverage_25/centered_abs_mean": 0.14233888685703278, "signal/batch_coverage_25/group_std_mean": 0.18399560451507568, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04274929314851761, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0020354459527879955, "signal/batch_coverage_5/centered_abs_mean": 0.13465096652507783, "signal/batch_coverage_5/group_std_mean": 0.1721017152070999, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.040450369566679, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.001925508794374764, "signal/brier_reward/centered_abs_mean": 0.0896749809384346, "signal/brier_reward/group_std_mean": 0.11739355921745301, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18861537277698517, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.008967497944831848, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014798154309391975, "signal/confidence_uniqueness_reward/group_std_mean": 0.019825227186083795, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.031269267573952673, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014798154355958105, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007893532142043113, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2812827706336975, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3491488456726074, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5921412825584411, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028128278255462647, "step": 240 }, { "calibration/aurc": 0.24932977897988673, "calibration/batch_distribution_entropy": 0.9554029734603953, "calibration/buffer_distribution_entropy": 0.9895763593851804, "calibration/confidence_entropy": 0.42567792957542994, "calibration/coverage@0%": 0.09453201443248532, "calibration/coverage@1%": 0.10937576443248531, "calibration/coverage@10%": 0.3043740826810176, "calibration/coverage@15%": 0.3973940496575342, "calibration/coverage@20%": 0.4798426797945206, "calibration/coverage@25%": 0.5415835983365949, "calibration/coverage@30%": 0.5865322284735812, "calibration/coverage@5%": 0.21290973581213307, "calibration/ece": 0.13702020641332538, "calibration/mean_confidence": 0.5154663940828144, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 852.4, "completions/max_terminated_length": 852.4, "completions/mean_length": 260.136328125, "completions/mean_terminated_length": 260.23645935058596, "completions/min_length": 71.0, "completions/min_terminated_length": 121.0, "epoch": 0.784, "grad_norm": 0.006491308566182852, "learning_rate": 1e-06, "loss": 0.0036, "num_tokens": 851472329.0, "reward": 0.9818279027938843, "reward_std": 0.07545744925737381, "rewards/accuracy_reward": 0.57529296875, "rewards/batch_coverage_0": 0.43470929861068724, "rewards/batch_coverage_1": 0.43470929861068724, "rewards/batch_coverage_10": 0.44828411340713503, "rewards/batch_coverage_15": 0.4518063485622406, "rewards/batch_coverage_20": 0.4546870529651642, "rewards/batch_coverage_25": 0.4580719113349915, "rewards/batch_coverage_5": 0.44084044694900515, "rewards/brier_reward": 0.7951297760009766, "rewards/confidence_uniqueness_reward": 0.9488907814025879, "rewards/format_reward": 0.999609375, "rewards/frontier_entropy_batch_reward": -0.24685774147510528, "signal/accuracy_reward/centered_abs_mean": 0.076690673828125, "signal/accuracy_reward/group_std_mean": 0.09880765974521637, "signal/accuracy_reward/group_zero_std_frac": 0.728125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7718483328819274, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0383453369140625, "signal/advantage_abs_mean": 0.7896058082580566, "signal/advantage_pre_scale_abs_mean": 0.05918871089816093, "signal/advantage_pre_scale_std": 0.09708251059055328, "signal/advantage_std": 0.9824390530586242, "signal/batch_coverage_0/centered_abs_mean": 0.14222476333379747, "signal/batch_coverage_0/group_std_mean": 0.17862839102745057, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04163095131516457, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0020338141359388827, "signal/batch_coverage_1/centered_abs_mean": 0.14222476333379747, "signal/batch_coverage_1/group_std_mean": 0.17862839102745057, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04163095131516457, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0020338141359388827, "signal/batch_coverage_10/centered_abs_mean": 0.14671584963798523, "signal/batch_coverage_10/group_std_mean": 0.1848911762237549, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.042957622557878494, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002098036650568247, "signal/batch_coverage_15/centered_abs_mean": 0.1478523552417755, "signal/batch_coverage_15/group_std_mean": 0.18667379319667815, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.043281296640634535, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0021142886485904454, "signal/batch_coverage_20/centered_abs_mean": 0.14663981795310974, "signal/batch_coverage_20/group_std_mean": 0.1858953207731247, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04296448454260826, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020969494245946406, "signal/batch_coverage_25/centered_abs_mean": 0.14821869730949402, "signal/batch_coverage_25/group_std_mean": 0.1883733570575714, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04351719543337822, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021195273380726575, "signal/batch_coverage_5/centered_abs_mean": 0.14388411343097687, "signal/batch_coverage_5/group_std_mean": 0.1807178020477295, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04210501462221146, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002057542884722352, "signal/brier_reward/centered_abs_mean": 0.10256388038396835, "signal/brier_reward/group_std_mean": 0.13275916874408722, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2102281779050827, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01025638859719038, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01494374144822359, "signal/confidence_uniqueness_reward/group_std_mean": 0.019778795540332794, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03137281015515327, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014943741960451006, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008183719962835312, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29007603526115416, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36126854419708254, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6033417701721191, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029007603973150255, "step": 245 }, { "calibration/aurc": 0.16903278555018358, "calibration/batch_distribution_entropy": 0.957089997582391, "calibration/buffer_distribution_entropy": 0.989071790702158, "calibration/confidence_entropy": 0.4465539348579215, "calibration/coverage@0%": 0.11685879403131114, "calibration/coverage@1%": 0.15361958781800392, "calibration/coverage@10%": 0.4617462695694717, "calibration/coverage@15%": 0.5513117661448141, "calibration/coverage@20%": 0.6224712573385519, "calibration/coverage@25%": 0.6889233732876712, "calibration/coverage@30%": 0.7768644508317025, "calibration/coverage@5%": 0.3037181996086106, "calibration/ece": 0.11512091478718908, "calibration/mean_confidence": 0.4906129472813886, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 875.6, "completions/max_terminated_length": 875.6, "completions/mean_length": 248.35615234375, "completions/mean_terminated_length": 248.42735900878907, "completions/min_length": 73.0, "completions/min_terminated_length": 121.8, "epoch": 0.8, "grad_norm": 0.0064397272653877735, "learning_rate": 1e-06, "loss": 0.0017, "num_tokens": 869026056.0, "reward": 0.9991943836212158, "reward_std": 0.0748121589422226, "rewards/accuracy_reward": 0.6056640625, "rewards/batch_coverage_0": 0.44797525405883787, "rewards/batch_coverage_1": 0.44797525405883787, "rewards/batch_coverage_10": 0.47836520075798034, "rewards/batch_coverage_15": 0.48425130248069764, "rewards/batch_coverage_20": 0.4894311368465424, "rewards/batch_coverage_25": 0.4941446602344513, "rewards/batch_coverage_5": 0.466780948638916, "rewards/brier_reward": 0.8302229762077331, "rewards/confidence_uniqueness_reward": 0.9474293708801269, "rewards/format_reward": 0.999609375, "rewards/frontier_entropy_batch_reward": -0.2852515548467636, "signal/accuracy_reward/centered_abs_mean": 0.0673095703125, "signal/accuracy_reward/group_std_mean": 0.09558814465999603, "signal/accuracy_reward/group_zero_std_frac": 0.70625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7255583643913269, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03365478515625, "signal/advantage_abs_mean": 0.7604370594024659, "signal/advantage_pre_scale_abs_mean": 0.05594034641981125, "signal/advantage_pre_scale_std": 0.09674849212169648, "signal/advantage_std": 0.9823638439178467, "signal/batch_coverage_0/centered_abs_mean": 0.13314661681652068, "signal/batch_coverage_0/group_std_mean": 0.17028163969516755, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04120338633656502, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019039965933188795, "signal/batch_coverage_1/centered_abs_mean": 0.13314661681652068, "signal/batch_coverage_1/group_std_mean": 0.17028163969516755, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04120338633656502, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019039965933188795, "signal/batch_coverage_10/centered_abs_mean": 0.14108843207359315, "signal/batch_coverage_10/group_std_mean": 0.1814536929130554, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.043681205809116365, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020175645360723136, "signal/batch_coverage_15/centered_abs_mean": 0.14084658324718474, "signal/batch_coverage_15/group_std_mean": 0.1811590611934662, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04360946193337441, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020141060929745437, "signal/batch_coverage_20/centered_abs_mean": 0.14108088612556458, "signal/batch_coverage_20/group_std_mean": 0.18242039680480956, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04367534294724464, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002017456665635109, "signal/batch_coverage_25/centered_abs_mean": 0.1456657886505127, "signal/batch_coverage_25/group_std_mean": 0.18809856474399567, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04508034512400627, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002083020796999335, "signal/batch_coverage_5/centered_abs_mean": 0.13754327595233917, "signal/batch_coverage_5/group_std_mean": 0.1764551192522049, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04256564825773239, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.00196686873678118, "signal/brier_reward/centered_abs_mean": 0.09132596403360367, "signal/brier_reward/group_std_mean": 0.11983357220888138, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19744545817375184, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009132596850395202, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015021534264087677, "signal/confidence_uniqueness_reward/group_std_mean": 0.01983652338385582, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03249132037162781, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015021534636616707, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007924291491508483, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2975069582462311, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3681724011898041, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6431950688362121, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0297506969422102, "step": 250 }, { "epoch": 0.8, "eval_calibration/aurc": 0.4056399663475222, "eval_calibration/batch_distribution_entropy": 0.9463703104773048, "eval_calibration/buffer_distribution_entropy": 0.9892823149568434, "eval_calibration/confidence_entropy": 0.4557911057849003, "eval_calibration/coverage@0%": 0.1171875, "eval_calibration/coverage@1%": 0.1171875, "eval_calibration/coverage@10%": 0.15625, "eval_calibration/coverage@15%": 0.171875, "eval_calibration/coverage@20%": 0.2265625, "eval_calibration/coverage@25%": 0.3359375, "eval_calibration/coverage@30%": 0.375, "eval_calibration/coverage@5%": 0.1171875, "eval_calibration/ece": 0.208780844006349, "eval_calibration/mean_confidence": 0.47532201440066835, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 655.25, "eval_completions/max_terminated_length": 655.25, "eval_completions/mean_length": 259.66365814208984, "eval_completions/mean_terminated_length": 259.66365814208984, "eval_completions/min_length": 132.25, "eval_completions/min_terminated_length": 132.25, "eval_loss": 0.0, "eval_num_tokens": 869026056.0, "eval_reward": 0.808761790394783, "eval_reward_std": 0.23153527826070786, "eval_rewards/accuracy_reward": 0.4453125, "eval_rewards/batch_coverage_0": 0.16463664919137955, "eval_rewards/batch_coverage_1": 0.16463664919137955, "eval_rewards/batch_coverage_10": 0.151813842356205, "eval_rewards/batch_coverage_15": 0.15031159296631813, "eval_rewards/batch_coverage_20": 0.12830344960093498, "eval_rewards/batch_coverage_25": 0.10411388799548149, "eval_rewards/batch_coverage_5": 0.16463664919137955, "eval_rewards/brier_reward": 0.8128635436296463, "eval_rewards/confidence_uniqueness_reward": 0.901123046875, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 29.3019, "eval_samples_per_second": 17.064, "eval_signal/accuracy_reward/centered_abs_mean": 0.473876953125, "eval_signal/accuracy_reward/group_std_mean": 0.4942095950245857, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0256902873516083, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2369384765625, "eval_signal/advantage_abs_mean": 0.9391559809446335, "eval_signal/advantage_pre_scale_abs_mean": 0.21801294013857841, "eval_signal/advantage_pre_scale_std": 0.22905661538243294, "eval_signal/advantage_std": 0.9876904189586639, "eval_signal/batch_coverage_0/centered_abs_mean": 0.28715651854872704, "eval_signal/batch_coverage_0/group_std_mean": 0.3519328162074089, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.017829164396971464, "eval_signal/batch_coverage_0/weight": 0.014299999922513962, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.004106338135898113, "eval_signal/batch_coverage_1/centered_abs_mean": 0.28715651854872704, "eval_signal/batch_coverage_1/group_std_mean": 0.3519328162074089, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.017829164396971464, "eval_signal/batch_coverage_1/weight": 0.014299999922513962, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.004106338135898113, "eval_signal/batch_coverage_10/centered_abs_mean": 0.2590860575437546, "eval_signal/batch_coverage_10/group_std_mean": 0.31707237660884857, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01609125966206193, "eval_signal/batch_coverage_10/weight": 0.014299999922513962, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.0037049305392429233, "eval_signal/batch_coverage_15/centered_abs_mean": 0.25385092943906784, "eval_signal/batch_coverage_15/group_std_mean": 0.31071092188358307, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.015773584134876728, "eval_signal/batch_coverage_15/weight": 0.014299999922513962, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.003630068327765912, "eval_signal/batch_coverage_20/centered_abs_mean": 0.1938122659921646, "eval_signal/batch_coverage_20/group_std_mean": 0.23961098864674568, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012016238179057837, "eval_signal/batch_coverage_20/weight": 0.014299999922513962, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.0027715153992176056, "eval_signal/batch_coverage_25/centered_abs_mean": 0.14399352297186852, "eval_signal/batch_coverage_25/group_std_mean": 0.1793827824294567, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.008922932553105056, "eval_signal/batch_coverage_25/weight": 0.014299999922513962, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.0020591074135154486, "eval_signal/batch_coverage_5/centered_abs_mean": 0.28715651854872704, "eval_signal/batch_coverage_5/group_std_mean": 0.3519328162074089, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.017829164396971464, "eval_signal/batch_coverage_5/weight": 0.014299999922513962, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.004106338135898113, "eval_signal/brier_reward/centered_abs_mean": 0.18446644395589828, "eval_signal/brier_reward/group_std_mean": 0.23762714117765427, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08009063266217709, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.018446644535288215, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.037994384765625, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04532748367637396, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.016460294369608164, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037994384765625, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.137, "step": 250 }, { "calibration/aurc": 0.19802390820258206, "calibration/batch_distribution_entropy": 0.9488017382610578, "calibration/buffer_distribution_entropy": 0.9891109492581016, "calibration/confidence_entropy": 0.4503818100967645, "calibration/coverage@0%": 0.037109375, "calibration/coverage@1%": 0.037109375, "calibration/coverage@10%": 0.291015625, "calibration/coverage@15%": 0.38515625, "calibration/coverage@20%": 0.559375, "calibration/coverage@25%": 0.65859375, "calibration/coverage@30%": 0.803125, "calibration/coverage@5%": 0.184765625, "calibration/ece": 0.11336487553026367, "calibration/mean_confidence": 0.5387612728269289, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 979.0, "completions/max_terminated_length": 979.0, "completions/mean_length": 246.88466796875, "completions/mean_terminated_length": 246.9087158203125, "completions/min_length": 96.6, "completions/min_terminated_length": 121.4, "epoch": 0.816, "grad_norm": 0.005885324906557798, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 886653323.0, "reward": 0.9899710774421692, "reward_std": 0.08030715435743332, "rewards/accuracy_reward": 0.5884765625, "rewards/batch_coverage_0": 0.4053687870502472, "rewards/batch_coverage_1": 0.4053687870502472, "rewards/batch_coverage_10": 0.4392300844192505, "rewards/batch_coverage_15": 0.44483659863471986, "rewards/batch_coverage_20": 0.44895498752593993, "rewards/batch_coverage_25": 0.4509244620800018, "rewards/batch_coverage_5": 0.424567312002182, "rewards/brier_reward": 0.8162455439567566, "rewards/confidence_uniqueness_reward": 0.9505211353302002, "rewards/format_reward": 0.9998046875, "rewards/frontier_entropy_batch_reward": -0.24021487236022948, "signal/accuracy_reward/centered_abs_mean": 0.0829345703125, "signal/accuracy_reward/group_std_mean": 0.11002247482538223, "signal/accuracy_reward/group_zero_std_frac": 0.684375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8337624669075012, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04146728515625, "signal/advantage_abs_mean": 0.7705082893371582, "signal/advantage_pre_scale_abs_mean": 0.06182959377765655, "signal/advantage_pre_scale_std": 0.10288000404834748, "signal/advantage_std": 0.9825070142745972, "signal/batch_coverage_0/centered_abs_mean": 0.1394294634461403, "signal/batch_coverage_0/group_std_mean": 0.17551667094230652, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04024726450443268, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.001993841305375099, "signal/batch_coverage_1/centered_abs_mean": 0.1394294634461403, "signal/batch_coverage_1/group_std_mean": 0.17551667094230652, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04024726450443268, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.001993841305375099, "signal/batch_coverage_10/centered_abs_mean": 0.15050265192985535, "signal/batch_coverage_10/group_std_mean": 0.19143196940422058, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.043409749120473864, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021521878661587834, "signal/batch_coverage_15/centered_abs_mean": 0.15238051116466522, "signal/batch_coverage_15/group_std_mean": 0.19435729682445527, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04396386295557022, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0021790412720292807, "signal/batch_coverage_20/centered_abs_mean": 0.15019435286521912, "signal/batch_coverage_20/group_std_mean": 0.19183135628700257, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04335668459534645, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002147779194638133, "signal/batch_coverage_25/centered_abs_mean": 0.14935584962368012, "signal/batch_coverage_25/group_std_mean": 0.1913298785686493, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.043079151213169097, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021357886493206025, "signal/batch_coverage_5/centered_abs_mean": 0.1444351464509964, "signal/batch_coverage_5/group_std_mean": 0.18239499628543854, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0416886031627655, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020654225954785943, "signal/brier_reward/centered_abs_mean": 0.09700828045606613, "signal/brier_reward/group_std_mean": 0.12455925345420837, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19543475210666655, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009700828790664673, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013165067881345749, "signal/confidence_uniqueness_reward/group_std_mean": 0.01705835647881031, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02658823914825916, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013165067881345748, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0037154631689190866, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28098098635673524, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3505799949169159, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5675266325473786, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028098098933696747, "step": 255 }, { "calibration/aurc": 0.25830381510833755, "calibration/batch_distribution_entropy": 0.9757110148254032, "calibration/buffer_distribution_entropy": 0.9882824188981054, "calibration/confidence_entropy": 0.46318351392250356, "calibration/coverage@0%": 0.04375, "calibration/coverage@1%": 0.04375, "calibration/coverage@10%": 0.270703125, "calibration/coverage@15%": 0.33828125, "calibration/coverage@20%": 0.412890625, "calibration/coverage@25%": 0.52109375, "calibration/coverage@30%": 0.629296875, "calibration/coverage@5%": 0.2, "calibration/ece": 0.11027176139179815, "calibration/mean_confidence": 0.49241658529860094, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 855.8, "completions/max_terminated_length": 855.8, "completions/mean_length": 251.2025390625, "completions/mean_terminated_length": 251.2025390625, "completions/min_length": 124.6, "completions/min_terminated_length": 124.6, "epoch": 0.832, "grad_norm": 0.006431729532778263, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 904233989.0, "reward": 0.9797815322875977, "reward_std": 0.07985545843839645, "rewards/accuracy_reward": 0.5634765625, "rewards/batch_coverage_0": 0.43724015951156614, "rewards/batch_coverage_1": 0.43724015951156614, "rewards/batch_coverage_10": 0.4717229902744293, "rewards/batch_coverage_15": 0.4749358117580414, "rewards/batch_coverage_20": 0.48062952160835265, "rewards/batch_coverage_25": 0.4813665568828583, "rewards/batch_coverage_5": 0.45433294773101807, "rewards/brier_reward": 0.8289217233657837, "rewards/confidence_uniqueness_reward": 0.9487106323242187, "rewards/format_reward": 1.0, "rewards/frontier_entropy_batch_reward": -0.26015793681144717, "signal/accuracy_reward/centered_abs_mean": 0.083837890625, "signal/accuracy_reward/group_std_mean": 0.11118160039186478, "signal/accuracy_reward/group_zero_std_frac": 0.68125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8772681474685669, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0419189453125, "signal/advantage_abs_mean": 0.7730329275131226, "signal/advantage_pre_scale_abs_mean": 0.06214970722794533, "signal/advantage_pre_scale_std": 0.10454044938087463, "signal/advantage_std": 0.9824274897575378, "signal/batch_coverage_0/centered_abs_mean": 0.13734543919563294, "signal/batch_coverage_0/group_std_mean": 0.1743619203567505, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.041223809123039246, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.001964039751328528, "signal/batch_coverage_1/centered_abs_mean": 0.13734543919563294, "signal/batch_coverage_1/group_std_mean": 0.1743619203567505, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.041223809123039246, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.001964039751328528, "signal/batch_coverage_10/centered_abs_mean": 0.14575589895248414, "signal/batch_coverage_10/group_std_mean": 0.18675495088100433, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.043697334825992584, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020843093283474446, "signal/batch_coverage_15/centered_abs_mean": 0.14540058076381684, "signal/batch_coverage_15/group_std_mean": 0.18637823164463044, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04360126554965973, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020792283583432438, "signal/batch_coverage_20/centered_abs_mean": 0.14694512784481048, "signal/batch_coverage_20/group_std_mean": 0.18890998363494874, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04405587539076805, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002101315325126052, "signal/batch_coverage_25/centered_abs_mean": 0.14675863683223725, "signal/batch_coverage_25/group_std_mean": 0.18870731592178344, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04401228204369545, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002098648436367512, "signal/batch_coverage_5/centered_abs_mean": 0.14158134162425995, "signal/batch_coverage_5/group_std_mean": 0.18014432191848756, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04245214462280274, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020246132742613556, "signal/brier_reward/centered_abs_mean": 0.0953995257616043, "signal/brier_reward/group_std_mean": 0.12400536090135575, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19989734292030334, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009539952501654625, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014217448234558106, "signal/confidence_uniqueness_reward/group_std_mean": 0.017744265496730804, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029874777793884276, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001421744842082262, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2862050950527191, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3593140959739685, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6001887321472168, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028620509803295134, "step": 260 }, { "calibration/aurc": 0.27905522292217644, "calibration/batch_distribution_entropy": 0.9632583305472959, "calibration/buffer_distribution_entropy": 0.988097658160075, "calibration/confidence_entropy": 0.45924203980536704, "calibration/coverage@0%": 0.07421875, "calibration/coverage@1%": 0.090234375, "calibration/coverage@10%": 0.2296875, "calibration/coverage@15%": 0.39765625, "calibration/coverage@20%": 0.466796875, "calibration/coverage@25%": 0.505859375, "calibration/coverage@30%": 0.5421875, "calibration/coverage@5%": 0.116015625, "calibration/ece": 0.15995092522476045, "calibration/mean_confidence": 0.5370619842603217, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 926.8, "completions/max_terminated_length": 926.8, "completions/mean_length": 250.95556640625, "completions/mean_terminated_length": 251.10352478027343, "completions/min_length": 25.0, "completions/min_terminated_length": 125.6, "epoch": 0.848, "grad_norm": 0.006838792935013771, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 921818142.0, "reward": 0.9706242799758911, "reward_std": 0.07961578965187073, "rewards/accuracy_reward": 0.54248046875, "rewards/batch_coverage_0": 0.4266131818294525, "rewards/batch_coverage_1": 0.4266131818294525, "rewards/batch_coverage_10": 0.45701671838760377, "rewards/batch_coverage_15": 0.4607600450515747, "rewards/batch_coverage_20": 0.46680898666381837, "rewards/batch_coverage_25": 0.4687318027019501, "rewards/batch_coverage_5": 0.43967961668968203, "rewards/brier_reward": 0.8209251642227173, "rewards/confidence_uniqueness_reward": 0.9507812619209289, "rewards/format_reward": 0.9994140625, "rewards/frontier_entropy_batch_reward": -0.2248463362455368, "signal/accuracy_reward/centered_abs_mean": 0.075408935546875, "signal/accuracy_reward/group_std_mean": 0.10307029783725738, "signal/accuracy_reward/group_zero_std_frac": 0.69375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7802480101585388, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0377044677734375, "signal/advantage_abs_mean": 0.763916802406311, "signal/advantage_pre_scale_abs_mean": 0.059930368512868884, "signal/advantage_pre_scale_std": 0.10379154682159424, "signal/advantage_std": 0.9824334859848023, "signal/batch_coverage_0/centered_abs_mean": 0.13436878621578216, "signal/batch_coverage_0/group_std_mean": 0.17151402235031127, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04007416889071465, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019214736763387918, "signal/batch_coverage_1/centered_abs_mean": 0.13436878621578216, "signal/batch_coverage_1/group_std_mean": 0.17151402235031127, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04007416889071465, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019214736763387918, "signal/batch_coverage_10/centered_abs_mean": 0.14380245208740233, "signal/batch_coverage_10/group_std_mean": 0.18498321771621704, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04290038496255875, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002056375052779913, "signal/batch_coverage_15/centered_abs_mean": 0.14388126730918885, "signal/batch_coverage_15/group_std_mean": 0.18545418381690978, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04295293316245079, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020575019530951976, "signal/batch_coverage_20/centered_abs_mean": 0.14691962897777558, "signal/batch_coverage_20/group_std_mean": 0.18964103162288665, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04389960765838623, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.00210095071233809, "signal/batch_coverage_25/centered_abs_mean": 0.14909811317920685, "signal/batch_coverage_25/group_std_mean": 0.19255113303661348, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04451926797628403, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021321029867976903, "signal/batch_coverage_5/centered_abs_mean": 0.1378627151250839, "signal/batch_coverage_5/group_std_mean": 0.17606137692928314, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04110623449087143, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.001971436757594347, "signal/brier_reward/centered_abs_mean": 0.098325015604496, "signal/brier_reward/group_std_mean": 0.1293652281165123, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20545124709606172, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009832501597702503, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013685688003897667, "signal/confidence_uniqueness_reward/group_std_mean": 0.01880355179309845, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028652074560523034, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013685687445104122, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_std_mean": 0.0033145629335194827, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011606083437800407, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2727161109447479, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3431577146053314, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5705787897109985, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027271611243486406, "step": 265 }, { "calibration/aurc": 0.2567896225592432, "calibration/batch_distribution_entropy": 0.9399621317744724, "calibration/buffer_distribution_entropy": 0.9888205668127513, "calibration/confidence_entropy": 0.46848315954934144, "calibration/coverage@0%": 0.010953583059936304, "calibration/coverage@1%": 0.010953583059936304, "calibration/coverage@10%": 0.18382429384425003, "calibration/coverage@15%": 0.24530100953052453, "calibration/coverage@20%": 0.28050934286385787, "calibration/coverage@25%": 0.4496928600207206, "calibration/coverage@30%": 0.6899701271775834, "calibration/coverage@5%": 0.0937660830599363, "calibration/ece": 0.14122779695873705, "calibration/mean_confidence": 0.6172851125428632, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 1024.2, "completions/max_terminated_length": 1024.2, "completions/mean_length": 246.3564453125, "completions/mean_terminated_length": 246.52633056640624, "completions/min_length": 21.0, "completions/min_terminated_length": 122.8, "epoch": 0.864, "grad_norm": 0.007001855410635471, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 939327648.0, "reward": 0.9944598317146301, "reward_std": 0.08389261215925217, "rewards/accuracy_reward": 0.6052734375, "rewards/batch_coverage_0": 0.4061899304389954, "rewards/batch_coverage_1": 0.4061899304389954, "rewards/batch_coverage_10": 0.43846317529678347, "rewards/batch_coverage_15": 0.444266802072525, "rewards/batch_coverage_20": 0.4477324843406677, "rewards/batch_coverage_25": 0.45078660249710084, "rewards/batch_coverage_5": 0.4176724016666412, "rewards/brier_reward": 0.812709105014801, "rewards/confidence_uniqueness_reward": 0.9477102398872376, "rewards/format_reward": 0.99921875, "rewards/frontier_entropy_batch_reward": -0.2688983857631683, "signal/accuracy_reward/centered_abs_mean": 0.0864501953125, "signal/accuracy_reward/group_std_mean": 0.1119039848446846, "signal/accuracy_reward/group_zero_std_frac": 0.690625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8440386652946472, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04322509765625, "signal/advantage_abs_mean": 0.7768685460090637, "signal/advantage_pre_scale_abs_mean": 0.06493140161037445, "signal/advantage_pre_scale_std": 0.10726050138473511, "signal/advantage_std": 0.9825407266616821, "signal/batch_coverage_0/centered_abs_mean": 0.1418234884738922, "signal/batch_coverage_0/group_std_mean": 0.17914320528507233, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.040483998507261275, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0020280758617445827, "signal/batch_coverage_1/centered_abs_mean": 0.1418234884738922, "signal/batch_coverage_1/group_std_mean": 0.17914320528507233, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.040483998507261275, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0020280758617445827, "signal/batch_coverage_10/centered_abs_mean": 0.15116735100746154, "signal/batch_coverage_10/group_std_mean": 0.19266805946826934, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04304313659667969, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021616931073367594, "signal/batch_coverage_15/centered_abs_mean": 0.1520349085330963, "signal/batch_coverage_15/group_std_mean": 0.1941957652568817, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04327462837100029, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002174099301919341, "signal/batch_coverage_20/centered_abs_mean": 0.15282979607582092, "signal/batch_coverage_20/group_std_mean": 0.19580250084400178, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.0434773713350296, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002185466093942523, "signal/batch_coverage_25/centered_abs_mean": 0.15083151459693908, "signal/batch_coverage_25/group_std_mean": 0.1939299464225769, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.042951537668704985, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002156890695914626, "signal/batch_coverage_5/centered_abs_mean": 0.14520049095153809, "signal/batch_coverage_5/group_std_mean": 0.18340204358100892, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04142942652106285, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020763671025633814, "signal/brier_reward/centered_abs_mean": 0.10041524767875672, "signal/brier_reward/group_std_mean": 0.13036752492189407, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19795797765254974, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010041524842381477, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015039702318608761, "signal/confidence_uniqueness_reward/group_std_mean": 0.020908067747950555, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029788796231150626, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015039702178910376, "signal/format_reward/centered_abs_mean": 0.00150146484375, "signal/format_reward/group_std_mean": 0.00408310885541141, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.014261398650705814, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000750732421875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2864925265312195, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35601104497909547, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5675512373447418, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028649253770709038, "step": 270 }, { "calibration/aurc": 0.3391713719809212, "calibration/batch_distribution_entropy": 0.9652309050420375, "calibration/buffer_distribution_entropy": 0.9887706535251917, "calibration/confidence_entropy": 0.44366992640643527, "calibration/coverage@0%": 0.011349571078431372, "calibration/coverage@1%": 0.011349571078431372, "calibration/coverage@10%": 0.048590686274509805, "calibration/coverage@15%": 0.12131127450980392, "calibration/coverage@20%": 0.20539981617647057, "calibration/coverage@25%": 0.30826746323529414, "calibration/coverage@30%": 0.40834405637254906, "calibration/coverage@5%": 0.02350643382352941, "calibration/ece": 0.12908799058345438, "calibration/mean_confidence": 0.49746322064845183, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 789.8, "completions/max_terminated_length": 789.8, "completions/mean_length": 241.1212890625, "completions/mean_terminated_length": 241.28960876464845, "completions/min_length": 42.2, "completions/min_terminated_length": 117.0, "epoch": 0.88, "grad_norm": 0.00824071653187275, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 956943802.0, "reward": 0.9564361095428466, "reward_std": 0.08327271789312363, "rewards/accuracy_reward": 0.523046875, "rewards/batch_coverage_0": 0.42483463883399963, "rewards/batch_coverage_1": 0.42483463883399963, "rewards/batch_coverage_10": 0.44779890179634096, "rewards/batch_coverage_15": 0.4517851173877716, "rewards/batch_coverage_20": 0.45584348440170286, "rewards/batch_coverage_25": 0.46056185364723207, "rewards/batch_coverage_5": 0.4392197012901306, "rewards/brier_reward": 0.8138040423393249, "rewards/confidence_uniqueness_reward": 0.9495703697204589, "rewards/format_reward": 0.99931640625, "rewards/frontier_entropy_batch_reward": -0.25482745170593263, "signal/accuracy_reward/centered_abs_mean": 0.08291015625, "signal/accuracy_reward/group_std_mean": 0.1109766572713852, "signal/accuracy_reward/group_zero_std_frac": 0.678125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8130660891532898, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.041455078125, "signal/advantage_abs_mean": 0.7675025463104248, "signal/advantage_pre_scale_abs_mean": 0.0639819398522377, "signal/advantage_pre_scale_std": 0.1059723898768425, "signal/advantage_std": 0.9825579881668091, "signal/batch_coverage_0/centered_abs_mean": 0.13871148526668547, "signal/batch_coverage_0/group_std_mean": 0.17681510448455812, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03895730599761009, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019835742190480233, "signal/batch_coverage_1/centered_abs_mean": 0.13871148526668547, "signal/batch_coverage_1/group_std_mean": 0.17681510448455812, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03895730599761009, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019835742190480233, "signal/batch_coverage_10/centered_abs_mean": 0.14528686702251434, "signal/batch_coverage_10/group_std_mean": 0.18570451736450194, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.040792569518089294, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020776021527126433, "signal/batch_coverage_15/centered_abs_mean": 0.1425869882106781, "signal/batch_coverage_15/group_std_mean": 0.18342476487159728, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.040037110447883606, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002038993942551315, "signal/batch_coverage_20/centered_abs_mean": 0.14162907898426055, "signal/batch_coverage_20/group_std_mean": 0.18285691738128662, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03976234272122383, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020252959337085487, "signal/batch_coverage_25/centered_abs_mean": 0.14475294947624207, "signal/batch_coverage_25/group_std_mean": 0.1870112508535385, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04063992574810982, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0020699672866612675, "signal/batch_coverage_5/centered_abs_mean": 0.14309484958648683, "signal/batch_coverage_5/group_std_mean": 0.1825674444437027, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04018484801054001, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002046256372705102, "signal/brier_reward/centered_abs_mean": 0.10405312627553939, "signal/brier_reward/group_std_mean": 0.13430474400520326, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20426113307476043, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010405313037335873, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014735890924930573, "signal/confidence_uniqueness_reward/group_std_mean": 0.020338327810168266, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028994759172201158, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014735890785232187, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.003866990189999342, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013323342800140381, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29404598474502563, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3665463447570801, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5783021211624145, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029404599219560623, "step": 275 }, { "calibration/aurc": 0.31771111015887915, "calibration/batch_distribution_entropy": 0.974324873774067, "calibration/buffer_distribution_entropy": 0.9886315735465725, "calibration/confidence_entropy": 0.4536923645244853, "calibration/coverage@0%": 0.04219361545988258, "calibration/coverage@1%": 0.04609986545988258, "calibration/coverage@10%": 0.15392153864970645, "calibration/coverage@15%": 0.19650577910958905, "calibration/coverage@20%": 0.34651877446183954, "calibration/coverage@25%": 0.4226929427592955, "calibration/coverage@30%": 0.4926293419765166, "calibration/coverage@5%": 0.06133424045988258, "calibration/ece": 0.18039484157785943, "calibration/mean_confidence": 0.5008349061997756, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 936.6, "completions/max_terminated_length": 936.6, "completions/mean_length": 239.8802734375, "completions/mean_terminated_length": 239.9035400390625, "completions/min_length": 99.8, "completions/min_terminated_length": 122.0, "epoch": 0.896, "grad_norm": 0.006939719431102276, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 974511024.0, "reward": 0.9786574125289917, "reward_std": 0.07587149292230606, "rewards/accuracy_reward": 0.56845703125, "rewards/batch_coverage_0": 0.41372602581977846, "rewards/batch_coverage_1": 0.41372602581977846, "rewards/batch_coverage_10": 0.449049574136734, "rewards/batch_coverage_15": 0.45231945514678956, "rewards/batch_coverage_20": 0.457261198759079, "rewards/batch_coverage_25": 0.45867209434509276, "rewards/batch_coverage_5": 0.4332360863685608, "rewards/brier_reward": 0.8007945418357849, "rewards/confidence_uniqueness_reward": 0.950228500366211, "rewards/format_reward": 0.99990234375, "rewards/frontier_entropy_batch_reward": -0.24639837741851806, "signal/accuracy_reward/centered_abs_mean": 0.072772216796875, "signal/accuracy_reward/group_std_mean": 0.10187921673059464, "signal/accuracy_reward/group_zero_std_frac": 0.6875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7487026810646057, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0363861083984375, "signal/advantage_abs_mean": 0.7618164420127869, "signal/advantage_pre_scale_abs_mean": 0.05696792304515839, "signal/advantage_pre_scale_std": 0.09597984254360199, "signal/advantage_std": 0.9824573516845703, "signal/batch_coverage_0/centered_abs_mean": 0.13853881061077117, "signal/batch_coverage_0/group_std_mean": 0.1770318329334259, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.040990565717220304, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.001981105003505945, "signal/batch_coverage_1/centered_abs_mean": 0.13853881061077117, "signal/batch_coverage_1/group_std_mean": 0.1770318329334259, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.040990565717220304, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.001981105003505945, "signal/batch_coverage_10/centered_abs_mean": 0.14625149667263032, "signal/batch_coverage_10/group_std_mean": 0.18902421295642852, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.043303582072258, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002091396367177367, "signal/batch_coverage_15/centered_abs_mean": 0.14543745517730713, "signal/batch_coverage_15/group_std_mean": 0.1879607081413269, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04308133721351624, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020797555334866047, "signal/batch_coverage_20/centered_abs_mean": 0.14614311456680298, "signal/batch_coverage_20/group_std_mean": 0.1893752932548523, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04328845590353012, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020898465532809495, "signal/batch_coverage_25/centered_abs_mean": 0.14725618660449982, "signal/batch_coverage_25/group_std_mean": 0.19075983464717866, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.043608113378286364, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021057635080069304, "signal/batch_coverage_5/centered_abs_mean": 0.14275825321674346, "signal/batch_coverage_5/group_std_mean": 0.1829033762216568, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04225177988409996, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020414429251104592, "signal/brier_reward/centered_abs_mean": 0.09953672885894775, "signal/brier_reward/group_std_mean": 0.1298005163669586, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20504209995269776, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009953673183917999, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013294638693332672, "signal/confidence_uniqueness_reward/group_std_mean": 0.016960232332348824, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027427341789007187, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013294639065861702, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0018184378743171692, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2813436031341553, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35305030941963195, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5798410654067994, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028134360536932947, "step": 280 }, { "calibration/aurc": 0.30828852543253227, "calibration/batch_distribution_entropy": 0.9660153375103102, "calibration/buffer_distribution_entropy": 0.988424190292647, "calibration/confidence_entropy": 0.45257516243344964, "calibration/coverage@0%": 0.034765625, "calibration/coverage@1%": 0.034765625, "calibration/coverage@10%": 0.158203125, "calibration/coverage@15%": 0.280859375, "calibration/coverage@20%": 0.374609375, "calibration/coverage@25%": 0.45390625, "calibration/coverage@30%": 0.54609375, "calibration/coverage@5%": 0.116796875, "calibration/ece": 0.1127079457086764, "calibration/mean_confidence": 0.5327337166604502, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 794.4, "completions/max_terminated_length": 794.4, "completions/mean_length": 238.48564453125, "completions/mean_terminated_length": 238.6263916015625, "completions/min_length": 24.0, "completions/min_terminated_length": 120.6, "epoch": 0.912, "grad_norm": 0.0068150414153933525, "learning_rate": 1e-06, "loss": -0.0043, "num_tokens": 992004413.0, "reward": 0.9777844548225403, "reward_std": 0.07660511136054993, "rewards/accuracy_reward": 0.558203125, "rewards/batch_coverage_0": 0.4496914088726044, "rewards/batch_coverage_1": 0.4496914088726044, "rewards/batch_coverage_10": 0.4754992663860321, "rewards/batch_coverage_15": 0.4782718360424042, "rewards/batch_coverage_20": 0.48222748637199403, "rewards/batch_coverage_25": 0.4871933341026306, "rewards/batch_coverage_5": 0.4609818339347839, "rewards/brier_reward": 0.8314094185829163, "rewards/confidence_uniqueness_reward": 0.947171938419342, "rewards/format_reward": 0.9994140625, "rewards/frontier_entropy_batch_reward": -0.25837140083312987, "signal/accuracy_reward/centered_abs_mean": 0.07244873046875, "signal/accuracy_reward/group_std_mean": 0.09671394675970077, "signal/accuracy_reward/group_zero_std_frac": 0.71875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7836610794067382, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.036224365234375, "signal/advantage_abs_mean": 0.7689519405364991, "signal/advantage_pre_scale_abs_mean": 0.05903713330626488, "signal/advantage_pre_scale_std": 0.10071672052145005, "signal/advantage_std": 0.9823365330696106, "signal/batch_coverage_0/centered_abs_mean": 0.14369446337223052, "signal/batch_coverage_0/group_std_mean": 0.18067781627178192, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04487623497843742, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002054830873385072, "signal/batch_coverage_1/centered_abs_mean": 0.14369446337223052, "signal/batch_coverage_1/group_std_mean": 0.18067781627178192, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04487623497843742, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002054830873385072, "signal/batch_coverage_10/centered_abs_mean": 0.14944251179695128, "signal/batch_coverage_10/group_std_mean": 0.18889077603816987, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04679513275623322, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021370279137045145, "signal/batch_coverage_15/centered_abs_mean": 0.15104621648788452, "signal/batch_coverage_15/group_std_mean": 0.19096899330615996, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.0472504124045372, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002159960940480232, "signal/batch_coverage_20/centered_abs_mean": 0.15283162593841554, "signal/batch_coverage_20/group_std_mean": 0.1936648577451706, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.047779142111539843, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021854921244084833, "signal/batch_coverage_25/centered_abs_mean": 0.15555560290813447, "signal/batch_coverage_25/group_std_mean": 0.19730362594127654, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04867595061659813, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002224445063620806, "signal/batch_coverage_5/centered_abs_mean": 0.14733441770076752, "signal/batch_coverage_5/group_std_mean": 0.18538997173309327, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.046044493466615675, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002106882119551301, "signal/brier_reward/centered_abs_mean": 0.0970422387123108, "signal/brier_reward/group_std_mean": 0.12534932047128677, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.211318901181221, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.00970422402024269, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01565753761678934, "signal/confidence_uniqueness_reward/group_std_mean": 0.021441229432821274, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03439625911414623, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015657537849619984, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_std_mean": 0.0033145629335194827, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011679522693157196, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2794118285179138, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3517456650733948, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6118232488632203, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027941183745861055, "step": 285 }, { "calibration/aurc": 0.3555222457423709, "calibration/batch_distribution_entropy": 0.9740519445999197, "calibration/buffer_distribution_entropy": 0.9891156696064547, "calibration/confidence_entropy": 0.48150519224400945, "calibration/coverage@0%": 0.0234375, "calibration/coverage@1%": 0.0234375, "calibration/coverage@10%": 0.089453125, "calibration/coverage@15%": 0.145703125, "calibration/coverage@20%": 0.18515625, "calibration/coverage@25%": 0.230078125, "calibration/coverage@30%": 0.3078125, "calibration/coverage@5%": 0.041015625, "calibration/ece": 0.10356664177993731, "calibration/mean_confidence": 0.4613492720779721, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 831.0, "completions/max_terminated_length": 831.0, "completions/mean_length": 236.5126953125, "completions/mean_terminated_length": 236.53578491210936, "completions/min_length": 98.2, "completions/min_terminated_length": 121.4, "epoch": 0.928, "grad_norm": 0.006446013692766428, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 1009453119.0, "reward": 0.9647042751312256, "reward_std": 0.07521760016679764, "rewards/accuracy_reward": 0.5376953125, "rewards/batch_coverage_0": 0.4267537951469421, "rewards/batch_coverage_1": 0.4267537951469421, "rewards/batch_coverage_10": 0.45175575017929076, "rewards/batch_coverage_15": 0.4577596127986908, "rewards/batch_coverage_20": 0.46226202249526976, "rewards/batch_coverage_25": 0.4647360146045685, "rewards/batch_coverage_5": 0.4439967393875122, "rewards/brier_reward": 0.8195161938667297, "rewards/confidence_uniqueness_reward": 0.9499494791030884, "rewards/format_reward": 0.99990234375, "rewards/frontier_entropy_batch_reward": -0.25857561230659487, "signal/accuracy_reward/centered_abs_mean": 0.07042236328125, "signal/accuracy_reward/group_std_mean": 0.09534862488508225, "signal/accuracy_reward/group_zero_std_frac": 0.715625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.6946340084075928, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.035211181640625, "signal/advantage_abs_mean": 0.7717218399047852, "signal/advantage_pre_scale_abs_mean": 0.058029332756996156, "signal/advantage_pre_scale_std": 0.09526041895151138, "signal/advantage_std": 0.9825134754180909, "signal/batch_coverage_0/centered_abs_mean": 0.14040791988372803, "signal/batch_coverage_0/group_std_mean": 0.17896082103252411, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.040377072244882586, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002007833169773221, "signal/batch_coverage_1/centered_abs_mean": 0.14040791988372803, "signal/batch_coverage_1/group_std_mean": 0.17896082103252411, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.040377072244882586, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002007833169773221, "signal/batch_coverage_10/centered_abs_mean": 0.14643791019916536, "signal/batch_coverage_10/group_std_mean": 0.18706798255443574, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04210822582244873, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002094061998650432, "signal/batch_coverage_15/centered_abs_mean": 0.14486917555332185, "signal/batch_coverage_15/group_std_mean": 0.1849692642688751, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.041620150208473206, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002071629255078733, "signal/batch_coverage_20/centered_abs_mean": 0.14694713354110717, "signal/batch_coverage_20/group_std_mean": 0.18772556483745576, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04221231043338776, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002101344126276672, "signal/batch_coverage_25/centered_abs_mean": 0.14691944420337677, "signal/batch_coverage_25/group_std_mean": 0.18790694773197175, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04220572412014008, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002100948058068752, "signal/batch_coverage_5/centered_abs_mean": 0.1435709625482559, "signal/batch_coverage_5/group_std_mean": 0.18303219974040985, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04129085242748261, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002053064666688442, "signal/brier_reward/centered_abs_mean": 0.0938475176692009, "signal/brier_reward/group_std_mean": 0.1227958619594574, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18740336000919341, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009384752437472343, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01345120333135128, "signal/confidence_uniqueness_reward/group_std_mean": 0.017086662724614142, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027048880234360695, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013451203238219022, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0017758136615157127, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29375959038734434, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36533265709877016, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5884610295295716, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029375958442687988, "step": 290 }, { "calibration/aurc": 0.2229863112287252, "calibration/batch_distribution_entropy": 0.9637147094213729, "calibration/buffer_distribution_entropy": 0.9898969165099345, "calibration/confidence_entropy": 0.45262899153877767, "calibration/coverage@0%": 0.0421875, "calibration/coverage@1%": 0.0421875, "calibration/coverage@10%": 0.32890625, "calibration/coverage@15%": 0.410546875, "calibration/coverage@20%": 0.523828125, "calibration/coverage@25%": 0.603125, "calibration/coverage@30%": 0.683984375, "calibration/coverage@5%": 0.187109375, "calibration/ece": 0.09124665911387694, "calibration/mean_confidence": 0.48950484950374407, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 635.6, "completions/max_terminated_length": 635.6, "completions/mean_length": 236.31083984375, "completions/mean_terminated_length": 236.3336669921875, "completions/min_length": 96.2, "completions/min_terminated_length": 116.6, "epoch": 0.944, "grad_norm": 0.0067446487955749035, "learning_rate": 1e-06, "loss": 0.0053, "num_tokens": 1026848366.0, "reward": 0.9670644760131836, "reward_std": 0.08264310508966446, "rewards/accuracy_reward": 0.54658203125, "rewards/batch_coverage_0": 0.41733229756355283, "rewards/batch_coverage_1": 0.41733229756355283, "rewards/batch_coverage_10": 0.45554951429367063, "rewards/batch_coverage_15": 0.457538378238678, "rewards/batch_coverage_20": 0.46042126417160034, "rewards/batch_coverage_25": 0.46144301891326905, "rewards/batch_coverage_5": 0.4349259316921234, "rewards/brier_reward": 0.8223084926605224, "rewards/confidence_uniqueness_reward": 0.9484071016311646, "rewards/format_reward": 0.99990234375, "rewards/frontier_entropy_batch_reward": -0.2764424979686737, "signal/accuracy_reward/centered_abs_mean": 0.087493896484375, "signal/accuracy_reward/group_std_mean": 0.1149898737668991, "signal/accuracy_reward/group_zero_std_frac": 0.675, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8754186749458313, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0437469482421875, "signal/advantage_abs_mean": 0.7771499872207641, "signal/advantage_pre_scale_abs_mean": 0.06411685273051262, "signal/advantage_pre_scale_std": 0.10613652616739273, "signal/advantage_std": 0.9825153589248657, "signal/batch_coverage_0/centered_abs_mean": 0.13396848738193512, "signal/batch_coverage_0/group_std_mean": 0.16862494945526124, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.038404418528079985, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019157493952661753, "signal/batch_coverage_1/centered_abs_mean": 0.13396848738193512, "signal/batch_coverage_1/group_std_mean": 0.16862494945526124, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.038404418528079985, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019157493952661753, "signal/batch_coverage_10/centered_abs_mean": 0.143884015083313, "signal/batch_coverage_10/group_std_mean": 0.18276259899139405, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04123903587460518, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020575413946062326, "signal/batch_coverage_15/centered_abs_mean": 0.1433136433362961, "signal/batch_coverage_15/group_std_mean": 0.18256149888038636, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.041087044030427934, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020493850577622654, "signal/batch_coverage_20/centered_abs_mean": 0.14255596399307252, "signal/batch_coverage_20/group_std_mean": 0.18211045265197753, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04089748486876488, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020385502837598326, "signal/batch_coverage_25/centered_abs_mean": 0.14203818142414093, "signal/batch_coverage_25/group_std_mean": 0.1817559063434601, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04073741212487221, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002031145920045674, "signal/batch_coverage_5/centered_abs_mean": 0.1392223507165909, "signal/batch_coverage_5/group_std_mean": 0.175718292593956, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03989290744066239, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0019908795831725, "signal/brier_reward/centered_abs_mean": 0.09500806033611298, "signal/brier_reward/group_std_mean": 0.1222013533115387, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19000862538814545, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009500806406140327, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014246679656207562, "signal/confidence_uniqueness_reward/group_std_mean": 0.018000596016645432, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028644410893321037, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014246679609641432, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0019225865602493287, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2949823498725891, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36604434847831724, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5930037498474121, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029498234018683435, "step": 295 }, { "calibration/aurc": 0.3204940128617194, "calibration/batch_distribution_entropy": 0.9580938856768244, "calibration/buffer_distribution_entropy": 0.9896996163940806, "calibration/confidence_entropy": 0.4399139243375204, "calibration/coverage@0%": 0.005078125, "calibration/coverage@1%": 0.005078125, "calibration/coverage@10%": 0.10859375, "calibration/coverage@15%": 0.242578125, "calibration/coverage@20%": 0.275390625, "calibration/coverage@25%": 0.356640625, "calibration/coverage@30%": 0.4125, "calibration/coverage@5%": 0.005078125, "calibration/ece": 0.1301704799550848, "calibration/mean_confidence": 0.5383094842630045, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 615.8, "completions/max_terminated_length": 615.8, "completions/mean_length": 232.53017578125, "completions/mean_terminated_length": 232.53017578125, "completions/min_length": 123.4, "completions/min_terminated_length": 123.4, "epoch": 0.96, "grad_norm": 0.0065346090123057365, "learning_rate": 1e-06, "loss": -0.002, "num_tokens": 1044169795.0, "reward": 0.9667318820953369, "reward_std": 0.07006315439939499, "rewards/accuracy_reward": 0.53857421875, "rewards/batch_coverage_0": 0.451965457201004, "rewards/batch_coverage_1": 0.451965457201004, "rewards/batch_coverage_10": 0.487179833650589, "rewards/batch_coverage_15": 0.4893311381340027, "rewards/batch_coverage_20": 0.49355667233467104, "rewards/batch_coverage_25": 0.4953798890113831, "rewards/batch_coverage_5": 0.4693809628486633, "rewards/brier_reward": 0.8260141253471375, "rewards/confidence_uniqueness_reward": 0.9484649658203125, "rewards/format_reward": 1.0, "rewards/frontier_entropy_batch_reward": -0.27747427225112914, "signal/accuracy_reward/centered_abs_mean": 0.061724853515625, "signal/accuracy_reward/group_std_mean": 0.08646569401025772, "signal/accuracy_reward/group_zero_std_frac": 0.734375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.6621787786483765, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0308624267578125, "signal/advantage_abs_mean": 0.7726797819137573, "signal/advantage_pre_scale_abs_mean": 0.05308959484100342, "signal/advantage_pre_scale_std": 0.08946077674627304, "signal/advantage_std": 0.9823702216148377, "signal/batch_coverage_0/centered_abs_mean": 0.1396566614508629, "signal/batch_coverage_0/group_std_mean": 0.17796612679958343, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.043179111927747725, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019970902940258384, "signal/batch_coverage_1/centered_abs_mean": 0.1396566614508629, "signal/batch_coverage_1/group_std_mean": 0.17796612679958343, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.043179111927747725, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019970902940258384, "signal/batch_coverage_10/centered_abs_mean": 0.14766047298908233, "signal/batch_coverage_10/group_std_mean": 0.18960849642753602, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04558332860469818, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021115447394549846, "signal/batch_coverage_15/centered_abs_mean": 0.14853170812129973, "signal/batch_coverage_15/group_std_mean": 0.1907793253660202, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04583237245678902, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002124003367498517, "signal/batch_coverage_20/centered_abs_mean": 0.15066201984882355, "signal/batch_coverage_20/group_std_mean": 0.1938926547765732, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04643941894173622, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021544668823480608, "signal/batch_coverage_25/centered_abs_mean": 0.14919013679027557, "signal/batch_coverage_25/group_std_mean": 0.1926431655883789, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04591233804821968, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002133418945595622, "signal/batch_coverage_5/centered_abs_mean": 0.1437687397003174, "signal/batch_coverage_5/group_std_mean": 0.18378321528434755, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04439370557665825, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020558929536491632, "signal/brier_reward/centered_abs_mean": 0.09213972389698029, "signal/brier_reward/group_std_mean": 0.11985861659049987, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1980680286884308, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009213972836732864, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014541387557983398, "signal/confidence_uniqueness_reward/group_std_mean": 0.018049951083958148, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03146158419549465, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014541388023644685, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30066679120063783, "signal/frontier_entropy_batch_reward/group_std_mean": 0.37262142896652223, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6474065899848938, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030066679790616037, "step": 300 }, { "epoch": 0.96, "eval_calibration/aurc": 0.4146560871220133, "eval_calibration/batch_distribution_entropy": 0.9208862479532871, "eval_calibration/buffer_distribution_entropy": 0.9888979645233786, "eval_calibration/confidence_entropy": 0.4375698210434617, "eval_calibration/coverage@0%": 0.078125, "eval_calibration/coverage@1%": 0.078125, "eval_calibration/coverage@10%": 0.078125, "eval_calibration/coverage@15%": 0.0859375, "eval_calibration/coverage@20%": 0.265625, "eval_calibration/coverage@25%": 0.34375, "eval_calibration/coverage@30%": 0.390625, "eval_calibration/coverage@5%": 0.078125, "eval_calibration/ece": 0.18276220207815103, "eval_calibration/mean_confidence": 0.4358244321820579, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 424.0, "eval_completions/max_terminated_length": 424.0, "eval_completions/mean_length": 233.96672821044922, "eval_completions/mean_terminated_length": 233.96672821044922, "eval_completions/min_length": 136.5, "eval_completions/min_terminated_length": 136.5, "eval_loss": 0.0, "eval_num_tokens": 1044169795.0, "eval_reward": 0.8108105212450027, "eval_reward_std": 0.2273394763469696, "eval_rewards/accuracy_reward": 0.44921875, "eval_rewards/batch_coverage_0": 0.17630108073353767, "eval_rewards/batch_coverage_1": 0.17630108073353767, "eval_rewards/batch_coverage_10": 0.17482585459947586, "eval_rewards/batch_coverage_15": 0.16643786057829857, "eval_rewards/batch_coverage_20": 0.12921502068638802, "eval_rewards/batch_coverage_25": 0.10374573059380054, "eval_rewards/batch_coverage_5": 0.17630108073353767, "eval_rewards/brier_reward": 0.8116861432790756, "eval_rewards/confidence_uniqueness_reward": 0.892578125, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 21.4639, "eval_samples_per_second": 23.295, "eval_signal/accuracy_reward/centered_abs_mean": 0.47900390625, "eval_signal/accuracy_reward/group_std_mean": 0.4969187304377556, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0559919476509094, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.239501953125, "eval_signal/advantage_abs_mean": 0.9430637061595917, "eval_signal/advantage_pre_scale_abs_mean": 0.21488191559910774, "eval_signal/advantage_pre_scale_std": 0.2248552180826664, "eval_signal/advantage_std": 0.9876824915409088, "eval_signal/batch_coverage_0/centered_abs_mean": 0.3311324641108513, "eval_signal/batch_coverage_0/group_std_mean": 0.3937782421708107, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.021000605076551437, "eval_signal/batch_coverage_0/weight": 0.014299999922513962, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.004735194204840809, "eval_signal/batch_coverage_1/centered_abs_mean": 0.3311324641108513, "eval_signal/batch_coverage_1/group_std_mean": 0.3937782421708107, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.021000605076551437, "eval_signal/batch_coverage_1/weight": 0.014299999922513962, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.004735194204840809, "eval_signal/batch_coverage_10/centered_abs_mean": 0.325124341994524, "eval_signal/batch_coverage_10/group_std_mean": 0.3854726776480675, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.020634129643440247, "eval_signal/batch_coverage_10/weight": 0.014299999922513962, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.004649278009310365, "eval_signal/batch_coverage_15/centered_abs_mean": 0.28606269136071205, "eval_signal/batch_coverage_15/group_std_mean": 0.3394397348165512, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01813029870390892, "eval_signal/batch_coverage_15/weight": 0.014299999922513962, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.004090696515049785, "eval_signal/batch_coverage_20/centered_abs_mean": 0.19810736551880836, "eval_signal/batch_coverage_20/group_std_mean": 0.2412240207195282, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012629861943423748, "eval_signal/batch_coverage_20/weight": 0.014299999922513962, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.0028329353663139045, "eval_signal/batch_coverage_25/centered_abs_mean": 0.16219930350780487, "eval_signal/batch_coverage_25/group_std_mean": 0.2030845247209072, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.010249435435980558, "eval_signal/batch_coverage_25/weight": 0.014299999922513962, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.0023194500245153904, "eval_signal/batch_coverage_5/centered_abs_mean": 0.3311324641108513, "eval_signal/batch_coverage_5/group_std_mean": 0.3937782421708107, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.021000605076551437, "eval_signal/batch_coverage_5/weight": 0.014299999922513962, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.004735194204840809, "eval_signal/brier_reward/centered_abs_mean": 0.18613890558481216, "eval_signal/brier_reward/group_std_mean": 0.24047620594501495, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0822746567428112, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01861389074474573, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04364013671875, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0524612283334136, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01921792607754469, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004364013671875, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.186, "step": 300 }, { "calibration/aurc": 0.23395873459072755, "calibration/batch_distribution_entropy": 0.969196375655712, "calibration/buffer_distribution_entropy": 0.9887961994688961, "calibration/confidence_entropy": 0.4758519319323664, "calibration/coverage@0%": 0.05312958659491194, "calibration/coverage@1%": 0.05312958659491194, "calibration/coverage@10%": 0.3391496453033268, "calibration/coverage@15%": 0.4352747370352251, "calibration/coverage@20%": 0.51537655944227, "calibration/coverage@25%": 0.5962818003913894, "calibration/coverage@30%": 0.6365429305283757, "calibration/coverage@5%": 0.19614878913894324, "calibration/ece": 0.14624253957562217, "calibration/mean_confidence": 0.4805521077708187, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 628.2, "completions/max_terminated_length": 628.2, "completions/mean_length": 235.7103515625, "completions/mean_terminated_length": 235.73360595703124, "completions/min_length": 97.2, "completions/min_terminated_length": 123.4, "epoch": 0.976, "grad_norm": 0.008149566128849983, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 1061444589.0, "reward": 0.9804271697998047, "reward_std": 0.07212701141834259, "rewards/accuracy_reward": 0.57001953125, "rewards/batch_coverage_0": 0.4203511416912079, "rewards/batch_coverage_1": 0.4203511416912079, "rewards/batch_coverage_10": 0.4480322599411011, "rewards/batch_coverage_15": 0.4536835730075836, "rewards/batch_coverage_20": 0.4561052739620209, "rewards/batch_coverage_25": 0.45771525502204896, "rewards/batch_coverage_5": 0.43514758348464966, "rewards/brier_reward": 0.8058765411376954, "rewards/confidence_uniqueness_reward": 0.9504483699798584, "rewards/format_reward": 0.99990234375, "rewards/frontier_entropy_batch_reward": -0.24373058676719667, "signal/accuracy_reward/centered_abs_mean": 0.066033935546875, "signal/accuracy_reward/group_std_mean": 0.09485945627093315, "signal/accuracy_reward/group_zero_std_frac": 0.7, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.6698218882083893, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0330169677734375, "signal/advantage_abs_mean": 0.762388014793396, "signal/advantage_pre_scale_abs_mean": 0.05398530513048172, "signal/advantage_pre_scale_std": 0.08997991234064102, "signal/advantage_std": 0.9824227690696716, "signal/batch_coverage_0/centered_abs_mean": 0.1356405645608902, "signal/batch_coverage_0/group_std_mean": 0.17252211272716522, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0407505564391613, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019396600546315312, "signal/batch_coverage_1/centered_abs_mean": 0.1356405645608902, "signal/batch_coverage_1/group_std_mean": 0.17252211272716522, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0407505564391613, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019396600546315312, "signal/batch_coverage_10/centered_abs_mean": 0.1435418903827667, "signal/batch_coverage_10/group_std_mean": 0.18347274363040925, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.043130411952733996, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020526490174233914, "signal/batch_coverage_15/centered_abs_mean": 0.14430699050426482, "signal/batch_coverage_15/group_std_mean": 0.18479589521884918, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.043363725394010545, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002063590008765459, "signal/batch_coverage_20/centered_abs_mean": 0.14351218342781066, "signal/batch_coverage_20/group_std_mean": 0.18394066393375397, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04315835386514664, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020522242411971092, "signal/batch_coverage_25/centered_abs_mean": 0.144418603181839, "signal/batch_coverage_25/group_std_mean": 0.18517533540725709, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04345187172293663, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002065186039544642, "signal/batch_coverage_5/centered_abs_mean": 0.13880332112312316, "signal/batch_coverage_5/group_std_mean": 0.17648251950740815, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.041730723530054095, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.001984887500293553, "signal/brier_reward/centered_abs_mean": 0.09185196608304977, "signal/brier_reward/group_std_mean": 0.11965415328741073, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19028878211975098, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009185196924954652, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012751653417944908, "signal/confidence_uniqueness_reward/group_std_mean": 0.01619891356676817, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026918485760688782, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012751653557643294, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002051408402621746, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27679008841514585, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34837120175361636, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5824168384075165, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02767900936305523, "step": 305 }, { "calibration/aurc": 0.35057905085704577, "calibration/batch_distribution_entropy": 0.9700197131768388, "calibration/buffer_distribution_entropy": 0.9887671953972358, "calibration/confidence_entropy": 0.464604167881242, "calibration/coverage@0%": 0.00859375, "calibration/coverage@1%": 0.00859375, "calibration/coverage@10%": 0.016796875, "calibration/coverage@15%": 0.072265625, "calibration/coverage@20%": 0.273046875, "calibration/coverage@25%": 0.3640625, "calibration/coverage@30%": 0.45546875, "calibration/coverage@5%": 0.00859375, "calibration/ece": 0.14386630288195734, "calibration/mean_confidence": 0.5126978774074185, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 609.0, "completions/max_terminated_length": 609.0, "completions/mean_length": 233.7537109375, "completions/mean_terminated_length": 233.7537109375, "completions/min_length": 123.8, "completions/min_terminated_length": 123.8, "epoch": 0.992, "grad_norm": 0.0069343592040240765, "learning_rate": 1e-06, "loss": -0.0023, "num_tokens": 1078966707.0, "reward": 0.9605294466018677, "reward_std": 0.07477787286043167, "rewards/accuracy_reward": 0.530078125, "rewards/batch_coverage_0": 0.43314738273620607, "rewards/batch_coverage_1": 0.43314738273620607, "rewards/batch_coverage_10": 0.45744775533676146, "rewards/batch_coverage_15": 0.4635770797729492, "rewards/batch_coverage_20": 0.46865540742874146, "rewards/batch_coverage_25": 0.47068612575531005, "rewards/batch_coverage_5": 0.44648249745368956, "rewards/brier_reward": 0.8139971017837524, "rewards/confidence_uniqueness_reward": 0.9495025634765625, "rewards/format_reward": 1.0, "rewards/frontier_entropy_batch_reward": -0.2623551905155182, "signal/accuracy_reward/centered_abs_mean": 0.069140625, "signal/accuracy_reward/group_std_mean": 0.09174189940094948, "signal/accuracy_reward/group_zero_std_frac": 0.734375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7008903384208679, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0345703125, "signal/advantage_abs_mean": 0.7781669855117798, "signal/advantage_pre_scale_abs_mean": 0.05807532519102097, "signal/advantage_pre_scale_std": 0.09566855132579803, "signal/advantage_std": 0.9824541807174683, "signal/batch_coverage_0/centered_abs_mean": 0.14170411229133606, "signal/batch_coverage_0/group_std_mean": 0.17635067999362947, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.041892097890377046, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0020263687707483768, "signal/batch_coverage_1/centered_abs_mean": 0.14170411229133606, "signal/batch_coverage_1/group_std_mean": 0.17635067999362947, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.041892097890377046, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0020263687707483768, "signal/batch_coverage_10/centered_abs_mean": 0.14685550332069397, "signal/batch_coverage_10/group_std_mean": 0.18372409641742707, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04345771968364716, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021000336622819305, "signal/batch_coverage_15/centered_abs_mean": 0.14845097064971924, "signal/batch_coverage_15/group_std_mean": 0.18660386502742768, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.043903425335884094, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0021228487603366375, "signal/batch_coverage_20/centered_abs_mean": 0.14924948811531066, "signal/batch_coverage_20/group_std_mean": 0.18820964694023132, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04425108656287193, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021342677529901264, "signal/batch_coverage_25/centered_abs_mean": 0.14973369836807252, "signal/batch_coverage_25/group_std_mean": 0.1890793949365616, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04441794827580452, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021411918569356203, "signal/batch_coverage_5/centered_abs_mean": 0.1451415926218033, "signal/batch_coverage_5/group_std_mean": 0.18125473260879515, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04290560409426689, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002075524744577706, "signal/brier_reward/centered_abs_mean": 0.09672370105981827, "signal/brier_reward/group_std_mean": 0.1231953427195549, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1986709266901016, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.00967237027361989, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013905191421508789, "signal/confidence_uniqueness_reward/group_std_mean": 0.01732446514070034, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02885650247335434, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001390519179403782, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2993383765220642, "signal/frontier_entropy_batch_reward/group_std_mean": 0.37033950686454775, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6192003607749939, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029933837801218034, "step": 310 }, { "calibration/aurc": 0.24144867169583295, "calibration/batch_distribution_entropy": 0.9655930825339882, "calibration/buffer_distribution_entropy": 0.9890472365063823, "calibration/confidence_entropy": 0.4795974319609599, "calibration/coverage@0%": 0.0029296875, "calibration/coverage@1%": 0.0029296875, "calibration/coverage@10%": 0.08984375, "calibration/coverage@15%": 0.15234375, "calibration/coverage@20%": 0.3466796875, "calibration/coverage@25%": 0.658203125, "calibration/coverage@30%": 0.7861328125, "calibration/coverage@5%": 0.0595703125, "calibration/ece": 0.14323620346773447, "calibration/mean_confidence": 0.6021585478088081, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 556.5, "completions/max_terminated_length": 556.5, "completions/mean_length": 241.6327667236328, "completions/mean_terminated_length": 241.6327667236328, "completions/min_length": 111.5, "completions/min_terminated_length": 111.5, "epoch": 0.9984, "num_tokens": 1085956914.0, "reward": 0.9854284524917603, "reward_std": 0.08515419811010361, "rewards/accuracy_reward": 0.586181640625, "rewards/batch_coverage_0": 0.37667161226272583, "rewards/batch_coverage_1": 0.37667161226272583, "rewards/batch_coverage_10": 0.40571996569633484, "rewards/batch_coverage_15": 0.4070962965488434, "rewards/batch_coverage_20": 0.4126690626144409, "rewards/batch_coverage_25": 0.41465383768081665, "rewards/batch_coverage_5": 0.38757333159446716, "rewards/brier_reward": 0.7919209599494934, "rewards/confidence_uniqueness_reward": 0.9523754119873047, "rewards/format_reward": 1.0, "rewards/frontier_entropy_batch_reward": -0.218610942363739, "signal/accuracy_reward/centered_abs_mean": 0.0902557373046875, "signal/accuracy_reward/group_std_mean": 0.11484164744615555, "signal/accuracy_reward/group_zero_std_frac": 0.6875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8470200598239899, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04512786865234375, "signal/advantage_abs_mean": 0.781099408864975, "signal/advantage_pre_scale_abs_mean": 0.06732682883739471, "signal/advantage_pre_scale_std": 0.10796621814370155, "signal/advantage_std": 0.9826479554176331, "signal/batch_coverage_0/centered_abs_mean": 0.14751210063695908, "signal/batch_coverage_0/group_std_mean": 0.18181322515010834, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03951258212327957, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0021094229305163026, "signal/batch_coverage_1/centered_abs_mean": 0.14751210063695908, "signal/batch_coverage_1/group_std_mean": 0.18181322515010834, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03951258212327957, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0021094229305163026, "signal/batch_coverage_10/centered_abs_mean": 0.1506556123495102, "signal/batch_coverage_10/group_std_mean": 0.1876886636018753, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04036974348127842, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021543753100559115, "signal/batch_coverage_15/centered_abs_mean": 0.1514410898089409, "signal/batch_coverage_15/group_std_mean": 0.18875804543495178, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.040581924840807915, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0021656075259670615, "signal/batch_coverage_20/centered_abs_mean": 0.1517779529094696, "signal/batch_coverage_20/group_std_mean": 0.19017679244279861, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04067254438996315, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.00217042479198426, "signal/batch_coverage_25/centered_abs_mean": 0.14645803719758987, "signal/batch_coverage_25/group_std_mean": 0.18481793254613876, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03923363797366619, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0020943498238921165, "signal/batch_coverage_5/centered_abs_mean": 0.15008513629436493, "signal/batch_coverage_5/group_std_mean": 0.18554619699716568, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04019570350646973, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002146217506378889, "signal/brier_reward/centered_abs_mean": 0.10291799902915955, "signal/brier_reward/group_std_mean": 0.13079295679926872, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19292094558477402, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010291799437254667, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012238562107086182, "signal/confidence_uniqueness_reward/group_std_mean": 0.015250732190907001, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022890268824994564, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012238562339916825, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2838418483734131, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3580092638731003, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5310377329587936, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028384186327457428, "step": 312, "total_flos": 0.0, "train_loss": -0.01170685039258574, "train_runtime": 64987.1739, "train_samples_per_second": 0.308, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 312, "num_input_tokens_seen": 1085956914, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }