{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.49919376007799904, "eval_steps": 50, "global_step": 208, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.4786468696997992, "calibration/batch_distribution_entropy": 0.27434989424557693, "calibration/batch_entropy_100bins": 0.3452116907370852, "calibration/batch_entropy_10bins": 0.27434989424557693, "calibration/batch_entropy_50bins": 0.40370561408688826, "calibration/batch_uniqueness": 0.4969804532848675, "calibration/confidence_entropy": 0.215996847848038, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.45861411649511047, "calibration/mean_confidence": 0.9140472626196257, "calibration/prompt_uniqueness": 0.35674800174725496, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020225694444444442, "completions/max_length": 4034.2, "completions/max_terminated_length": 4034.2, "completions/mean_length": 518.5538208007813, "completions/mean_terminated_length": 529.2614379882813, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011999850001874977, "grad_norm": 0.0034298275131732225, "learning_rate": 5.952380952380953e-07, "loss": 0.0041, "num_tokens": 9087948.0, "reward": 0.48353423476219176, "reward_std": 0.4472260117530823, "rewards/accuracy_reward": 0.25295138359069824, "rewards/brier_reward": 0.3069717109203339, "rewards/confidence_uniqueness_reward": 0.28508294820785524, "rewards/format_reward": 0.5980902671813965, "rewards/frontier_aurc_reward": 0.26909309029579165, "rewards/frontier_coverage_0": 0.26909309029579165, "rewards/frontier_coverage_1": 0.26909309029579165, "rewards/frontier_coverage_10": 0.26909309029579165, "rewards/frontier_coverage_15": 0.26909309029579165, "rewards/frontier_coverage_20": 0.26909309029579165, "rewards/frontier_coverage_25": 0.26909309029579165, "rewards/frontier_coverage_5": 0.26909309029579165, "rewards/frontier_ece_reward": 0.26909309029579165, "rewards/frontier_entropy_batch_reward": -0.5501068949699401, "signal/accuracy_reward/centered_abs_mean": 0.30129122734069824, "signal/accuracy_reward/group_bin_occupancy": 0.2361111111111111, "signal/accuracy_reward/group_std_mean": 0.3599981427192688, "signal/accuracy_reward/group_zero_std_frac": 0.11111111268401146, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15064561367034912, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15064561367034912, "signal/advantage_abs_mean": 0.38422595858573916, "signal/advantage_pre_scale_abs_mean": 0.38422595858573916, "signal/advantage_pre_scale_std": 0.4541194498538971, "signal/advantage_std": 0.4541194498538971, "signal/brier_reward/centered_abs_mean": 0.31531033515930174, "signal/brier_reward/group_bin_occupancy": 0.5211805555555555, "signal/brier_reward/group_std_mean": 0.36791505217552184, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031531032919883725, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.031531032919883725, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.23501766622066497, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6003472222222223, "signal/confidence_uniqueness_reward/group_std_mean": 0.2864716470241547, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023501767963171005, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023501767963171005, "signal/format_reward/centered_abs_mean": 0.43889973759651185, "signal/format_reward/group_bin_occupancy": 0.25, "signal/format_reward/group_std_mean": 0.4739928424358368, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.21944986879825593, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.21944986879825593, "signal/frontier_aurc_reward/centered_abs_mean": 0.3046343684196472, "signal/frontier_aurc_reward/group_bin_occupancy": 0.4024305555555555, "signal/frontier_aurc_reward/group_std_mean": 0.36159105896949767, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003807929763570428, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003807929763570428, "signal/frontier_coverage_0/centered_abs_mean": 0.3046343684196472, "signal/frontier_coverage_0/group_bin_occupancy": 0.4024305555555555, "signal/frontier_coverage_0/group_std_mean": 0.36159105896949767, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003807929763570428, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003807929763570428, "signal/frontier_coverage_1/centered_abs_mean": 0.3046343684196472, "signal/frontier_coverage_1/group_bin_occupancy": 0.4024305555555555, "signal/frontier_coverage_1/group_std_mean": 0.36159105896949767, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003807929763570428, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003807929763570428, "signal/frontier_coverage_10/centered_abs_mean": 0.3046343684196472, "signal/frontier_coverage_10/group_bin_occupancy": 0.4024305555555555, "signal/frontier_coverage_10/group_std_mean": 0.36159105896949767, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003807929763570428, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003807929763570428, "signal/frontier_coverage_15/centered_abs_mean": 0.3046343684196472, "signal/frontier_coverage_15/group_bin_occupancy": 0.4024305555555555, "signal/frontier_coverage_15/group_std_mean": 0.36159105896949767, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003807929763570428, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003807929763570428, "signal/frontier_coverage_20/centered_abs_mean": 0.3046343684196472, "signal/frontier_coverage_20/group_bin_occupancy": 0.4024305555555555, "signal/frontier_coverage_20/group_std_mean": 0.36159105896949767, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003807929763570428, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003807929763570428, "signal/frontier_coverage_25/centered_abs_mean": 0.3046343684196472, "signal/frontier_coverage_25/group_bin_occupancy": 0.4024305555555555, "signal/frontier_coverage_25/group_std_mean": 0.36159105896949767, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003807929763570428, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003807929763570428, "signal/frontier_coverage_5/centered_abs_mean": 0.3046343684196472, "signal/frontier_coverage_5/group_bin_occupancy": 0.4024305555555555, "signal/frontier_coverage_5/group_std_mean": 0.36159105896949767, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003807929763570428, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003807929763570428, "signal/frontier_ece_reward/centered_abs_mean": 0.3046343684196472, "signal/frontier_ece_reward/group_bin_occupancy": 0.4024305555555555, "signal/frontier_ece_reward/group_std_mean": 0.36159105896949767, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030463438108563425, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030463438108563425, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.45770797729492185, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3309027777777778, "signal/frontier_entropy_batch_reward/group_std_mean": 0.49182985424995423, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0457707978785038, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0457707978785038, "step": 5 }, { "calibration/aurc": 0.5215323422297184, "calibration/batch_distribution_entropy": 0.27063770046218427, "calibration/batch_entropy_100bins": 0.3524338658024668, "calibration/batch_entropy_10bins": 0.27063770046218427, "calibration/batch_entropy_50bins": 0.41210669269116024, "calibration/batch_uniqueness": 0.5186394142692434, "calibration/confidence_entropy": 0.22697660378505544, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4744829452486872, "calibration/mean_confidence": 0.9174019066918241, "calibration/prompt_uniqueness": 0.4043385400555728, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017274305555555557, "completions/max_length": 4042.4, "completions/max_terminated_length": 4042.4, "completions/mean_length": 478.6330810546875, "completions/mean_terminated_length": 487.2344909667969, "completions/min_length": 0.0, "completions/min_terminated_length": 19.2, "epoch": 0.023999700003749954, "grad_norm": 0.020310023799538612, "learning_rate": 1.1904761904761906e-06, "loss": 0.0026, "num_tokens": 17684521.0, "reward": 0.5639899730682373, "reward_std": 0.4257666528224945, "rewards/accuracy_reward": 0.28472222983837125, "rewards/brier_reward": 0.34975607991218566, "rewards/confidence_uniqueness_reward": 0.35827080607414247, "rewards/format_reward": 0.7129340171813965, "rewards/frontier_aurc_reward": 0.30107017755508425, "rewards/frontier_coverage_0": 0.30107017755508425, "rewards/frontier_coverage_1": 0.30107017755508425, "rewards/frontier_coverage_10": 0.30107017755508425, "rewards/frontier_coverage_15": 0.30107017755508425, "rewards/frontier_coverage_20": 0.30107017755508425, "rewards/frontier_coverage_25": 0.30107017755508425, "rewards/frontier_coverage_5": 0.30107017755508425, "rewards/frontier_ece_reward": 0.30107017755508425, "rewards/frontier_entropy_batch_reward": -0.6585487723350525, "signal/accuracy_reward/centered_abs_mean": 0.31558159589767454, "signal/accuracy_reward/group_bin_occupancy": 0.24027777777777776, "signal/accuracy_reward/group_std_mean": 0.37627485394477844, "signal/accuracy_reward/group_zero_std_frac": 0.07777777928858995, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15779079794883727, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15779079794883727, "signal/advantage_abs_mean": 0.3536957919597626, "signal/advantage_pre_scale_abs_mean": 0.3536957919597626, "signal/advantage_pre_scale_std": 0.4313755929470062, "signal/advantage_std": 0.4313755929470062, "signal/brier_reward/centered_abs_mean": 0.31298828125, "signal/brier_reward/group_bin_occupancy": 0.545138888888889, "signal/brier_reward/group_std_mean": 0.36775757670402526, "signal/brier_reward/group_zero_std_frac": 0.00555555559694767, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03129882961511612, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03129882961511612, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.22144390940666198, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6322916666666667, "signal/confidence_uniqueness_reward/group_std_mean": 0.27823981642723083, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022144390642642973, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.022144390642642973, "signal/format_reward/centered_abs_mean": 0.3566026449203491, "signal/format_reward/group_bin_occupancy": 0.25, "signal/format_reward/group_std_mean": 0.42138834595680236, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.17830132246017455, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.17830132246017455, "signal/frontier_aurc_reward/centered_abs_mean": 0.3110755383968353, "signal/frontier_aurc_reward/group_bin_occupancy": 0.4229166666666667, "signal/frontier_aurc_reward/group_std_mean": 0.3699175715446472, "signal/frontier_aurc_reward/group_zero_std_frac": 0.00555555559694767, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003888444369658828, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003888444369658828, "signal/frontier_coverage_0/centered_abs_mean": 0.3110755383968353, "signal/frontier_coverage_0/group_bin_occupancy": 0.4229166666666667, "signal/frontier_coverage_0/group_std_mean": 0.3699175715446472, "signal/frontier_coverage_0/group_zero_std_frac": 0.00555555559694767, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003888444369658828, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003888444369658828, "signal/frontier_coverage_1/centered_abs_mean": 0.3110755383968353, "signal/frontier_coverage_1/group_bin_occupancy": 0.4229166666666667, "signal/frontier_coverage_1/group_std_mean": 0.3699175715446472, "signal/frontier_coverage_1/group_zero_std_frac": 0.00555555559694767, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003888444369658828, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003888444369658828, "signal/frontier_coverage_10/centered_abs_mean": 0.3110755383968353, "signal/frontier_coverage_10/group_bin_occupancy": 0.4229166666666667, "signal/frontier_coverage_10/group_std_mean": 0.3699175715446472, "signal/frontier_coverage_10/group_zero_std_frac": 0.00555555559694767, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003888444369658828, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003888444369658828, "signal/frontier_coverage_15/centered_abs_mean": 0.3110755383968353, "signal/frontier_coverage_15/group_bin_occupancy": 0.4229166666666667, "signal/frontier_coverage_15/group_std_mean": 0.3699175715446472, "signal/frontier_coverage_15/group_zero_std_frac": 0.00555555559694767, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003888444369658828, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003888444369658828, "signal/frontier_coverage_20/centered_abs_mean": 0.3110755383968353, "signal/frontier_coverage_20/group_bin_occupancy": 0.4229166666666667, "signal/frontier_coverage_20/group_std_mean": 0.3699175715446472, "signal/frontier_coverage_20/group_zero_std_frac": 0.00555555559694767, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003888444369658828, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003888444369658828, "signal/frontier_coverage_25/centered_abs_mean": 0.3110755383968353, "signal/frontier_coverage_25/group_bin_occupancy": 0.4229166666666667, "signal/frontier_coverage_25/group_std_mean": 0.3699175715446472, "signal/frontier_coverage_25/group_zero_std_frac": 0.00555555559694767, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003888444369658828, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003888444369658828, "signal/frontier_coverage_5/centered_abs_mean": 0.3110755383968353, "signal/frontier_coverage_5/group_bin_occupancy": 0.4229166666666667, "signal/frontier_coverage_5/group_std_mean": 0.3699175715446472, "signal/frontier_coverage_5/group_zero_std_frac": 0.00555555559694767, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003888444369658828, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003888444369658828, "signal/frontier_ece_reward/centered_abs_mean": 0.3110755383968353, "signal/frontier_ece_reward/group_bin_occupancy": 0.4229166666666667, "signal/frontier_ece_reward/group_std_mean": 0.3699175715446472, "signal/frontier_ece_reward/group_zero_std_frac": 0.00555555559694767, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.031107554957270623, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.031107554957270623, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4007949113845825, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.34236111111111117, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4584290623664856, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04007949084043503, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04007949084043503, "step": 10 }, { "calibration/aurc": 0.5634525142239435, "calibration/batch_distribution_entropy": 0.2936840156784979, "calibration/batch_entropy_100bins": 0.3574441819799151, "calibration/batch_entropy_10bins": 0.2936840156784979, "calibration/batch_entropy_50bins": 0.41566705111670643, "calibration/batch_uniqueness": 0.5181815236994417, "calibration/confidence_entropy": 0.22663220422136415, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5354899110079999, "calibration/mean_confidence": 0.9149286672818105, "calibration/prompt_uniqueness": 0.3991411197867409, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 4018.0, "completions/max_terminated_length": 4018.0, "completions/mean_length": 430.7875915527344, "completions/mean_terminated_length": 435.9453552246094, "completions/min_length": 0.0, "completions/min_terminated_length": 44.0, "epoch": 0.03599955000562493, "grad_norm": 0.0013605451676994562, "learning_rate": 1.7857142857142859e-06, "loss": -0.0061, "num_tokens": 25749178.0, "reward": 0.6798976540565491, "reward_std": 0.34733850955963136, "rewards/accuracy_reward": 0.3006076455116272, "rewards/brier_reward": 0.4006872236728668, "rewards/confidence_uniqueness_reward": 0.49280205368995667, "rewards/format_reward": 0.9177083253860474, "rewards/frontier_aurc_reward": 0.32862133979797364, "rewards/frontier_coverage_0": 0.32862133979797364, "rewards/frontier_coverage_1": 0.32862133979797364, "rewards/frontier_coverage_10": 0.32862133979797364, "rewards/frontier_coverage_15": 0.32862133979797364, "rewards/frontier_coverage_20": 0.32862133979797364, "rewards/frontier_coverage_25": 0.32862133979797364, "rewards/frontier_coverage_5": 0.32862133979797364, "rewards/frontier_ece_reward": 0.32862133979797364, "rewards/frontier_entropy_batch_reward": -0.8433352708816528, "signal/accuracy_reward/centered_abs_mean": 0.31458876729011537, "signal/accuracy_reward/group_bin_occupancy": 0.23888888888888887, "signal/accuracy_reward/group_std_mean": 0.3746976673603058, "signal/accuracy_reward/group_zero_std_frac": 0.0888888917863369, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15729438364505768, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15729438364505768, "signal/advantage_abs_mean": 0.2811680108308792, "signal/advantage_pre_scale_abs_mean": 0.2811680108308792, "signal/advantage_pre_scale_std": 0.3555997729301453, "signal/advantage_std": 0.3555997729301453, "signal/brier_reward/centered_abs_mean": 0.3005147516727448, "signal/brier_reward/group_bin_occupancy": 0.6149305555555555, "signal/brier_reward/group_std_mean": 0.35394822955131533, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030051474645733833, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.030051474645733833, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.18455613553524017, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6274305555555556, "signal/confidence_uniqueness_reward/group_std_mean": 0.23438866436481476, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.018455613404512405, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.018455613404512405, "signal/format_reward/centered_abs_mean": 0.13573133796453477, "signal/format_reward/group_bin_occupancy": 0.22152777777777782, "signal/format_reward/group_std_mean": 0.220550999045372, "signal/format_reward/group_zero_std_frac": 0.22777777388691903, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06786566898226738, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.06786566898226738, "signal/frontier_aurc_reward/centered_abs_mean": 0.30784491300582884, "signal/frontier_aurc_reward/group_bin_occupancy": 0.48611111111111105, "signal/frontier_aurc_reward/group_std_mean": 0.36494665741920473, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0038480616174638273, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0038480616174638273, "signal/frontier_coverage_0/centered_abs_mean": 0.30784491300582884, "signal/frontier_coverage_0/group_bin_occupancy": 0.48611111111111105, "signal/frontier_coverage_0/group_std_mean": 0.36494665741920473, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0038480616174638273, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0038480616174638273, "signal/frontier_coverage_1/centered_abs_mean": 0.30784491300582884, "signal/frontier_coverage_1/group_bin_occupancy": 0.48611111111111105, "signal/frontier_coverage_1/group_std_mean": 0.36494665741920473, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038480616174638273, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038480616174638273, "signal/frontier_coverage_10/centered_abs_mean": 0.30784491300582884, "signal/frontier_coverage_10/group_bin_occupancy": 0.48611111111111105, "signal/frontier_coverage_10/group_std_mean": 0.36494665741920473, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038480616174638273, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038480616174638273, "signal/frontier_coverage_15/centered_abs_mean": 0.30784491300582884, "signal/frontier_coverage_15/group_bin_occupancy": 0.48611111111111105, "signal/frontier_coverage_15/group_std_mean": 0.36494665741920473, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038480616174638273, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038480616174638273, "signal/frontier_coverage_20/centered_abs_mean": 0.30784491300582884, "signal/frontier_coverage_20/group_bin_occupancy": 0.48611111111111105, "signal/frontier_coverage_20/group_std_mean": 0.36494665741920473, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038480616174638273, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038480616174638273, "signal/frontier_coverage_25/centered_abs_mean": 0.30784491300582884, "signal/frontier_coverage_25/group_bin_occupancy": 0.48611111111111105, "signal/frontier_coverage_25/group_std_mean": 0.36494665741920473, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038480616174638273, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038480616174638273, "signal/frontier_coverage_5/centered_abs_mean": 0.30784491300582884, "signal/frontier_coverage_5/group_bin_occupancy": 0.48611111111111105, "signal/frontier_coverage_5/group_std_mean": 0.36494665741920473, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038480616174638273, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038480616174638273, "signal/frontier_ece_reward/centered_abs_mean": 0.30784491300582884, "signal/frontier_ece_reward/group_bin_occupancy": 0.48611111111111105, "signal/frontier_ece_reward/group_std_mean": 0.36494665741920473, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03078449293971062, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03078449293971062, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24438310861587526, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.36006944444444444, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3511778712272644, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.02500000037252903, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02443831190466881, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02443831190466881, "step": 15 }, { "calibration/aurc": 0.49393609341404227, "calibration/batch_distribution_entropy": 0.3950132648234722, "calibration/batch_entropy_100bins": 0.40505684224566296, "calibration/batch_entropy_10bins": 0.3950132648234722, "calibration/batch_entropy_50bins": 0.4693520387698801, "calibration/batch_uniqueness": 0.61509530787946, "calibration/buffer_distribution_entropy": 0.3091847349375323, "calibration/buffer_entropy_100bins": 0.37384274971807285, "calibration/buffer_entropy_10bins": 0.3091847349375323, "calibration/buffer_entropy_50bins": 0.4345021124322783, "calibration/confidence_entropy": 0.2933222613376684, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.029023746701846966, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4258668636956767, "calibration/mean_confidence": 0.8878107373812835, "calibration/prompt_uniqueness": 0.517297995778335, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010850694444444442, "completions/max_length": 3979.6, "completions/max_terminated_length": 3979.6, "completions/mean_length": 432.4179748535156, "completions/mean_terminated_length": 437.20750732421874, "completions/min_length": 0.0, "completions/min_terminated_length": 68.8, "epoch": 0.04799940000749991, "grad_norm": 0.0028677526861429214, "learning_rate": 2.380952380952381e-06, "loss": -0.0086, "num_tokens": 33844329.0, "reward": 0.7569576263427734, "reward_std": 0.2728476107120514, "rewards/accuracy_reward": 0.41449652910232543, "rewards/brier_reward": 0.5286458790302276, "rewards/confidence_uniqueness_reward": 0.606674587726593, "rewards/format_reward": 0.9817708253860473, "rewards/frontier_aurc_reward": 0.17920130817219615, "rewards/frontier_coverage_0": 0.1886154913343489, "rewards/frontier_coverage_1": 0.1886154913343489, "rewards/frontier_coverage_10": 0.1886154913343489, "rewards/frontier_coverage_15": 0.1886154913343489, "rewards/frontier_coverage_20": 0.1886154913343489, "rewards/frontier_coverage_25": 0.1886154913343489, "rewards/frontier_coverage_5": 0.1886154913343489, "rewards/frontier_ece_reward": 0.1644112183363177, "rewards/frontier_entropy_batch_reward": -0.8989308953285218, "signal/accuracy_reward/centered_abs_mean": 0.2998209595680237, "signal/accuracy_reward/group_bin_occupancy": 0.24131944444444448, "signal/accuracy_reward/group_std_mean": 0.3666124284267426, "signal/accuracy_reward/group_zero_std_frac": 0.06944444701075554, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14991047978401184, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.14991047978401184, "signal/advantage_abs_mean": 0.2194227993488312, "signal/advantage_pre_scale_abs_mean": 0.2194227993488312, "signal/advantage_pre_scale_std": 0.28129519820213317, "signal/advantage_std": 0.28129519820213317, "signal/brier_reward/centered_abs_mean": 0.26685882806777955, "signal/brier_reward/group_bin_occupancy": 0.6607638888888889, "signal/brier_reward/group_std_mean": 0.32316548824310304, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02668588310480118, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02668588310480118, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.15834780037403107, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6138888888888888, "signal/confidence_uniqueness_reward/group_std_mean": 0.19463339745998381, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015834780223667622, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015834780223667622, "signal/format_reward/centered_abs_mean": 0.03338758684694767, "signal/format_reward/group_bin_occupancy": 0.16562499999999997, "signal/format_reward/group_std_mean": 0.07192002534866333, "signal/format_reward/group_zero_std_frac": 0.675000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.016693793423473834, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.016693793423473834, "signal/frontier_aurc_reward/centered_abs_mean": 0.12922168229706585, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6350694444444445, "signal/frontier_aurc_reward/group_std_mean": 0.1566169561818242, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.001615271106857108, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.001615271106857108, "signal/frontier_coverage_0/centered_abs_mean": 0.1444099996238947, "signal/frontier_coverage_0/group_bin_occupancy": 0.6125, "signal/frontier_coverage_0/group_std_mean": 0.18344281539320945, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018051250837743283, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018051250837743283, "signal/frontier_coverage_1/centered_abs_mean": 0.1444099996238947, "signal/frontier_coverage_1/group_bin_occupancy": 0.6125, "signal/frontier_coverage_1/group_std_mean": 0.18344281539320945, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018051250837743283, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018051250837743283, "signal/frontier_coverage_10/centered_abs_mean": 0.1444099996238947, "signal/frontier_coverage_10/group_bin_occupancy": 0.6125, "signal/frontier_coverage_10/group_std_mean": 0.18344281539320945, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018051250837743283, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018051250837743283, "signal/frontier_coverage_15/centered_abs_mean": 0.1444099996238947, "signal/frontier_coverage_15/group_bin_occupancy": 0.6125, "signal/frontier_coverage_15/group_std_mean": 0.18344281539320945, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018051250837743283, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018051250837743283, "signal/frontier_coverage_20/centered_abs_mean": 0.1444099996238947, "signal/frontier_coverage_20/group_bin_occupancy": 0.6125, "signal/frontier_coverage_20/group_std_mean": 0.18344281539320945, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018051250837743283, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018051250837743283, "signal/frontier_coverage_25/centered_abs_mean": 0.1444099996238947, "signal/frontier_coverage_25/group_bin_occupancy": 0.6125, "signal/frontier_coverage_25/group_std_mean": 0.18344281539320945, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018051250837743283, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018051250837743283, "signal/frontier_coverage_5/centered_abs_mean": 0.1444099996238947, "signal/frontier_coverage_5/group_bin_occupancy": 0.6125, "signal/frontier_coverage_5/group_std_mean": 0.18344281539320945, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018051250837743283, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018051250837743283, "signal/frontier_ece_reward/centered_abs_mean": 0.22200666069984437, "signal/frontier_ece_reward/group_bin_occupancy": 0.4690972222222222, "signal/frontier_ece_reward/group_std_mean": 0.26992476880550387, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02220066711306572, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02220066711306572, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.16805205643177032, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3486111111111111, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2819783270359039, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.09166666865348816, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01680520586669445, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01680520586669445, "step": 20 }, { "calibration/aurc": 0.40831880456732605, "calibration/batch_distribution_entropy": 0.6097885070554001, "calibration/batch_entropy_100bins": 0.48593398604284915, "calibration/batch_entropy_10bins": 0.6097885070554001, "calibration/batch_entropy_50bins": 0.5663091534135619, "calibration/batch_uniqueness": 0.7425439382867337, "calibration/buffer_distribution_entropy": 0.36603622224810134, "calibration/buffer_entropy_100bins": 0.39999389908282235, "calibration/buffer_entropy_10bins": 0.36603622224810134, "calibration/buffer_entropy_50bins": 0.4646358566837005, "calibration/confidence_entropy": 0.39686215013611503, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.07571801566579635, "calibration/coverage@30%": 0.2670907759680606, "calibration/coverage@5%": 0.0, "calibration/ece": 0.2710533311020483, "calibration/mean_confidence": 0.8242252010281881, "calibration/prompt_uniqueness": 0.6616427423448029, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00980902777777779, "completions/max_length": 3722.6, "completions/max_terminated_length": 3722.6, "completions/mean_length": 459.39210205078126, "completions/mean_terminated_length": 463.9351379394531, "completions/min_length": 0.0, "completions/min_terminated_length": 102.2, "epoch": 0.05999925000937488, "grad_norm": 0.0009269694564864039, "learning_rate": 2.9761904761904763e-06, "loss": -0.0064, "num_tokens": 42260974.0, "reward": 0.7958746194839478, "reward_std": 0.2274015724658966, "rewards/accuracy_reward": 0.5085069417953492, "rewards/brier_reward": 0.6438981890678406, "rewards/confidence_uniqueness_reward": 0.734617817401886, "rewards/format_reward": 0.9876736283302308, "rewards/frontier_aurc_reward": -0.004628232168033719, "rewards/frontier_coverage_0": 0.005646060802973807, "rewards/frontier_coverage_1": 0.005646060802973807, "rewards/frontier_coverage_10": 0.005646060802973807, "rewards/frontier_coverage_15": 0.005646060802973807, "rewards/frontier_coverage_20": 0.005646060802973807, "rewards/frontier_coverage_25": 0.005646060802973807, "rewards/frontier_coverage_5": 0.005646060802973807, "rewards/frontier_ece_reward": 0.007948444318026304, "rewards/frontier_entropy_batch_reward": -0.9129829168319702, "signal/accuracy_reward/centered_abs_mean": 0.28665364980697633, "signal/accuracy_reward/group_bin_occupancy": 0.23819444444444446, "signal/accuracy_reward/group_std_mean": 0.3534803450107574, "signal/accuracy_reward/group_zero_std_frac": 0.09444444552063942, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14332682490348816, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.14332682490348816, "signal/advantage_abs_mean": 0.18016738891601564, "signal/advantage_pre_scale_abs_mean": 0.18016738891601564, "signal/advantage_pre_scale_std": 0.238165420293808, "signal/advantage_std": 0.238165420293808, "signal/brier_reward/centered_abs_mean": 0.21931754648685456, "signal/brier_reward/group_bin_occupancy": 0.7795138888888888, "signal/brier_reward/group_std_mean": 0.27221688628196716, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021931754797697066, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.021931754797697066, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09854339063167572, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6871527777777777, "signal/confidence_uniqueness_reward/group_std_mean": 0.12727494537830353, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009854339342564345, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009854339342564345, "signal/format_reward/centered_abs_mean": 0.022319878824055193, "signal/format_reward/group_bin_occupancy": 0.15138888888888888, "signal/format_reward/group_std_mean": 0.047178071737289426, "signal/format_reward/group_zero_std_frac": 0.7888888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011159939412027597, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011159939412027597, "signal/frontier_aurc_reward/centered_abs_mean": 0.002872100844979286, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7059027777777778, "signal/frontier_aurc_reward/group_std_mean": 0.004377482458949089, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5901259980164466e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5901259980164466e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.0523149847984314, "signal/frontier_coverage_0/group_bin_occupancy": 0.7753472222222222, "signal/frontier_coverage_0/group_std_mean": 0.08000584244728089, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00065393730183132, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00065393730183132, "signal/frontier_coverage_1/centered_abs_mean": 0.0523149847984314, "signal/frontier_coverage_1/group_bin_occupancy": 0.7753472222222222, "signal/frontier_coverage_1/group_std_mean": 0.08000584244728089, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00065393730183132, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00065393730183132, "signal/frontier_coverage_10/centered_abs_mean": 0.0523149847984314, "signal/frontier_coverage_10/group_bin_occupancy": 0.7753472222222222, "signal/frontier_coverage_10/group_std_mean": 0.08000584244728089, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00065393730183132, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00065393730183132, "signal/frontier_coverage_15/centered_abs_mean": 0.0523149847984314, "signal/frontier_coverage_15/group_bin_occupancy": 0.7753472222222222, "signal/frontier_coverage_15/group_std_mean": 0.08000584244728089, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00065393730183132, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00065393730183132, "signal/frontier_coverage_20/centered_abs_mean": 0.0523149847984314, "signal/frontier_coverage_20/group_bin_occupancy": 0.7753472222222222, "signal/frontier_coverage_20/group_std_mean": 0.08000584244728089, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00065393730183132, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00065393730183132, "signal/frontier_coverage_25/centered_abs_mean": 0.0523149847984314, "signal/frontier_coverage_25/group_bin_occupancy": 0.7753472222222222, "signal/frontier_coverage_25/group_std_mean": 0.08000584244728089, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00065393730183132, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00065393730183132, "signal/frontier_coverage_5/centered_abs_mean": 0.0523149847984314, "signal/frontier_coverage_5/group_bin_occupancy": 0.7753472222222222, "signal/frontier_coverage_5/group_std_mean": 0.08000584244728089, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00065393730183132, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00065393730183132, "signal/frontier_ece_reward/centered_abs_mean": 0.13553643673658372, "signal/frontier_ece_reward/group_bin_occupancy": 0.6496527777777777, "signal/frontier_ece_reward/group_std_mean": 0.16666682958602905, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.013553644344210625, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.013553644344210625, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1483454465866089, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.30590277777777775, "signal/frontier_entropy_batch_reward/group_std_mean": 0.26824913918972015, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1527777798473835, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0148345448076725, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0148345448076725, "step": 25 }, { "calibration/aurc": 0.31340018732890096, "calibration/batch_distribution_entropy": 0.7048907327472749, "calibration/batch_entropy_100bins": 0.5451866323540866, "calibration/batch_entropy_10bins": 0.7048907327472749, "calibration/batch_entropy_50bins": 0.6235172362071362, "calibration/batch_uniqueness": 0.7777645419569349, "calibration/buffer_distribution_entropy": 0.4720834768392196, "calibration/buffer_entropy_100bins": 0.4512809972000536, "calibration/buffer_entropy_10bins": 0.4720834768392196, "calibration/buffer_entropy_50bins": 0.5236436414920623, "calibration/confidence_entropy": 0.5383263280850088, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.060188392145496705, "calibration/coverage@25%": 0.17894412155077963, "calibration/coverage@30%": 0.3775832099984736, "calibration/coverage@5%": 0.0, "calibration/ece": 0.12798357790902398, "calibration/mean_confidence": 0.7155363782263905, "calibration/prompt_uniqueness": 0.7025420849805335, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01553819444444442, "completions/max_length": 3862.6, "completions/max_terminated_length": 3862.6, "completions/mean_length": 536.9459350585937, "completions/mean_terminated_length": 545.4788208007812, "completions/min_length": 0.0, "completions/min_terminated_length": 122.2, "epoch": 0.07199910001124986, "grad_norm": 0.0006166549865156412, "learning_rate": 3.5714285714285718e-06, "loss": -0.0102, "num_tokens": 51556511.0, "reward": 0.8471167922019959, "reward_std": 0.19835625290870668, "rewards/accuracy_reward": 0.5782118082046509, "rewards/brier_reward": 0.7210497856140137, "rewards/confidence_uniqueness_reward": 0.7651053071022034, "rewards/format_reward": 0.9834201216697693, "rewards/frontier_aurc_reward": -0.003613748401403427, "rewards/frontier_coverage_0": -0.00830224696546793, "rewards/frontier_coverage_1": -0.00830224696546793, "rewards/frontier_coverage_10": -0.00830224696546793, "rewards/frontier_coverage_15": -0.00830224696546793, "rewards/frontier_coverage_20": -0.00830224696546793, "rewards/frontier_coverage_25": -0.00830224696546793, "rewards/frontier_coverage_5": -0.00830224696546793, "rewards/frontier_ece_reward": 0.023493098840117455, "rewards/frontier_entropy_batch_reward": -0.838923704624176, "signal/accuracy_reward/centered_abs_mean": 0.24400499165058137, "signal/accuracy_reward/group_bin_occupancy": 0.22743055555555552, "signal/accuracy_reward/group_std_mean": 0.30676281452178955, "signal/accuracy_reward/group_zero_std_frac": 0.18055555820465088, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.12200249582529069, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.12200249582529069, "signal/advantage_abs_mean": 0.15304453670978546, "signal/advantage_pre_scale_abs_mean": 0.15304453670978546, "signal/advantage_pre_scale_std": 0.2149382084608078, "signal/advantage_std": 0.2149382084608078, "signal/brier_reward/centered_abs_mean": 0.15970109701156615, "signal/brier_reward/group_bin_occupancy": 0.8368055555555556, "signal/brier_reward/group_std_mean": 0.20384239852428437, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015970110520720483, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015970110520720483, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.11436907052993775, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.689236111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.14522747993469237, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011436907574534416, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011436907574534416, "signal/format_reward/centered_abs_mean": 0.02875976599752903, "signal/format_reward/group_bin_occupancy": 0.15590277777777778, "signal/format_reward/group_std_mean": 0.057394811511039735, "signal/format_reward/group_zero_std_frac": 0.7527777791023255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.014379882998764516, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014379882998764516, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016801425954326987, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7125, "signal/frontier_aurc_reward/group_std_mean": 0.002739586587995291, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1001783170504496e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1001783170504496e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.0830587849020958, "signal/frontier_coverage_0/group_bin_occupancy": 0.8541666666666666, "signal/frontier_coverage_0/group_std_mean": 0.11073667109012604, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001038234820589423, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001038234820589423, "signal/frontier_coverage_1/centered_abs_mean": 0.0830587849020958, "signal/frontier_coverage_1/group_bin_occupancy": 0.8541666666666666, "signal/frontier_coverage_1/group_std_mean": 0.11073667109012604, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001038234820589423, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001038234820589423, "signal/frontier_coverage_10/centered_abs_mean": 0.0830587849020958, "signal/frontier_coverage_10/group_bin_occupancy": 0.8541666666666666, "signal/frontier_coverage_10/group_std_mean": 0.11073667109012604, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001038234820589423, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001038234820589423, "signal/frontier_coverage_15/centered_abs_mean": 0.0830587849020958, "signal/frontier_coverage_15/group_bin_occupancy": 0.8541666666666666, "signal/frontier_coverage_15/group_std_mean": 0.11073667109012604, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001038234820589423, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001038234820589423, "signal/frontier_coverage_20/centered_abs_mean": 0.0830587849020958, "signal/frontier_coverage_20/group_bin_occupancy": 0.8541666666666666, "signal/frontier_coverage_20/group_std_mean": 0.11073667109012604, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001038234820589423, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001038234820589423, "signal/frontier_coverage_25/centered_abs_mean": 0.0830587849020958, "signal/frontier_coverage_25/group_bin_occupancy": 0.8541666666666666, "signal/frontier_coverage_25/group_std_mean": 0.11073667109012604, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001038234820589423, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001038234820589423, "signal/frontier_coverage_5/centered_abs_mean": 0.0830587849020958, "signal/frontier_coverage_5/group_bin_occupancy": 0.8541666666666666, "signal/frontier_coverage_5/group_std_mean": 0.11073667109012604, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001038234820589423, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001038234820589423, "signal/frontier_ece_reward/centered_abs_mean": 0.08026944175362587, "signal/frontier_ece_reward/group_bin_occupancy": 0.7819444444444444, "signal/frontier_ece_reward/group_std_mean": 0.10268646031618119, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008026944752782584, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008026944752782584, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2555039495229721, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.41215277777777787, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38815844655036924, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.03888888955116272, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025550395622849463, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025550395622849463, "step": 30 }, { "calibration/aurc": 0.25956437104203445, "calibration/batch_distribution_entropy": 0.8165243972425698, "calibration/batch_entropy_100bins": 0.8075646239662813, "calibration/batch_entropy_10bins": 0.8165243972425698, "calibration/batch_entropy_50bins": 0.8399069478240373, "calibration/batch_uniqueness": 0.9168564222845603, "calibration/buffer_distribution_entropy": 0.5710273729435295, "calibration/buffer_entropy_100bins": 0.5281526593776766, "calibration/buffer_entropy_10bins": 0.5710273729435295, "calibration/buffer_entropy_50bins": 0.601999214873284, "calibration/confidence_entropy": 0.5454837996401404, "calibration/coverage@0%": 0.0020887728459530026, "calibration/coverage@1%": 0.0020887728459530026, "calibration/coverage@10%": 0.06197192161099606, "calibration/coverage@15%": 0.11850014659270083, "calibration/coverage@20%": 0.3127652813079301, "calibration/coverage@25%": 0.5564344484546024, "calibration/coverage@30%": 0.6621659907782901, "calibration/coverage@5%": 0.02454308093994778, "calibration/ece": 0.11688668618444734, "calibration/mean_confidence": 0.670672226113574, "calibration/prompt_uniqueness": 0.8470008776094577, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020486111111111115, "completions/max_length": 3877.4, "completions/max_terminated_length": 3877.4, "completions/mean_length": 578.4817749023438, "completions/mean_terminated_length": 590.725341796875, "completions/min_length": 0.0, "completions/min_terminated_length": 146.4, "epoch": 0.08399895001312484, "grad_norm": 0.0005679262103512883, "learning_rate": 4.166666666666667e-06, "loss": -0.0128, "num_tokens": 61298061.0, "reward": 0.9099455237388611, "reward_std": 0.1839153379201889, "rewards/accuracy_reward": 0.61875, "rewards/brier_reward": 0.7439757466316224, "rewards/confidence_uniqueness_reward": 0.8963147759437561, "rewards/format_reward": 0.9782118082046509, "rewards/frontier_aurc_reward": -0.0029043381568044425, "rewards/frontier_coverage_0": -0.01887217308394611, "rewards/frontier_coverage_1": -0.01887217308394611, "rewards/frontier_coverage_10": -0.01887217308394611, "rewards/frontier_coverage_15": -0.01887217308394611, "rewards/frontier_coverage_20": -0.01887217308394611, "rewards/frontier_coverage_25": -0.01887217308394611, "rewards/frontier_coverage_5": -0.01887217308394611, "rewards/frontier_ece_reward": 0.02471662126481533, "rewards/frontier_entropy_batch_reward": -0.5334846794605255, "signal/accuracy_reward/centered_abs_mean": 0.21307508647441864, "signal/accuracy_reward/group_bin_occupancy": 0.21701388888888892, "signal/accuracy_reward/group_std_mean": 0.27209571599960325, "signal/accuracy_reward/group_zero_std_frac": 0.2638888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10653754323720932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10653754323720932, "signal/advantage_abs_mean": 0.14091622233390808, "signal/advantage_pre_scale_abs_mean": 0.14091622233390808, "signal/advantage_pre_scale_std": 0.20204322636127472, "signal/advantage_std": 0.20204322636127472, "signal/brier_reward/centered_abs_mean": 0.15218718945980073, "signal/brier_reward/group_bin_occupancy": 0.8690972222222222, "signal/brier_reward/group_std_mean": 0.19625934958457947, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015218720026314258, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015218720026314258, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06924531385302543, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7503472222222223, "signal/confidence_uniqueness_reward/group_std_mean": 0.09717238694429398, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0069245313294231895, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0069245313294231895, "signal/format_reward/centered_abs_mean": 0.03473849855363369, "signal/format_reward/group_bin_occupancy": 0.15416666666666665, "signal/format_reward/group_std_mean": 0.060436099767684937, "signal/format_reward/group_zero_std_frac": 0.7666666626930236, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.017369249276816844, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.017369249276816844, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016358074499294162, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7302083333333333, "signal/frontier_aurc_reward/group_std_mean": 0.002670387364923954, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.044759276031982e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.044759276031982e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.12218185663223266, "signal/frontier_coverage_0/group_bin_occupancy": 0.8847222222222223, "signal/frontier_coverage_0/group_std_mean": 0.16369088590145112, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001527273189276457, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001527273189276457, "signal/frontier_coverage_1/centered_abs_mean": 0.12218185663223266, "signal/frontier_coverage_1/group_bin_occupancy": 0.8847222222222223, "signal/frontier_coverage_1/group_std_mean": 0.16369088590145112, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001527273189276457, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001527273189276457, "signal/frontier_coverage_10/centered_abs_mean": 0.12218185663223266, "signal/frontier_coverage_10/group_bin_occupancy": 0.8847222222222223, "signal/frontier_coverage_10/group_std_mean": 0.16369088590145112, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001527273189276457, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001527273189276457, "signal/frontier_coverage_15/centered_abs_mean": 0.12218185663223266, "signal/frontier_coverage_15/group_bin_occupancy": 0.8847222222222223, "signal/frontier_coverage_15/group_std_mean": 0.16369088590145112, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001527273189276457, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001527273189276457, "signal/frontier_coverage_20/centered_abs_mean": 0.12218185663223266, "signal/frontier_coverage_20/group_bin_occupancy": 0.8847222222222223, "signal/frontier_coverage_20/group_std_mean": 0.16369088590145112, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001527273189276457, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001527273189276457, "signal/frontier_coverage_25/centered_abs_mean": 0.12218185663223266, "signal/frontier_coverage_25/group_bin_occupancy": 0.8847222222222223, "signal/frontier_coverage_25/group_std_mean": 0.16369088590145112, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001527273189276457, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001527273189276457, "signal/frontier_coverage_5/centered_abs_mean": 0.12218185663223266, "signal/frontier_coverage_5/group_bin_occupancy": 0.8847222222222223, "signal/frontier_coverage_5/group_std_mean": 0.16369088590145112, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001527273189276457, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001527273189276457, "signal/frontier_ece_reward/centered_abs_mean": 0.06000325083732605, "signal/frontier_ece_reward/group_bin_occupancy": 0.6788194444444444, "signal/frontier_ece_reward/group_std_mean": 0.08099779933691025, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006000325083732605, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006000325083732605, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4390486657619476, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7121527777777776, "signal/frontier_entropy_batch_reward/group_std_mean": 0.500614058971405, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04390486851334572, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04390486851334572, "step": 35 }, { "calibration/aurc": 0.30461219130156075, "calibration/batch_distribution_entropy": 0.97860829400174, "calibration/batch_entropy_100bins": 0.9486484674822325, "calibration/batch_entropy_10bins": 0.97860829400174, "calibration/batch_entropy_50bins": 0.9682282190043502, "calibration/batch_uniqueness": 0.9518603939567309, "calibration/buffer_distribution_entropy": 0.6674029515556968, "calibration/buffer_entropy_100bins": 0.6329905407674977, "calibration/buffer_entropy_10bins": 0.6674029515556968, "calibration/buffer_entropy_50bins": 0.696431956314964, "calibration/confidence_entropy": 0.5217791211913715, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.06248734222348997, "calibration/coverage@20%": 0.11387957878723051, "calibration/coverage@25%": 0.34223374312873, "calibration/coverage@30%": 0.5964517660517661, "calibration/coverage@5%": 0.0, "calibration/ece": 0.21549805876997757, "calibration/mean_confidence": 0.5352754370110747, "calibration/prompt_uniqueness": 0.8878912054727662, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020572916666666653, "completions/max_length": 3790.6, "completions/max_terminated_length": 3790.6, "completions/mean_length": 589.0677978515625, "completions/mean_terminated_length": 601.5003784179687, "completions/min_length": 0.0, "completions/min_terminated_length": 152.4, "epoch": 0.09599880001499982, "grad_norm": 0.0004772288375534117, "learning_rate": 4.761904761904762e-06, "loss": -0.0183, "num_tokens": 71203642.0, "reward": 0.9513100028038025, "reward_std": 0.17211353182792663, "rewards/accuracy_reward": 0.6349826335906983, "rewards/brier_reward": 0.7103789806365967, "rewards/confidence_uniqueness_reward": 0.9307293176651001, "rewards/format_reward": 0.97734375, "rewards/frontier_aurc_reward": -0.002656676573678851, "rewards/frontier_coverage_0": -0.04805164374411106, "rewards/frontier_coverage_1": -0.04805164374411106, "rewards/frontier_coverage_10": -0.04805164374411106, "rewards/frontier_coverage_15": -0.04805164374411106, "rewards/frontier_coverage_20": -0.04805164374411106, "rewards/frontier_coverage_25": -0.04805164374411106, "rewards/frontier_coverage_5": -0.04805164374411106, "rewards/frontier_ece_reward": 0.015457052178680897, "rewards/frontier_entropy_batch_reward": -0.16271998584270478, "signal/accuracy_reward/centered_abs_mean": 0.19670681655406952, "signal/accuracy_reward/group_bin_occupancy": 0.21631944444444445, "signal/accuracy_reward/group_std_mean": 0.2593521386384964, "signal/accuracy_reward/group_zero_std_frac": 0.2694444447755814, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09835340827703476, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09835340827703476, "signal/advantage_abs_mean": 0.12720865309238433, "signal/advantage_pre_scale_abs_mean": 0.12720865309238433, "signal/advantage_pre_scale_std": 0.19325639307498932, "signal/advantage_std": 0.19325639307498932, "signal/brier_reward/centered_abs_mean": 0.21643259525299072, "signal/brier_reward/group_bin_occupancy": 0.9309027777777779, "signal/brier_reward/group_std_mean": 0.2640606015920639, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02164325937628746, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02164325937628746, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.047277380526065824, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7392361111111112, "signal/confidence_uniqueness_reward/group_std_mean": 0.07938017547130585, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004727738164365292, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004727738164365292, "signal/format_reward/centered_abs_mean": 0.03805881068110466, "signal/format_reward/group_bin_occupancy": 0.15902777777777777, "signal/format_reward/group_std_mean": 0.06856417283415794, "signal/format_reward/group_zero_std_frac": 0.7277777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01902940534055233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01902940534055233, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017690456472337246, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6746527777777778, "signal/frontier_aurc_reward/group_std_mean": 0.00304986541159451, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.211307037214283e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.211307037214283e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.23642539083957673, "signal/frontier_coverage_0/group_bin_occupancy": 0.9170138888888889, "signal/frontier_coverage_0/group_std_mean": 0.3045207381248474, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002955317497253418, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002955317497253418, "signal/frontier_coverage_1/centered_abs_mean": 0.23642539083957673, "signal/frontier_coverage_1/group_bin_occupancy": 0.9170138888888889, "signal/frontier_coverage_1/group_std_mean": 0.3045207381248474, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002955317497253418, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002955317497253418, "signal/frontier_coverage_10/centered_abs_mean": 0.23642539083957673, "signal/frontier_coverage_10/group_bin_occupancy": 0.9170138888888889, "signal/frontier_coverage_10/group_std_mean": 0.3045207381248474, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002955317497253418, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002955317497253418, "signal/frontier_coverage_15/centered_abs_mean": 0.23642539083957673, "signal/frontier_coverage_15/group_bin_occupancy": 0.9170138888888889, "signal/frontier_coverage_15/group_std_mean": 0.3045207381248474, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002955317497253418, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002955317497253418, "signal/frontier_coverage_20/centered_abs_mean": 0.23642539083957673, "signal/frontier_coverage_20/group_bin_occupancy": 0.9170138888888889, "signal/frontier_coverage_20/group_std_mean": 0.3045207381248474, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002955317497253418, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002955317497253418, "signal/frontier_coverage_25/centered_abs_mean": 0.23642539083957673, "signal/frontier_coverage_25/group_bin_occupancy": 0.9170138888888889, "signal/frontier_coverage_25/group_std_mean": 0.3045207381248474, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002955317497253418, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002955317497253418, "signal/frontier_coverage_5/centered_abs_mean": 0.23642539083957673, "signal/frontier_coverage_5/group_bin_occupancy": 0.9170138888888889, "signal/frontier_coverage_5/group_std_mean": 0.3045207381248474, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002955317497253418, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002955317497253418, "signal/frontier_ece_reward/centered_abs_mean": 0.06959517598152161, "signal/frontier_ece_reward/group_bin_occupancy": 0.7576388888888889, "signal/frontier_ece_reward/group_std_mean": 0.09464571475982667, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006959517952054739, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006959517952054739, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2507960021495819, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.8145833333333332, "signal/frontier_entropy_batch_reward/group_std_mean": 0.32827151417732237, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025079600140452386, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025079600140452386, "step": 40 }, { "calibration/aurc": 0.2344727023359694, "calibration/batch_distribution_entropy": 0.9518525106541613, "calibration/batch_entropy_100bins": 0.9412130004420993, "calibration/batch_entropy_10bins": 0.9518525106541613, "calibration/batch_entropy_50bins": 0.9527945423184636, "calibration/batch_uniqueness": 0.9461262919998014, "calibration/buffer_distribution_entropy": 0.7459719739155918, "calibration/buffer_entropy_100bins": 0.7118780035095448, "calibration/buffer_entropy_10bins": 0.7459719739155918, "calibration/buffer_entropy_50bins": 0.7661675140410678, "calibration/confidence_entropy": 0.4776737674852377, "calibration/coverage@0%": 0.012736259244119067, "calibration/coverage@1%": 0.012736259244119067, "calibration/coverage@10%": 0.02381805343936973, "calibration/coverage@15%": 0.13136398524975118, "calibration/coverage@20%": 0.2600392937120426, "calibration/coverage@25%": 0.6212789086899664, "calibration/coverage@30%": 0.9968337730870713, "calibration/coverage@5%": 0.012736259244119067, "calibration/ece": 0.19628602513678026, "calibration/mean_confidence": 0.6154520747384761, "calibration/prompt_uniqueness": 0.8831628620958897, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023611111111111138, "completions/max_length": 3450.0, "completions/max_terminated_length": 3450.0, "completions/mean_length": 588.0378540039062, "completions/mean_terminated_length": 602.3594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 152.4, "epoch": 0.1079986500168748, "grad_norm": 0.00051538908155635, "learning_rate": 4.909638554216868e-06, "loss": -0.0195, "num_tokens": 81113102.0, "reward": 0.9507439851760864, "reward_std": 0.1727825313806534, "rewards/accuracy_reward": 0.6421006917953491, "rewards/brier_reward": 0.7137632369995117, "rewards/confidence_uniqueness_reward": 0.9256102681159973, "rewards/format_reward": 0.97578125, "rewards/frontier_aurc_reward": -0.002600804064422846, "rewards/frontier_coverage_0": -0.033070035930722955, "rewards/frontier_coverage_1": -0.033070035930722955, "rewards/frontier_coverage_10": -0.033070035930722955, "rewards/frontier_coverage_15": -0.033070035930722955, "rewards/frontier_coverage_20": -0.033070035930722955, "rewards/frontier_coverage_25": -0.033070035930722955, "rewards/frontier_coverage_5": -0.033070035930722955, "rewards/frontier_ece_reward": 0.023521846160292625, "rewards/frontier_entropy_batch_reward": -0.2156035676598549, "signal/accuracy_reward/centered_abs_mean": 0.19353841245174408, "signal/accuracy_reward/group_bin_occupancy": 0.21631944444444443, "signal/accuracy_reward/group_std_mean": 0.2572809010744095, "signal/accuracy_reward/group_zero_std_frac": 0.2694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09676920622587204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09676920622587204, "signal/advantage_abs_mean": 0.12924040853977203, "signal/advantage_pre_scale_abs_mean": 0.12924040853977203, "signal/advantage_pre_scale_std": 0.1953139752149582, "signal/advantage_std": 0.1953139752149582, "signal/brier_reward/centered_abs_mean": 0.23464938700199128, "signal/brier_reward/group_bin_occupancy": 0.8972222222222221, "signal/brier_reward/group_std_mean": 0.2843640446662903, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02346493937075138, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02346493937075138, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05085535049438476, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7968749999999999, "signal/confidence_uniqueness_reward/group_std_mean": 0.07848654761910438, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005085535254329443, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005085535254329443, "signal/format_reward/centered_abs_mean": 0.03865559995174408, "signal/format_reward/group_bin_occupancy": 0.1545138888888889, "signal/format_reward/group_std_mean": 0.06427749693393707, "signal/format_reward/group_zero_std_frac": 0.7638888835906983, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01932779997587204, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01932779997587204, "signal/frontier_aurc_reward/centered_abs_mean": 0.002126035187393427, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7104166666666668, "signal/frontier_aurc_reward/group_std_mean": 0.0032929918263107536, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6575440278975294e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6575440278975294e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.23452837765216827, "signal/frontier_coverage_0/group_bin_occupancy": 0.8694444444444445, "signal/frontier_coverage_0/group_std_mean": 0.31311619877815244, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002931604813784361, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002931604813784361, "signal/frontier_coverage_1/centered_abs_mean": 0.23452837765216827, "signal/frontier_coverage_1/group_bin_occupancy": 0.8694444444444445, "signal/frontier_coverage_1/group_std_mean": 0.31311619877815244, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002931604813784361, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002931604813784361, "signal/frontier_coverage_10/centered_abs_mean": 0.23452837765216827, "signal/frontier_coverage_10/group_bin_occupancy": 0.8694444444444445, "signal/frontier_coverage_10/group_std_mean": 0.31311619877815244, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002931604813784361, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002931604813784361, "signal/frontier_coverage_15/centered_abs_mean": 0.23452837765216827, "signal/frontier_coverage_15/group_bin_occupancy": 0.8694444444444445, "signal/frontier_coverage_15/group_std_mean": 0.31311619877815244, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002931604813784361, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002931604813784361, "signal/frontier_coverage_20/centered_abs_mean": 0.23452837765216827, "signal/frontier_coverage_20/group_bin_occupancy": 0.8694444444444445, "signal/frontier_coverage_20/group_std_mean": 0.31311619877815244, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002931604813784361, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002931604813784361, "signal/frontier_coverage_25/centered_abs_mean": 0.23452837765216827, "signal/frontier_coverage_25/group_bin_occupancy": 0.8694444444444445, "signal/frontier_coverage_25/group_std_mean": 0.31311619877815244, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002931604813784361, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002931604813784361, "signal/frontier_coverage_5/centered_abs_mean": 0.23452837765216827, "signal/frontier_coverage_5/group_bin_occupancy": 0.8694444444444445, "signal/frontier_coverage_5/group_std_mean": 0.31311619877815244, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002931604813784361, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002931604813784361, "signal/frontier_ece_reward/centered_abs_mean": 0.08764429241418839, "signal/frontier_ece_reward/group_bin_occupancy": 0.809375, "signal/frontier_ece_reward/group_std_mean": 0.11243927627801895, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00876442939043045, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00876442939043045, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3069328278303146, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.8336805555555555, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3807151556015015, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030693282932043077, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030693282932043077, "step": 45 }, { "calibration/aurc": 0.40581392002668315, "calibration/batch_distribution_entropy": 0.9864032887347619, "calibration/batch_entropy_100bins": 0.9648712886165287, "calibration/batch_entropy_10bins": 0.9864032887347619, "calibration/batch_entropy_50bins": 0.9766999165423931, "calibration/batch_uniqueness": 0.953430626200465, "calibration/buffer_distribution_entropy": 0.7885463322323418, "calibration/buffer_entropy_100bins": 0.7637818833081469, "calibration/buffer_entropy_10bins": 0.7885463322323418, "calibration/buffer_entropy_50bins": 0.8096014118372388, "calibration/confidence_entropy": 0.48178658393199997, "calibration/coverage@0%": 0.00478037274518404, "calibration/coverage@1%": 0.00478037274518404, "calibration/coverage@10%": 0.00478037274518404, "calibration/coverage@15%": 0.01598037274518404, "calibration/coverage@20%": 0.018613443611325775, "calibration/coverage@25%": 0.06659603789230381, "calibration/coverage@30%": 0.11990588262883446, "calibration/coverage@5%": 0.00478037274518404, "calibration/ece": 0.2005017801122319, "calibration/mean_confidence": 0.5256280232531849, "calibration/prompt_uniqueness": 0.8879285110994667, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017274305555555536, "completions/max_length": 3381.8, "completions/max_terminated_length": 3381.8, "completions/mean_length": 594.59775390625, "completions/mean_terminated_length": 605.0348510742188, "completions/min_length": 0.0, "completions/min_terminated_length": 157.6, "epoch": 0.11999850001874976, "grad_norm": 0.0005370384315028787, "learning_rate": 4.759036144578314e-06, "loss": -0.0163, "num_tokens": 91060468.0, "reward": 0.9574363589286804, "reward_std": 0.16030279099941253, "rewards/accuracy_reward": 0.6344617962837219, "rewards/brier_reward": 0.7066903710365295, "rewards/confidence_uniqueness_reward": 0.9360496401786804, "rewards/format_reward": 0.9825520992279053, "rewards/frontier_aurc_reward": -0.0024419894441962244, "rewards/frontier_coverage_0": -0.03829344231635332, "rewards/frontier_coverage_1": -0.03829344231635332, "rewards/frontier_coverage_10": -0.03829344231635332, "rewards/frontier_coverage_15": -0.03829344231635332, "rewards/frontier_coverage_20": -0.03829344231635332, "rewards/frontier_coverage_25": -0.03829344231635332, "rewards/frontier_coverage_5": -0.03829344231635332, "rewards/frontier_ece_reward": 0.015390362963080407, "rewards/frontier_entropy_batch_reward": -0.1350240170955658, "signal/accuracy_reward/centered_abs_mean": 0.18443467915058137, "signal/accuracy_reward/group_bin_occupancy": 0.21006944444444442, "signal/accuracy_reward/group_std_mean": 0.24160374104976653, "signal/accuracy_reward/group_zero_std_frac": 0.3194444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09221733957529069, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09221733957529069, "signal/advantage_abs_mean": 0.11947052925825119, "signal/advantage_pre_scale_abs_mean": 0.11947052925825119, "signal/advantage_pre_scale_std": 0.18084822595119476, "signal/advantage_std": 0.18084822595119476, "signal/brier_reward/centered_abs_mean": 0.24059803187847137, "signal/brier_reward/group_bin_occupancy": 0.9128472222222221, "signal/brier_reward/group_std_mean": 0.2905759453773499, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024059804528951644, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.024059804528951644, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03946094214916229, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7940972222222223, "signal/confidence_uniqueness_reward/group_std_mean": 0.06758813932538033, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003946094121783972, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003946094121783972, "signal/format_reward/centered_abs_mean": 0.02996419295668602, "signal/format_reward/group_bin_occupancy": 0.15381944444444443, "signal/format_reward/group_std_mean": 0.05656718313694, "signal/format_reward/group_zero_std_frac": 0.7694444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01498209647834301, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01498209647834301, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017862386535853147, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7190972222222223, "signal/frontier_aurc_reward/group_std_mean": 0.0027869833167642353, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2327983970171772e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2327983970171772e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.26630950570106504, "signal/frontier_coverage_0/group_bin_occupancy": 0.8913194444444444, "signal/frontier_coverage_0/group_std_mean": 0.34513433575630187, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0033288690727204086, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0033288690727204086, "signal/frontier_coverage_1/centered_abs_mean": 0.26630950570106504, "signal/frontier_coverage_1/group_bin_occupancy": 0.8913194444444444, "signal/frontier_coverage_1/group_std_mean": 0.34513433575630187, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0033288690727204086, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033288690727204086, "signal/frontier_coverage_10/centered_abs_mean": 0.26630950570106504, "signal/frontier_coverage_10/group_bin_occupancy": 0.8913194444444444, "signal/frontier_coverage_10/group_std_mean": 0.34513433575630187, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0033288690727204086, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033288690727204086, "signal/frontier_coverage_15/centered_abs_mean": 0.26630950570106504, "signal/frontier_coverage_15/group_bin_occupancy": 0.8913194444444444, "signal/frontier_coverage_15/group_std_mean": 0.34513433575630187, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0033288690727204086, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0033288690727204086, "signal/frontier_coverage_20/centered_abs_mean": 0.26630950570106504, "signal/frontier_coverage_20/group_bin_occupancy": 0.8913194444444444, "signal/frontier_coverage_20/group_std_mean": 0.34513433575630187, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033288690727204086, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033288690727204086, "signal/frontier_coverage_25/centered_abs_mean": 0.26630950570106504, "signal/frontier_coverage_25/group_bin_occupancy": 0.8913194444444444, "signal/frontier_coverage_25/group_std_mean": 0.34513433575630187, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0033288690727204086, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0033288690727204086, "signal/frontier_coverage_5/centered_abs_mean": 0.26630950570106504, "signal/frontier_coverage_5/group_bin_occupancy": 0.8913194444444444, "signal/frontier_coverage_5/group_std_mean": 0.34513433575630187, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0033288690727204086, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033288690727204086, "signal/frontier_ece_reward/centered_abs_mean": 0.08141317814588547, "signal/frontier_ece_reward/group_bin_occupancy": 0.8072916666666666, "signal/frontier_ece_reward/group_std_mean": 0.10631832182407379, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008141317777335644, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008141317777335644, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22239840626716614, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7815972222222223, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2998348593711853, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022239841893315314, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022239841893315314, "step": 50 }, { "epoch": 0.11999850001874976, "eval_calibration/aurc": 0.2109596362293293, "eval_calibration/batch_distribution_entropy": 0.9226056413086375, "eval_calibration/batch_entropy_100bins": 0.6951142630838091, "eval_calibration/batch_entropy_10bins": 0.9226056413086375, "eval_calibration/batch_entropy_50bins": 0.7795849121323298, "eval_calibration/batch_uniqueness": 0.8931600654699965, "eval_calibration/buffer_distribution_entropy": 0.815512391941836, "eval_calibration/buffer_entropy_100bins": 0.7914134740073022, "eval_calibration/buffer_entropy_10bins": 0.815512391941836, "eval_calibration/buffer_entropy_50bins": 0.8336375838667806, "eval_calibration/confidence_entropy": 0.49500069308362776, "eval_calibration/coverage@0%": 0.13205645161290322, "eval_calibration/coverage@1%": 0.13205645161290322, "eval_calibration/coverage@10%": 0.26915322580645157, "eval_calibration/coverage@15%": 0.3790322580645162, "eval_calibration/coverage@20%": 0.6399529569892474, "eval_calibration/coverage@25%": 0.7288306451612904, "eval_calibration/coverage@30%": 0.8385416666666666, "eval_calibration/coverage@5%": 0.13205645161290322, "eval_calibration/ece": 0.25606512017994004, "eval_calibration/mean_confidence": 0.5449926770716114, "eval_calibration/prompt_uniqueness": 0.8931600654699965, "eval_completions/clipped_ratio": 0.018229166666666668, "eval_completions/max_length": 2121.5, "eval_completions/max_terminated_length": 2121.5, "eval_completions/mean_length": 582.2923787434896, "eval_completions/mean_terminated_length": 593.1960754394531, "eval_completions/min_length": 46.5, "eval_completions/min_terminated_length": 204.0, "eval_loss": 0.0, "eval_num_tokens": 91060468.0, "eval_reward": 0.8969475229581197, "eval_reward_std": 0.25840714077154797, "eval_rewards/accuracy_reward": 0.6249999900658926, "eval_rewards/brier_reward": 0.7243680159250895, "eval_rewards/confidence_uniqueness_reward": 0.8756765027840933, "eval_rewards/format_reward": 0.9774305621782938, "eval_rewards/frontier_aurc_reward": -0.0024169180736256144, "eval_rewards/frontier_coverage_0": -0.014742235808322826, "eval_rewards/frontier_coverage_1": -0.014742235808322826, "eval_rewards/frontier_coverage_10": -0.014742235808322826, "eval_rewards/frontier_coverage_15": -0.014742235808322826, "eval_rewards/frontier_coverage_20": -0.014742235808322826, "eval_rewards/frontier_coverage_25": -0.014742235808322826, "eval_rewards/frontier_coverage_5": -0.014742235808322826, "eval_rewards/frontier_ece_reward": 0.015066012740135193, "eval_rewards/frontier_entropy_batch_reward": -0.6445865134398142, "eval_runtime": 205.5472, "eval_samples_per_second": 4.865, "eval_signal/accuracy_reward/centered_abs_mean": 0.4510633647441864, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4817399134238561, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2255316823720932, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2255316823720932, "eval_signal/advantage_abs_mean": 0.21911720434824625, "eval_signal/advantage_pre_scale_abs_mean": 0.21911720434824625, "eval_signal/advantage_pre_scale_std": 0.2567944601178169, "eval_signal/advantage_std": 0.2567944601178169, "eval_signal/brier_reward/centered_abs_mean": 0.23708807677030563, "eval_signal/brier_reward/group_bin_occupancy": 0.9201388888888888, "eval_signal/brier_reward/group_std_mean": 0.2897856483856837, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023708807304501534, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.023708807304501534, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06476977219184239, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3819444444444444, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.1230657051006953, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006476977374404669, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006476977374404669, "eval_signal/format_reward/centered_abs_mean": 0.043077257461845875, "eval_signal/format_reward/group_bin_occupancy": 0.19444444444444445, "eval_signal/format_reward/group_std_mean": 0.10973560561736424, "eval_signal/format_reward/group_zero_std_frac": 0.4444444527228673, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.021538628730922937, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.021538628730922937, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0020677158997083702, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.7222222222222222, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0035327961280321083, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.584644911015251e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.584644911015251e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.2857043494780858, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9479166666666666, "eval_signal/frontier_coverage_0/group_std_mean": 0.39817163348197937, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0035713044150422015, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0035713044150422015, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.2857043494780858, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9479166666666666, "eval_signal/frontier_coverage_1/group_std_mean": 0.39817163348197937, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035713044150422015, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035713044150422015, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.2857043494780858, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9479166666666666, "eval_signal/frontier_coverage_10/group_std_mean": 0.39817163348197937, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035713044150422015, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035713044150422015, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.2857043494780858, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9479166666666666, "eval_signal/frontier_coverage_15/group_std_mean": 0.39817163348197937, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035713044150422015, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035713044150422015, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.2857043494780858, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9479166666666666, "eval_signal/frontier_coverage_20/group_std_mean": 0.39817163348197937, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035713044150422015, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035713044150422015, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.2857043494780858, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9479166666666666, "eval_signal/frontier_coverage_25/group_std_mean": 0.39817163348197937, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035713044150422015, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035713044150422015, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.2857043494780858, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9479166666666666, "eval_signal/frontier_coverage_5/group_std_mean": 0.39817163348197937, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035713044150422015, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035713044150422015, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.07295310931901137, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8715277777777777, "eval_signal/frontier_ece_reward/group_std_mean": 0.0984811931848526, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007295310885335009, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007295310885335009, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3177054176727931, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3194444444444444, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.33345575133959454, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031770541022221245, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031770541022221245, "eval_steps_per_second": 0.029, "step": 50 }, { "calibration/aurc": 0.2694222235390976, "calibration/batch_distribution_entropy": 0.9743103185657492, "calibration/batch_entropy_100bins": 0.9569912029449112, "calibration/batch_entropy_10bins": 0.9743103185657492, "calibration/batch_entropy_50bins": 0.9695456736926709, "calibration/batch_uniqueness": 0.9509898009239965, "calibration/buffer_distribution_entropy": 0.8302606383123452, "calibration/buffer_entropy_100bins": 0.807123888242816, "calibration/buffer_entropy_10bins": 0.8302606383123452, "calibration/buffer_entropy_50bins": 0.8468961491187944, "calibration/confidence_entropy": 0.5156970864044457, "calibration/coverage@0%": 0.0036787068283131276, "calibration/coverage@1%": 0.0036787068283131276, "calibration/coverage@10%": 0.023101279006790817, "calibration/coverage@15%": 0.06956951427694946, "calibration/coverage@20%": 0.3806686646144719, "calibration/coverage@25%": 0.5535145450606225, "calibration/coverage@30%": 0.7110186806437707, "calibration/coverage@5%": 0.0036787068283131276, "calibration/ece": 0.1878179475943192, "calibration/mean_confidence": 0.5640014221751124, "calibration/prompt_uniqueness": 0.8892825716455885, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018836805555555558, "completions/max_length": 3432.4, "completions/max_terminated_length": 3432.4, "completions/mean_length": 611.9341186523437, "completions/mean_terminated_length": 623.7887573242188, "completions/min_length": 0.0, "completions/min_terminated_length": 145.4, "epoch": 0.13199835002062474, "grad_norm": 0.00044835961307398975, "learning_rate": 4.60843373493976e-06, "loss": -0.0164, "num_tokens": 101190525.0, "reward": 0.9691686749458313, "reward_std": 0.15596783459186553, "rewards/accuracy_reward": 0.6544270634651184, "rewards/brier_reward": 0.728352153301239, "rewards/confidence_uniqueness_reward": 0.9341305613517761, "rewards/format_reward": 0.9808159828186035, "rewards/frontier_aurc_reward": -0.0021917944541200995, "rewards/frontier_coverage_0": -0.03554604309611022, "rewards/frontier_coverage_1": -0.03554604309611022, "rewards/frontier_coverage_10": -0.03554604309611022, "rewards/frontier_coverage_15": -0.03554604309611022, "rewards/frontier_coverage_20": -0.03554604309611022, "rewards/frontier_coverage_25": -0.03554604309611022, "rewards/frontier_coverage_5": -0.03554604309611022, "rewards/frontier_ece_reward": 0.015853497385978698, "rewards/frontier_entropy_batch_reward": -0.13148798942565917, "signal/accuracy_reward/centered_abs_mean": 0.1805935323238373, "signal/accuracy_reward/group_bin_occupancy": 0.20694444444444443, "signal/accuracy_reward/group_std_mean": 0.23551449477672576, "signal/accuracy_reward/group_zero_std_frac": 0.3444444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09029676616191865, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09029676616191865, "signal/advantage_abs_mean": 0.1168292984366417, "signal/advantage_pre_scale_abs_mean": 0.1168292984366417, "signal/advantage_pre_scale_std": 0.18130592703819276, "signal/advantage_std": 0.18130592703819276, "signal/brier_reward/centered_abs_mean": 0.2102464973926544, "signal/brier_reward/group_bin_occupancy": 0.9072916666666668, "signal/brier_reward/group_std_mean": 0.2576879024505615, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02102465070784092, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02102465070784092, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.040306436270475386, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7975694444444443, "signal/confidence_uniqueness_reward/group_std_mean": 0.06543851867318154, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004030643822625279, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004030643822625279, "signal/format_reward/centered_abs_mean": 0.03095160648226738, "signal/format_reward/group_bin_occupancy": 0.15138888888888888, "signal/format_reward/group_std_mean": 0.05426007434725762, "signal/format_reward/group_zero_std_frac": 0.7888888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01547580324113369, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01547580324113369, "signal/frontier_aurc_reward/centered_abs_mean": 0.001665117172524333, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7086805555555556, "signal/frontier_aurc_reward/group_std_mean": 0.0026435004081577064, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.081396560242865e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.081396560242865e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2305227130651474, "signal/frontier_coverage_0/group_bin_occupancy": 0.8920138888888889, "signal/frontier_coverage_0/group_std_mean": 0.30123440027236936, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0028815338853746654, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028815338853746654, "signal/frontier_coverage_1/centered_abs_mean": 0.2305227130651474, "signal/frontier_coverage_1/group_bin_occupancy": 0.8920138888888889, "signal/frontier_coverage_1/group_std_mean": 0.30123440027236936, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028815338853746654, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028815338853746654, "signal/frontier_coverage_10/centered_abs_mean": 0.2305227130651474, "signal/frontier_coverage_10/group_bin_occupancy": 0.8920138888888889, "signal/frontier_coverage_10/group_std_mean": 0.30123440027236936, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028815338853746654, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028815338853746654, "signal/frontier_coverage_15/centered_abs_mean": 0.2305227130651474, "signal/frontier_coverage_15/group_bin_occupancy": 0.8920138888888889, "signal/frontier_coverage_15/group_std_mean": 0.30123440027236936, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028815338853746654, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028815338853746654, "signal/frontier_coverage_20/centered_abs_mean": 0.2305227130651474, "signal/frontier_coverage_20/group_bin_occupancy": 0.8920138888888889, "signal/frontier_coverage_20/group_std_mean": 0.30123440027236936, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028815338853746654, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028815338853746654, "signal/frontier_coverage_25/centered_abs_mean": 0.2305227130651474, "signal/frontier_coverage_25/group_bin_occupancy": 0.8920138888888889, "signal/frontier_coverage_25/group_std_mean": 0.30123440027236936, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028815338853746654, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028815338853746654, "signal/frontier_coverage_5/centered_abs_mean": 0.2305227130651474, "signal/frontier_coverage_5/group_bin_occupancy": 0.8920138888888889, "signal/frontier_coverage_5/group_std_mean": 0.30123440027236936, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028815338853746654, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028815338853746654, "signal/frontier_ece_reward/centered_abs_mean": 0.06926655918359756, "signal/frontier_ece_reward/group_bin_occupancy": 0.7784722222222222, "signal/frontier_ece_reward/group_std_mean": 0.09231365174055099, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006926656048744917, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006926656048744917, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.21102777123451233, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.8166666666666667, "signal/frontier_entropy_batch_reward/group_std_mean": 0.283634626865387, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.021102776750922203, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.021102776750922203, "step": 55 }, { "calibration/aurc": 0.3307035914636969, "calibration/batch_distribution_entropy": 0.9677243577600381, "calibration/batch_entropy_100bins": 0.9571977793903559, "calibration/batch_entropy_10bins": 0.9677243577600381, "calibration/batch_entropy_50bins": 0.9681875033490638, "calibration/batch_uniqueness": 0.9505193455375718, "calibration/buffer_distribution_entropy": 0.8528815324002004, "calibration/buffer_entropy_100bins": 0.8344222403886594, "calibration/buffer_entropy_10bins": 0.8528815324002004, "calibration/buffer_entropy_50bins": 0.8685746685852462, "calibration/confidence_entropy": 0.5152009551346997, "calibration/coverage@0%": 0.01153219045162198, "calibration/coverage@1%": 0.01153219045162198, "calibration/coverage@10%": 0.08744842081811413, "calibration/coverage@15%": 0.22566831610607224, "calibration/coverage@20%": 0.3026316668914125, "calibration/coverage@25%": 0.37020823239169576, "calibration/coverage@30%": 0.43324681560185196, "calibration/coverage@5%": 0.032998159038009414, "calibration/ece": 0.19605623757236762, "calibration/mean_confidence": 0.5866015536543909, "calibration/prompt_uniqueness": 0.8868937097156987, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015798611111111117, "completions/max_length": 3490.0, "completions/max_terminated_length": 3490.0, "completions/mean_length": 626.966845703125, "completions/mean_terminated_length": 637.0339233398438, "completions/min_length": 0.0, "completions/min_terminated_length": 158.4, "epoch": 0.14399820002249972, "grad_norm": 0.00042762517114169896, "learning_rate": 4.457831325301205e-06, "loss": -0.0143, "num_tokens": 111509759.0, "reward": 0.9568191528320312, "reward_std": 0.15329523682594298, "rewards/accuracy_reward": 0.6266492962837219, "rewards/brier_reward": 0.7438425660133362, "rewards/confidence_uniqueness_reward": 0.9345135450363159, "rewards/format_reward": 0.9841145753860474, "rewards/frontier_aurc_reward": -0.0022769244387745857, "rewards/frontier_coverage_0": -0.002498930087313056, "rewards/frontier_coverage_1": -0.002498930087313056, "rewards/frontier_coverage_10": -0.002498930087313056, "rewards/frontier_coverage_15": -0.002498930087313056, "rewards/frontier_coverage_20": -0.002498930087313056, "rewards/frontier_coverage_25": -0.002498930087313056, "rewards/frontier_coverage_5": -0.002498930087313056, "rewards/frontier_ece_reward": 0.02260393425822258, "rewards/frontier_entropy_batch_reward": -0.18411682844161986, "signal/accuracy_reward/centered_abs_mean": 0.18255750834941864, "signal/accuracy_reward/group_bin_occupancy": 0.21145833333333336, "signal/accuracy_reward/group_std_mean": 0.24254016876220702, "signal/accuracy_reward/group_zero_std_frac": 0.3083333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09127875417470932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09127875417470932, "signal/advantage_abs_mean": 0.11400031745433807, "signal/advantage_pre_scale_abs_mean": 0.11400031745433807, "signal/advantage_pre_scale_std": 0.17615911066532136, "signal/advantage_std": 0.17615911066532136, "signal/brier_reward/centered_abs_mean": 0.19489111006259918, "signal/brier_reward/group_bin_occupancy": 0.8920138888888889, "signal/brier_reward/group_std_mean": 0.24229688942432404, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019489111378788948, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019489111378788948, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.037897521257400514, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8170138888888889, "signal/confidence_uniqueness_reward/group_std_mean": 0.062398982048034665, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003789752395823598, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003789752395823598, "signal/format_reward/centered_abs_mean": 0.02632921040058136, "signal/format_reward/group_bin_occupancy": 0.1496527777777778, "signal/format_reward/group_std_mean": 0.04854804500937462, "signal/format_reward/group_zero_std_frac": 0.8027777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01316460520029068, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01316460520029068, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019377078860998154, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7121527777777779, "signal/frontier_aurc_reward/group_std_mean": 0.0030613655224442484, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.422134857624769e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.422134857624769e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2110768437385559, "signal/frontier_coverage_0/group_bin_occupancy": 0.8638888888888889, "signal/frontier_coverage_0/group_std_mean": 0.28098778128623964, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002638460695743561, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002638460695743561, "signal/frontier_coverage_1/centered_abs_mean": 0.2110768437385559, "signal/frontier_coverage_1/group_bin_occupancy": 0.8638888888888889, "signal/frontier_coverage_1/group_std_mean": 0.28098778128623964, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002638460695743561, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002638460695743561, "signal/frontier_coverage_10/centered_abs_mean": 0.2110768437385559, "signal/frontier_coverage_10/group_bin_occupancy": 0.8638888888888889, "signal/frontier_coverage_10/group_std_mean": 0.28098778128623964, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002638460695743561, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002638460695743561, "signal/frontier_coverage_15/centered_abs_mean": 0.2110768437385559, "signal/frontier_coverage_15/group_bin_occupancy": 0.8638888888888889, "signal/frontier_coverage_15/group_std_mean": 0.28098778128623964, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002638460695743561, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002638460695743561, "signal/frontier_coverage_20/centered_abs_mean": 0.2110768437385559, "signal/frontier_coverage_20/group_bin_occupancy": 0.8638888888888889, "signal/frontier_coverage_20/group_std_mean": 0.28098778128623964, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002638460695743561, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002638460695743561, "signal/frontier_coverage_25/centered_abs_mean": 0.2110768437385559, "signal/frontier_coverage_25/group_bin_occupancy": 0.8638888888888889, "signal/frontier_coverage_25/group_std_mean": 0.28098778128623964, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002638460695743561, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002638460695743561, "signal/frontier_coverage_5/centered_abs_mean": 0.2110768437385559, "signal/frontier_coverage_5/group_bin_occupancy": 0.8638888888888889, "signal/frontier_coverage_5/group_std_mean": 0.28098778128623964, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002638460695743561, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002638460695743561, "signal/frontier_ece_reward/centered_abs_mean": 0.06807545423507691, "signal/frontier_ece_reward/group_bin_occupancy": 0.7677083333333334, "signal/frontier_ece_reward/group_std_mean": 0.08898028582334519, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006807545572519303, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006807545572519303, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2613932341337204, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.8180555555555558, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33782604336738586, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02613932266831398, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02613932266831398, "step": 60 }, { "calibration/aurc": 0.23300829060517123, "calibration/batch_distribution_entropy": 0.9831016338957796, "calibration/batch_entropy_100bins": 0.9632753017365372, "calibration/batch_entropy_10bins": 0.9831016338957796, "calibration/batch_entropy_50bins": 0.9759229135714376, "calibration/batch_uniqueness": 0.9525056216175093, "calibration/buffer_distribution_entropy": 0.8715169058505172, "calibration/buffer_entropy_100bins": 0.8564367522033102, "calibration/buffer_entropy_10bins": 0.8715169058505172, "calibration/buffer_entropy_50bins": 0.8863222147297156, "calibration/confidence_entropy": 0.48784240565714115, "calibration/coverage@0%": 0.02786145806735325, "calibration/coverage@1%": 0.02786145806735325, "calibration/coverage@10%": 0.1591952861236417, "calibration/coverage@15%": 0.4360120187533691, "calibration/coverage@20%": 0.5692992045422305, "calibration/coverage@25%": 0.6198779772037336, "calibration/coverage@30%": 0.7344086021505376, "calibration/coverage@5%": 0.04193082921950804, "calibration/ece": 0.19008310139291312, "calibration/mean_confidence": 0.5527640346426371, "calibration/prompt_uniqueness": 0.87965394467687, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666675, "completions/max_length": 3113.2, "completions/max_terminated_length": 3113.2, "completions/mean_length": 610.5935913085938, "completions/mean_terminated_length": 616.9865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 147.4, "epoch": 0.1559980500243747, "grad_norm": 0.0005515015218406916, "learning_rate": 4.307228915662651e-06, "loss": -0.0085, "num_tokens": 121637845.0, "reward": 0.9779425144195557, "reward_std": 0.13538099378347396, "rewards/accuracy_reward": 0.6507812380790711, "rewards/brier_reward": 0.7501181960105896, "rewards/confidence_uniqueness_reward": 0.9430691480636597, "rewards/format_reward": 0.9895833373069763, "rewards/frontier_aurc_reward": -0.0019629735965281726, "rewards/frontier_coverage_0": -0.008767739811446518, "rewards/frontier_coverage_1": -0.008767739811446518, "rewards/frontier_coverage_10": -0.008767739811446518, "rewards/frontier_coverage_15": -0.008767739811446518, "rewards/frontier_coverage_20": -0.008767739811446518, "rewards/frontier_coverage_25": -0.008767739811446518, "rewards/frontier_coverage_5": -0.008767739811446518, "rewards/frontier_ece_reward": 0.020754358358681203, "rewards/frontier_entropy_batch_reward": -0.12842243015766144, "signal/accuracy_reward/centered_abs_mean": 0.167333984375, "signal/accuracy_reward/group_bin_occupancy": 0.20243055555555553, "signal/accuracy_reward/group_std_mean": 0.2216338872909546, "signal/accuracy_reward/group_zero_std_frac": 0.3805555641651154, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0836669921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0836669921875, "signal/advantage_abs_mean": 0.09993359893560409, "signal/advantage_pre_scale_abs_mean": 0.09993359893560409, "signal/advantage_pre_scale_std": 0.15722057819366456, "signal/advantage_std": 0.15722057819366456, "signal/brier_reward/centered_abs_mean": 0.19362751245498658, "signal/brier_reward/group_bin_occupancy": 0.875, "signal/brier_reward/group_std_mean": 0.24217391312122344, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019362751767039298, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019362751767039298, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028328100219368935, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8430555555555556, "signal/confidence_uniqueness_reward/group_std_mean": 0.04791910648345947, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028328101616352797, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028328101616352797, "signal/format_reward/centered_abs_mean": 0.01808810755610466, "signal/format_reward/group_bin_occupancy": 0.1440972222222222, "signal/format_reward/group_std_mean": 0.03565124273300171, "signal/format_reward/group_zero_std_frac": 0.8472222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00904405377805233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00904405377805233, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016498573124408722, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6878472222222223, "signal/frontier_aurc_reward/group_std_mean": 0.0026893588714301587, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0623216914827936e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0623216914827936e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2315441280603409, "signal/frontier_coverage_0/group_bin_occupancy": 0.8649305555555555, "signal/frontier_coverage_0/group_std_mean": 0.3030831813812256, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002894301526248455, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002894301526248455, "signal/frontier_coverage_1/centered_abs_mean": 0.2315441280603409, "signal/frontier_coverage_1/group_bin_occupancy": 0.8649305555555555, "signal/frontier_coverage_1/group_std_mean": 0.3030831813812256, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002894301526248455, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002894301526248455, "signal/frontier_coverage_10/centered_abs_mean": 0.2315441280603409, "signal/frontier_coverage_10/group_bin_occupancy": 0.8649305555555555, "signal/frontier_coverage_10/group_std_mean": 0.3030831813812256, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002894301526248455, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002894301526248455, "signal/frontier_coverage_15/centered_abs_mean": 0.2315441280603409, "signal/frontier_coverage_15/group_bin_occupancy": 0.8649305555555555, "signal/frontier_coverage_15/group_std_mean": 0.3030831813812256, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002894301526248455, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002894301526248455, "signal/frontier_coverage_20/centered_abs_mean": 0.2315441280603409, "signal/frontier_coverage_20/group_bin_occupancy": 0.8649305555555555, "signal/frontier_coverage_20/group_std_mean": 0.3030831813812256, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002894301526248455, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002894301526248455, "signal/frontier_coverage_25/centered_abs_mean": 0.2315441280603409, "signal/frontier_coverage_25/group_bin_occupancy": 0.8649305555555555, "signal/frontier_coverage_25/group_std_mean": 0.3030831813812256, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002894301526248455, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002894301526248455, "signal/frontier_coverage_5/centered_abs_mean": 0.2315441280603409, "signal/frontier_coverage_5/group_bin_occupancy": 0.8649305555555555, "signal/frontier_coverage_5/group_std_mean": 0.3030831813812256, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002894301526248455, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002894301526248455, "signal/frontier_ece_reward/centered_abs_mean": 0.06531385183334351, "signal/frontier_ece_reward/group_bin_occupancy": 0.7784722222222221, "signal/frontier_ece_reward/group_std_mean": 0.08503876328468322, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006531385611742735, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006531385611742735, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.20490280091762542, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7586805555555556, "signal/frontier_entropy_batch_reward/group_std_mean": 0.27364385724067686, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02049028016626835, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02049028016626835, "step": 65 }, { "calibration/aurc": 0.29206435720557744, "calibration/batch_distribution_entropy": 0.967741577432348, "calibration/batch_entropy_100bins": 0.9547615908625235, "calibration/batch_entropy_10bins": 0.967741577432348, "calibration/batch_entropy_50bins": 0.9660211465920273, "calibration/batch_uniqueness": 0.9491694668612134, "calibration/buffer_distribution_entropy": 0.8853528168662106, "calibration/buffer_entropy_100bins": 0.8734316453290203, "calibration/buffer_entropy_10bins": 0.8853528168662106, "calibration/buffer_entropy_50bins": 0.8995535410048392, "calibration/confidence_entropy": 0.47921780511639545, "calibration/coverage@0%": 0.008454719126847999, "calibration/coverage@1%": 0.008454719126847999, "calibration/coverage@10%": 0.10795939337610916, "calibration/coverage@15%": 0.16163093522390154, "calibration/coverage@20%": 0.20303708239739757, "calibration/coverage@25%": 0.27123720908418275, "calibration/coverage@30%": 0.46396145357735036, "calibration/coverage@5%": 0.008454719126847999, "calibration/ece": 0.1539259917359437, "calibration/mean_confidence": 0.590760119025805, "calibration/prompt_uniqueness": 0.8780244614506524, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01163194444444442, "completions/max_length": 3017.8, "completions/max_terminated_length": 3017.8, "completions/mean_length": 598.3328247070312, "completions/mean_terminated_length": 605.3543090820312, "completions/min_length": 0.0, "completions/min_terminated_length": 137.2, "epoch": 0.16799790002624967, "grad_norm": 0.00044609271571971476, "learning_rate": 4.156626506024097e-06, "loss": -0.01, "num_tokens": 131608783.0, "reward": 0.964812970161438, "reward_std": 0.13441329300403596, "rewards/accuracy_reward": 0.6355902791023255, "rewards/brier_reward": 0.7468560457229614, "rewards/confidence_uniqueness_reward": 0.9387478590011596, "rewards/format_reward": 0.9881944537162781, "rewards/frontier_aurc_reward": -0.002175836288370192, "rewards/frontier_coverage_0": 0.000884566456079483, "rewards/frontier_coverage_1": 0.000884566456079483, "rewards/frontier_coverage_10": 0.000884566456079483, "rewards/frontier_coverage_15": 0.000884566456079483, "rewards/frontier_coverage_20": 0.000884566456079483, "rewards/frontier_coverage_25": 0.000884566456079483, "rewards/frontier_coverage_5": 0.000884566456079483, "rewards/frontier_ece_reward": 0.02232353687286377, "rewards/frontier_entropy_batch_reward": -0.17922367453575133, "signal/accuracy_reward/centered_abs_mean": 0.16927083134651183, "signal/accuracy_reward/group_bin_occupancy": 0.19895833333333335, "signal/accuracy_reward/group_std_mean": 0.2159910023212433, "signal/accuracy_reward/group_zero_std_frac": 0.40833333134651184, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08463541567325591, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08463541567325591, "signal/advantage_abs_mean": 0.10277295261621475, "signal/advantage_pre_scale_abs_mean": 0.10277295261621475, "signal/advantage_pre_scale_std": 0.16154861450195312, "signal/advantage_std": 0.16154861450195312, "signal/brier_reward/centered_abs_mean": 0.18858011364936828, "signal/brier_reward/group_bin_occupancy": 0.8704861111111111, "signal/brier_reward/group_std_mean": 0.23463993072509765, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018858011066913604, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018858011066913604, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03243453465402126, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8458333333333334, "signal/confidence_uniqueness_reward/group_std_mean": 0.05219922661781311, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003243453614413738, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003243453614413738, "signal/format_reward/centered_abs_mean": 0.01965060755610466, "signal/format_reward/group_bin_occupancy": 0.14340277777777777, "signal/format_reward/group_std_mean": 0.03690010011196136, "signal/format_reward/group_zero_std_frac": 0.8527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00982530377805233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00982530377805233, "signal/frontier_aurc_reward/centered_abs_mean": 0.001957321958616376, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6996527777777778, "signal/frontier_aurc_reward/group_std_mean": 0.003007865697145462, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.446652579237707e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.446652579237707e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2180919259786606, "signal/frontier_coverage_0/group_bin_occupancy": 0.846875, "signal/frontier_coverage_0/group_std_mean": 0.2869709312915802, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027261491399258376, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027261491399258376, "signal/frontier_coverage_1/centered_abs_mean": 0.2180919259786606, "signal/frontier_coverage_1/group_bin_occupancy": 0.846875, "signal/frontier_coverage_1/group_std_mean": 0.2869709312915802, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027261491399258376, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027261491399258376, "signal/frontier_coverage_10/centered_abs_mean": 0.2180919259786606, "signal/frontier_coverage_10/group_bin_occupancy": 0.846875, "signal/frontier_coverage_10/group_std_mean": 0.2869709312915802, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027261491399258376, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027261491399258376, "signal/frontier_coverage_15/centered_abs_mean": 0.2180919259786606, "signal/frontier_coverage_15/group_bin_occupancy": 0.846875, "signal/frontier_coverage_15/group_std_mean": 0.2869709312915802, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027261491399258376, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027261491399258376, "signal/frontier_coverage_20/centered_abs_mean": 0.2180919259786606, "signal/frontier_coverage_20/group_bin_occupancy": 0.846875, "signal/frontier_coverage_20/group_std_mean": 0.2869709312915802, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027261491399258376, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027261491399258376, "signal/frontier_coverage_25/centered_abs_mean": 0.2180919259786606, "signal/frontier_coverage_25/group_bin_occupancy": 0.846875, "signal/frontier_coverage_25/group_std_mean": 0.2869709312915802, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027261491399258376, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027261491399258376, "signal/frontier_coverage_5/centered_abs_mean": 0.2180919259786606, "signal/frontier_coverage_5/group_bin_occupancy": 0.846875, "signal/frontier_coverage_5/group_std_mean": 0.2869709312915802, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027261491399258376, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027261491399258376, "signal/frontier_ece_reward/centered_abs_mean": 0.06389048919081688, "signal/frontier_ece_reward/group_bin_occupancy": 0.7607638888888889, "signal/frontier_ece_reward/group_std_mean": 0.08154775202274323, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006389048788696528, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006389048788696528, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25127990543842316, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.775, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3219525694847107, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025127990543842314, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025127990543842314, "step": 70 }, { "calibration/aurc": 0.24865885558565473, "calibration/batch_distribution_entropy": 0.96489969481927, "calibration/batch_entropy_100bins": 0.9516288732160346, "calibration/batch_entropy_10bins": 0.96489969481927, "calibration/batch_entropy_50bins": 0.9641975783352297, "calibration/batch_uniqueness": 0.948999975102953, "calibration/buffer_distribution_entropy": 0.8949759127137261, "calibration/buffer_entropy_100bins": 0.8868731831868493, "calibration/buffer_entropy_10bins": 0.8949759127137261, "calibration/buffer_entropy_50bins": 0.9096876693511307, "calibration/confidence_entropy": 0.5163309735163633, "calibration/coverage@0%": 0.00838168656056587, "calibration/coverage@1%": 0.00838168656056587, "calibration/coverage@10%": 0.1720974248452697, "calibration/coverage@15%": 0.24661665561450047, "calibration/coverage@20%": 0.3965249226348364, "calibration/coverage@25%": 0.6515518841034948, "calibration/coverage@30%": 0.7176282051282051, "calibration/coverage@5%": 0.016715019893899206, "calibration/ece": 0.18913358089517085, "calibration/mean_confidence": 0.5699598192734223, "calibration/prompt_uniqueness": 0.8794210313331021, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005902777777777768, "completions/max_length": 2728.0, "completions/max_terminated_length": 2728.0, "completions/mean_length": 617.7827270507812, "completions/mean_terminated_length": 621.47099609375, "completions/min_length": 0.0, "completions/min_terminated_length": 182.2, "epoch": 0.17999775002812465, "grad_norm": 0.0004136954667046666, "learning_rate": 4.006024096385543e-06, "loss": -0.0037, "num_tokens": 141790536.0, "reward": 0.9999241590499878, "reward_std": 0.12385457307100296, "rewards/accuracy_reward": 0.6986111044883728, "rewards/brier_reward": 0.764533269405365, "rewards/confidence_uniqueness_reward": 0.9451632022857666, "rewards/format_reward": 0.9940972208976746, "rewards/frontier_aurc_reward": -0.001692651305347681, "rewards/frontier_coverage_0": -0.03404254494234919, "rewards/frontier_coverage_1": -0.03404254494234919, "rewards/frontier_coverage_10": -0.03404254494234919, "rewards/frontier_coverage_15": -0.03404254494234919, "rewards/frontier_coverage_20": -0.03404254494234919, "rewards/frontier_coverage_25": -0.03404254494234919, "rewards/frontier_coverage_5": -0.03404254494234919, "rewards/frontier_ece_reward": 0.017354899458587168, "rewards/frontier_entropy_batch_reward": -0.1613529622554779, "signal/accuracy_reward/centered_abs_mean": 0.15860459804534913, "signal/accuracy_reward/group_bin_occupancy": 0.2, "signal/accuracy_reward/group_std_mean": 0.21018220484256744, "signal/accuracy_reward/group_zero_std_frac": 0.4000000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07930229902267456, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07930229902267456, "signal/advantage_abs_mean": 0.09171124696731567, "signal/advantage_pre_scale_abs_mean": 0.09171124696731567, "signal/advantage_pre_scale_std": 0.1453747808933258, "signal/advantage_std": 0.1453747808933258, "signal/brier_reward/centered_abs_mean": 0.16959642767906188, "signal/brier_reward/group_bin_occupancy": 0.8746527777777778, "signal/brier_reward/group_std_mean": 0.21293676793575286, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016959642991423607, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016959642991423607, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023114091902971267, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8760416666666668, "signal/confidence_uniqueness_reward/group_std_mean": 0.03923774063587189, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023114092415198683, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023114092415198683, "signal/format_reward/centered_abs_mean": 0.010861545195803046, "signal/format_reward/group_bin_occupancy": 0.1392361111111111, "signal/format_reward/group_std_mean": 0.0244428563863039, "signal/format_reward/group_zero_std_frac": 0.8861111283302308, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005430772597901523, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005430772597901523, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014858563197776675, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7024305555555556, "signal/frontier_aurc_reward/group_std_mean": 0.0023568171076476575, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.857320366980275e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.857320366980275e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.20621402859687804, "signal/frontier_coverage_0/group_bin_occupancy": 0.8604166666666668, "signal/frontier_coverage_0/group_std_mean": 0.2692394435405731, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00257767541334033, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00257767541334033, "signal/frontier_coverage_1/centered_abs_mean": 0.20621402859687804, "signal/frontier_coverage_1/group_bin_occupancy": 0.8604166666666668, "signal/frontier_coverage_1/group_std_mean": 0.2692394435405731, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00257767541334033, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00257767541334033, "signal/frontier_coverage_10/centered_abs_mean": 0.20621402859687804, "signal/frontier_coverage_10/group_bin_occupancy": 0.8604166666666668, "signal/frontier_coverage_10/group_std_mean": 0.2692394435405731, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00257767541334033, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00257767541334033, "signal/frontier_coverage_15/centered_abs_mean": 0.20621402859687804, "signal/frontier_coverage_15/group_bin_occupancy": 0.8604166666666668, "signal/frontier_coverage_15/group_std_mean": 0.2692394435405731, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00257767541334033, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00257767541334033, "signal/frontier_coverage_20/centered_abs_mean": 0.20621402859687804, "signal/frontier_coverage_20/group_bin_occupancy": 0.8604166666666668, "signal/frontier_coverage_20/group_std_mean": 0.2692394435405731, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00257767541334033, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00257767541334033, "signal/frontier_coverage_25/centered_abs_mean": 0.20621402859687804, "signal/frontier_coverage_25/group_bin_occupancy": 0.8604166666666668, "signal/frontier_coverage_25/group_std_mean": 0.2692394435405731, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00257767541334033, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00257767541334033, "signal/frontier_coverage_5/centered_abs_mean": 0.20621402859687804, "signal/frontier_coverage_5/group_bin_occupancy": 0.8604166666666668, "signal/frontier_coverage_5/group_std_mean": 0.2692394435405731, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00257767541334033, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00257767541334033, "signal/frontier_ece_reward/centered_abs_mean": 0.057116496562957766, "signal/frontier_ece_reward/group_bin_occupancy": 0.7486111111111111, "signal/frontier_ece_reward/group_std_mean": 0.07407085299491882, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00571164982393384, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00571164982393384, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23405362963676452, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7993055555555555, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3055886387825012, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023405364155769347, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023405364155769347, "step": 75 }, { "calibration/aurc": 0.2076444932942266, "calibration/batch_distribution_entropy": 0.9566517910549462, "calibration/batch_entropy_100bins": 0.9491858911049785, "calibration/batch_entropy_10bins": 0.9566517910549462, "calibration/batch_entropy_50bins": 0.9586329550903072, "calibration/batch_uniqueness": 0.9478874863011697, "calibration/buffer_distribution_entropy": 0.9042071878672668, "calibration/buffer_entropy_100bins": 0.8985727602789755, "calibration/buffer_entropy_10bins": 0.9042071878672668, "calibration/buffer_entropy_50bins": 0.9185937542698432, "calibration/confidence_entropy": 0.49120686633309446, "calibration/coverage@0%": 0.019409886968950837, "calibration/coverage@1%": 0.019409886968950837, "calibration/coverage@10%": 0.17595065074395838, "calibration/coverage@15%": 0.5099854214864996, "calibration/coverage@20%": 0.5846423011494986, "calibration/coverage@25%": 0.6973665942443268, "calibration/coverage@30%": 0.7510149662143296, "calibration/coverage@5%": 0.11041733099983801, "calibration/ece": 0.18258882314927863, "calibration/mean_confidence": 0.6132432048852545, "calibration/prompt_uniqueness": 0.8730205259179573, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010243055555555557, "completions/max_length": 3484.6, "completions/max_terminated_length": 3484.6, "completions/mean_length": 672.1552978515625, "completions/mean_terminated_length": 679.1800170898438, "completions/min_length": 0.0, "completions/min_terminated_length": 170.2, "epoch": 0.19199760002999963, "grad_norm": 0.0004997280775569379, "learning_rate": 3.855421686746989e-06, "loss": -0.0094, "num_tokens": 152587045.0, "reward": 0.9794714093208313, "reward_std": 0.13951779305934905, "rewards/accuracy_reward": 0.6626736044883728, "rewards/brier_reward": 0.7578543424606323, "rewards/confidence_uniqueness_reward": 0.9402774095535278, "rewards/format_reward": 0.9893229126930236, "rewards/frontier_aurc_reward": -0.0018948199227452277, "rewards/frontier_coverage_0": -0.014700169442221522, "rewards/frontier_coverage_1": -0.014700169442221522, "rewards/frontier_coverage_10": -0.014700169442221522, "rewards/frontier_coverage_15": -0.014700169442221522, "rewards/frontier_coverage_20": -0.014700169442221522, "rewards/frontier_coverage_25": -0.014700169442221522, "rewards/frontier_coverage_5": -0.014700169442221522, "rewards/frontier_ece_reward": 0.019117896631360053, "rewards/frontier_entropy_batch_reward": -0.16941888332366944, "signal/accuracy_reward/centered_abs_mean": 0.18274739384651184, "signal/accuracy_reward/group_bin_occupancy": 0.20833333333333331, "signal/accuracy_reward/group_std_mean": 0.23848095238208772, "signal/accuracy_reward/group_zero_std_frac": 0.33333333134651183, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09137369692325592, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09137369692325592, "signal/advantage_abs_mean": 0.10523971766233445, "signal/advantage_pre_scale_abs_mean": 0.10523971766233445, "signal/advantage_pre_scale_std": 0.16219059228897095, "signal/advantage_std": 0.16219059228897095, "signal/brier_reward/centered_abs_mean": 0.17205582857131957, "signal/brier_reward/group_bin_occupancy": 0.8708333333333332, "signal/brier_reward/group_std_mean": 0.21611034870147705, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017205582931637764, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017205582931637764, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.029724714532494544, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.861111111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.046706152707338335, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002972471574321389, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002972471574321389, "signal/format_reward/centered_abs_mean": 0.01745334193110466, "signal/format_reward/group_bin_occupancy": 0.14131944444444444, "signal/format_reward/group_std_mean": 0.03190700151026249, "signal/format_reward/group_zero_std_frac": 0.8694444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00872667096555233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00872667096555233, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017476935172453523, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7006944444444445, "signal/frontier_aurc_reward/group_std_mean": 0.002774294326081872, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1846168237971142e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1846168237971142e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.20781008899211884, "signal/frontier_coverage_0/group_bin_occupancy": 0.8597222222222223, "signal/frontier_coverage_0/group_std_mean": 0.27139957547187804, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025976261589676143, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025976261589676143, "signal/frontier_coverage_1/centered_abs_mean": 0.20781008899211884, "signal/frontier_coverage_1/group_bin_occupancy": 0.8597222222222223, "signal/frontier_coverage_1/group_std_mean": 0.27139957547187804, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025976261589676143, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025976261589676143, "signal/frontier_coverage_10/centered_abs_mean": 0.20781008899211884, "signal/frontier_coverage_10/group_bin_occupancy": 0.8597222222222223, "signal/frontier_coverage_10/group_std_mean": 0.27139957547187804, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025976261589676143, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025976261589676143, "signal/frontier_coverage_15/centered_abs_mean": 0.20781008899211884, "signal/frontier_coverage_15/group_bin_occupancy": 0.8597222222222223, "signal/frontier_coverage_15/group_std_mean": 0.27139957547187804, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025976261589676143, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025976261589676143, "signal/frontier_coverage_20/centered_abs_mean": 0.20781008899211884, "signal/frontier_coverage_20/group_bin_occupancy": 0.8597222222222223, "signal/frontier_coverage_20/group_std_mean": 0.27139957547187804, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025976261589676143, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025976261589676143, "signal/frontier_coverage_25/centered_abs_mean": 0.20781008899211884, "signal/frontier_coverage_25/group_bin_occupancy": 0.8597222222222223, "signal/frontier_coverage_25/group_std_mean": 0.27139957547187804, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025976261589676143, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025976261589676143, "signal/frontier_coverage_5/centered_abs_mean": 0.20781008899211884, "signal/frontier_coverage_5/group_bin_occupancy": 0.8597222222222223, "signal/frontier_coverage_5/group_std_mean": 0.27139957547187804, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025976261589676143, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025976261589676143, "signal/frontier_ece_reward/centered_abs_mean": 0.0579615406692028, "signal/frontier_ece_reward/group_bin_occupancy": 0.7409722222222223, "signal/frontier_ece_reward/group_std_mean": 0.07379811108112336, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005796154215931893, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005796154215931893, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24322098791599273, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7798611111111111, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3134605050086975, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02432209961116314, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02432209961116314, "step": 80 }, { "calibration/aurc": 0.20271091384745868, "calibration/batch_distribution_entropy": 0.9737261800910246, "calibration/batch_entropy_100bins": 0.9591620782628887, "calibration/batch_entropy_10bins": 0.9737261800910246, "calibration/batch_entropy_50bins": 0.9716196064375529, "calibration/batch_uniqueness": 0.9514730500511689, "calibration/buffer_distribution_entropy": 0.9112810148129586, "calibration/buffer_entropy_100bins": 0.9083171951473498, "calibration/buffer_entropy_10bins": 0.9112810148129586, "calibration/buffer_entropy_50bins": 0.9257044262181973, "calibration/confidence_entropy": 0.5057748742528816, "calibration/coverage@0%": 0.01730324074074074, "calibration/coverage@1%": 0.01730324074074074, "calibration/coverage@10%": 0.2049778473780437, "calibration/coverage@15%": 0.3992730494612039, "calibration/coverage@20%": 0.5388349573395385, "calibration/coverage@25%": 0.7145227360868722, "calibration/coverage@30%": 0.8611193783068783, "calibration/coverage@5%": 0.0400214947089947, "calibration/ece": 0.1378626115809829, "calibration/mean_confidence": 0.5521758505614638, "calibration/prompt_uniqueness": 0.8702453560167897, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005555555555555536, "completions/max_length": 3238.0, "completions/max_terminated_length": 3238.0, "completions/mean_length": 672.5680541992188, "completions/mean_terminated_length": 676.3651611328125, "completions/min_length": 0.0, "completions/min_terminated_length": 220.4, "epoch": 0.2039974500318746, "grad_norm": 0.00044680043356493115, "learning_rate": 3.7048192771084342e-06, "loss": -0.0043, "num_tokens": 163422229.0, "reward": 0.9934585094451904, "reward_std": 0.1226073071360588, "rewards/accuracy_reward": 0.6869791746139526, "rewards/brier_reward": 0.7851580858230591, "rewards/confidence_uniqueness_reward": 0.9424677133560181, "rewards/format_reward": 0.9941840291023254, "rewards/frontier_aurc_reward": -0.0016598706366494297, "rewards/frontier_coverage_0": -0.004633589053992182, "rewards/frontier_coverage_1": -0.004633589053992182, "rewards/frontier_coverage_10": -0.004633589053992182, "rewards/frontier_coverage_15": -0.004633589053992182, "rewards/frontier_coverage_20": -0.004633589053992182, "rewards/frontier_coverage_25": -0.004633589053992182, "rewards/frontier_coverage_5": -0.004633589053992182, "rewards/frontier_ece_reward": 0.02196214161813259, "rewards/frontier_entropy_batch_reward": -0.21655711829662322, "signal/accuracy_reward/centered_abs_mean": 0.16119791865348815, "signal/accuracy_reward/group_bin_occupancy": 0.19965277777777776, "signal/accuracy_reward/group_std_mean": 0.2105330467224121, "signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08059895932674407, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08059895932674407, "signal/advantage_abs_mean": 0.0923902839422226, "signal/advantage_pre_scale_abs_mean": 0.0923902839422226, "signal/advantage_pre_scale_std": 0.14465830028057097, "signal/advantage_std": 0.14465830028057097, "signal/brier_reward/centered_abs_mean": 0.1523455262184143, "signal/brier_reward/group_bin_occupancy": 0.851388888888889, "signal/brier_reward/group_std_mean": 0.1943160504102707, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015234552882611751, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015234552882611751, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02421053908765316, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8864583333333332, "signal/confidence_uniqueness_reward/group_std_mean": 0.038174081966280936, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024210539646446704, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024210539646446704, "signal/format_reward/centered_abs_mean": 0.01048719622194767, "signal/format_reward/group_bin_occupancy": 0.13645833333333332, "signal/format_reward/group_std_mean": 0.021353743970394135, "signal/format_reward/group_zero_std_frac": 0.9083333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005243598110973835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005243598110973835, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015173830557614564, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6986111111111112, "signal/frontier_aurc_reward/group_std_mean": 0.0024198783095926045, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8967288997373544e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8967288997373544e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19244979321956635, "signal/frontier_coverage_0/group_bin_occupancy": 0.8506944444444444, "signal/frontier_coverage_0/group_std_mean": 0.2510490626096725, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024056224152445792, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024056224152445792, "signal/frontier_coverage_1/centered_abs_mean": 0.19244979321956635, "signal/frontier_coverage_1/group_bin_occupancy": 0.8506944444444444, "signal/frontier_coverage_1/group_std_mean": 0.2510490626096725, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024056224152445792, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024056224152445792, "signal/frontier_coverage_10/centered_abs_mean": 0.19244979321956635, "signal/frontier_coverage_10/group_bin_occupancy": 0.8506944444444444, "signal/frontier_coverage_10/group_std_mean": 0.2510490626096725, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024056224152445792, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024056224152445792, "signal/frontier_coverage_15/centered_abs_mean": 0.19244979321956635, "signal/frontier_coverage_15/group_bin_occupancy": 0.8506944444444444, "signal/frontier_coverage_15/group_std_mean": 0.2510490626096725, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024056224152445792, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024056224152445792, "signal/frontier_coverage_20/centered_abs_mean": 0.19244979321956635, "signal/frontier_coverage_20/group_bin_occupancy": 0.8506944444444444, "signal/frontier_coverage_20/group_std_mean": 0.2510490626096725, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024056224152445792, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024056224152445792, "signal/frontier_coverage_25/centered_abs_mean": 0.19244979321956635, "signal/frontier_coverage_25/group_bin_occupancy": 0.8506944444444444, "signal/frontier_coverage_25/group_std_mean": 0.2510490626096725, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024056224152445792, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024056224152445792, "signal/frontier_coverage_5/centered_abs_mean": 0.19244979321956635, "signal/frontier_coverage_5/group_bin_occupancy": 0.8506944444444444, "signal/frontier_coverage_5/group_std_mean": 0.2510490626096725, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024056224152445792, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024056224152445792, "signal/frontier_ece_reward/centered_abs_mean": 0.052489711344242095, "signal/frontier_ece_reward/group_bin_occupancy": 0.7017361111111111, "signal/frontier_ece_reward/group_std_mean": 0.06726017668843269, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005248971004039049, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005248971004039049, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2638679683208466, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.8125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3345774471759796, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026386797800660132, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026386797800660132, "step": 85 }, { "calibration/aurc": 0.1771991198328447, "calibration/batch_distribution_entropy": 0.96243817415222, "calibration/batch_entropy_100bins": 0.9507006712592847, "calibration/batch_entropy_10bins": 0.96243817415222, "calibration/batch_entropy_50bins": 0.964868282268785, "calibration/batch_uniqueness": 0.9487259176376346, "calibration/buffer_distribution_entropy": 0.9187161356306482, "calibration/buffer_entropy_100bins": 0.9165996492677836, "calibration/buffer_entropy_10bins": 0.9187161356306482, "calibration/buffer_entropy_50bins": 0.9323867334705161, "calibration/confidence_entropy": 0.5093906412982893, "calibration/coverage@0%": 0.048757301233618, "calibration/coverage@1%": 0.048757301233618, "calibration/coverage@10%": 0.306309071579122, "calibration/coverage@15%": 0.4915969045669811, "calibration/coverage@20%": 0.603687003183948, "calibration/coverage@25%": 0.780469418411813, "calibration/coverage@30%": 0.8668421125340184, "calibration/coverage@5%": 0.08637593441360888, "calibration/ece": 0.16357922386808874, "calibration/mean_confidence": 0.5690209591292246, "calibration/prompt_uniqueness": 0.8674022167680248, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0064236111111110935, "completions/max_length": 3078.6, "completions/max_terminated_length": 3078.6, "completions/mean_length": 640.9775268554688, "completions/mean_terminated_length": 645.139404296875, "completions/min_length": 0.0, "completions/min_terminated_length": 178.6, "epoch": 0.2159973000337496, "grad_norm": 0.0004924891400150955, "learning_rate": 3.5542168674698798e-06, "loss": -0.0054, "num_tokens": 173874962.0, "reward": 0.9923341274261475, "reward_std": 0.12305669635534286, "rewards/accuracy_reward": 0.6828993082046508, "rewards/brier_reward": 0.777597713470459, "rewards/confidence_uniqueness_reward": 0.9433488249778748, "rewards/format_reward": 0.9934895753860473, "rewards/frontier_aurc_reward": -0.0015883626649156213, "rewards/frontier_coverage_0": -0.00989127003122121, "rewards/frontier_coverage_1": -0.00989127003122121, "rewards/frontier_coverage_10": -0.00989127003122121, "rewards/frontier_coverage_15": -0.00989127003122121, "rewards/frontier_coverage_20": -0.00989127003122121, "rewards/frontier_coverage_25": -0.00989127003122121, "rewards/frontier_coverage_5": -0.00989127003122121, "rewards/frontier_ece_reward": 0.018967508152127267, "rewards/frontier_entropy_batch_reward": -0.18966446816921234, "signal/accuracy_reward/centered_abs_mean": 0.16197374165058137, "signal/accuracy_reward/group_bin_occupancy": 0.19791666666666669, "signal/accuracy_reward/group_std_mean": 0.20919720828533173, "signal/accuracy_reward/group_zero_std_frac": 0.4166666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08098687082529069, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08098687082529069, "signal/advantage_abs_mean": 0.09262900650501252, "signal/advantage_pre_scale_abs_mean": 0.09262900650501252, "signal/advantage_pre_scale_std": 0.14502845108509063, "signal/advantage_std": 0.14502845108509063, "signal/brier_reward/centered_abs_mean": 0.15718668401241304, "signal/brier_reward/group_bin_occupancy": 0.8552083333333333, "signal/brier_reward/group_std_mean": 0.1992782771587372, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01571866814047098, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01571866814047098, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024519116804003715, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.88125, "signal/confidence_uniqueness_reward/group_std_mean": 0.03934002220630646, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024519118014723063, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024519118014723063, "signal/format_reward/centered_abs_mean": 0.011572265625, "signal/format_reward/group_bin_occupancy": 0.1378472222222222, "signal/format_reward/group_std_mean": 0.023632752522826195, "signal/format_reward/group_zero_std_frac": 0.8972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0057861328125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0057861328125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014947153860703111, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6850694444444445, "signal/frontier_aurc_reward/group_std_mean": 0.0023850529454648496, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.868394247139804e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.868394247139804e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1994914710521698, "signal/frontier_coverage_0/group_bin_occupancy": 0.8565972222222221, "signal/frontier_coverage_0/group_std_mean": 0.25926323533058165, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024936434347182512, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024936434347182512, "signal/frontier_coverage_1/centered_abs_mean": 0.1994914710521698, "signal/frontier_coverage_1/group_bin_occupancy": 0.8565972222222221, "signal/frontier_coverage_1/group_std_mean": 0.25926323533058165, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024936434347182512, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024936434347182512, "signal/frontier_coverage_10/centered_abs_mean": 0.1994914710521698, "signal/frontier_coverage_10/group_bin_occupancy": 0.8565972222222221, "signal/frontier_coverage_10/group_std_mean": 0.25926323533058165, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024936434347182512, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024936434347182512, "signal/frontier_coverage_15/centered_abs_mean": 0.1994914710521698, "signal/frontier_coverage_15/group_bin_occupancy": 0.8565972222222221, "signal/frontier_coverage_15/group_std_mean": 0.25926323533058165, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024936434347182512, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024936434347182512, "signal/frontier_coverage_20/centered_abs_mean": 0.1994914710521698, "signal/frontier_coverage_20/group_bin_occupancy": 0.8565972222222221, "signal/frontier_coverage_20/group_std_mean": 0.25926323533058165, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024936434347182512, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024936434347182512, "signal/frontier_coverage_25/centered_abs_mean": 0.1994914710521698, "signal/frontier_coverage_25/group_bin_occupancy": 0.8565972222222221, "signal/frontier_coverage_25/group_std_mean": 0.25926323533058165, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024936434347182512, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024936434347182512, "signal/frontier_coverage_5/centered_abs_mean": 0.1994914710521698, "signal/frontier_coverage_5/group_bin_occupancy": 0.8565972222222221, "signal/frontier_coverage_5/group_std_mean": 0.25926323533058165, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024936434347182512, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024936434347182512, "signal/frontier_ece_reward/centered_abs_mean": 0.050507232546806335, "signal/frontier_ece_reward/group_bin_occupancy": 0.6986111111111111, "signal/frontier_ece_reward/group_std_mean": 0.06456724032759667, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005050723347812891, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005050723347812891, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25887452661991117, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7711805555555555, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3302801251411438, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02588745318353176, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02588745318353176, "step": 90 }, { "calibration/aurc": 0.21387456839353614, "calibration/batch_distribution_entropy": 0.9823557824251997, "calibration/batch_entropy_100bins": 0.9648278783784832, "calibration/batch_entropy_10bins": 0.9823557824251997, "calibration/batch_entropy_50bins": 0.9772968018292392, "calibration/batch_uniqueness": 0.9534788717971289, "calibration/buffer_distribution_entropy": 0.9254646438274845, "calibration/buffer_entropy_100bins": 0.9237693855572863, "calibration/buffer_entropy_10bins": 0.9254646438274845, "calibration/buffer_entropy_50bins": 0.9382846474644742, "calibration/confidence_entropy": 0.4893090729142555, "calibration/coverage@0%": 0.06979967507118581, "calibration/coverage@1%": 0.10558914875539635, "calibration/coverage@10%": 0.27119950973638285, "calibration/coverage@15%": 0.5634953097731239, "calibration/coverage@20%": 0.6151696116928447, "calibration/coverage@25%": 0.654349367364747, "calibration/coverage@30%": 0.7034767670157068, "calibration/coverage@5%": 0.18697819073206579, "calibration/ece": 0.20191279563686187, "calibration/mean_confidence": 0.5199859337619741, "calibration/prompt_uniqueness": 0.8663527153909893, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004253472222222232, "completions/max_length": 3003.0, "completions/max_terminated_length": 3003.0, "completions/mean_length": 630.5767456054688, "completions/mean_terminated_length": 633.2605712890625, "completions/min_length": 0.0, "completions/min_terminated_length": 162.4, "epoch": 0.22799715003562457, "grad_norm": 0.00040928201633505523, "learning_rate": 3.4036144578313257e-06, "loss": -0.0023, "num_tokens": 184230886.0, "reward": 0.9884204149246216, "reward_std": 0.1153764232993126, "rewards/accuracy_reward": 0.6644097328186035, "rewards/brier_reward": 0.7661008715629578, "rewards/confidence_uniqueness_reward": 0.9478963613510132, "rewards/format_reward": 0.9957465291023254, "rewards/frontier_aurc_reward": -0.0016027359291911126, "rewards/frontier_coverage_0": -0.002308785542845726, "rewards/frontier_coverage_1": -0.002308785542845726, "rewards/frontier_coverage_10": -0.002308785542845726, "rewards/frontier_coverage_15": -0.002308785542845726, "rewards/frontier_coverage_20": -0.002308785542845726, "rewards/frontier_coverage_25": -0.002308785542845726, "rewards/frontier_coverage_5": -0.002308785542845726, "rewards/frontier_ece_reward": 0.019070269353687764, "rewards/frontier_entropy_batch_reward": -0.14742431938648223, "signal/accuracy_reward/centered_abs_mean": 0.14867621660232544, "signal/accuracy_reward/group_bin_occupancy": 0.196875, "signal/accuracy_reward/group_std_mean": 0.199493145942688, "signal/accuracy_reward/group_zero_std_frac": 0.4250000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07433810830116272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07433810830116272, "signal/advantage_abs_mean": 0.08577116578817368, "signal/advantage_pre_scale_abs_mean": 0.08577116578817368, "signal/advantage_pre_scale_std": 0.13571085333824157, "signal/advantage_std": 0.13571085333824157, "signal/brier_reward/centered_abs_mean": 0.16521736681461335, "signal/brier_reward/group_bin_occupancy": 0.8541666666666667, "signal/brier_reward/group_std_mean": 0.20886048674583435, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016521737165749072, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016521737165749072, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01989307664334774, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.884375, "signal/confidence_uniqueness_reward/group_std_mean": 0.03198789656162262, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001989307696931064, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001989307696931064, "signal/format_reward/centered_abs_mean": 0.007590060774236918, "signal/format_reward/group_bin_occupancy": 0.13506944444444444, "signal/format_reward/group_std_mean": 0.01698396187275648, "signal/format_reward/group_zero_std_frac": 0.919444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003795030387118459, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003795030387118459, "signal/frontier_aurc_reward/centered_abs_mean": 0.001562464004382491, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6795138888888889, "signal/frontier_aurc_reward/group_std_mean": 0.0025261019822210074, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.953080100065563e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.953080100065563e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2118411511182785, "signal/frontier_coverage_0/group_bin_occupancy": 0.8520833333333332, "signal/frontier_coverage_0/group_std_mean": 0.2758490860462189, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026480144821107387, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026480144821107387, "signal/frontier_coverage_1/centered_abs_mean": 0.2118411511182785, "signal/frontier_coverage_1/group_bin_occupancy": 0.8520833333333332, "signal/frontier_coverage_1/group_std_mean": 0.2758490860462189, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026480144821107387, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026480144821107387, "signal/frontier_coverage_10/centered_abs_mean": 0.2118411511182785, "signal/frontier_coverage_10/group_bin_occupancy": 0.8520833333333332, "signal/frontier_coverage_10/group_std_mean": 0.2758490860462189, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026480144821107387, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026480144821107387, "signal/frontier_coverage_15/centered_abs_mean": 0.2118411511182785, "signal/frontier_coverage_15/group_bin_occupancy": 0.8520833333333332, "signal/frontier_coverage_15/group_std_mean": 0.2758490860462189, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026480144821107387, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026480144821107387, "signal/frontier_coverage_20/centered_abs_mean": 0.2118411511182785, "signal/frontier_coverage_20/group_bin_occupancy": 0.8520833333333332, "signal/frontier_coverage_20/group_std_mean": 0.2758490860462189, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026480144821107387, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026480144821107387, "signal/frontier_coverage_25/centered_abs_mean": 0.2118411511182785, "signal/frontier_coverage_25/group_bin_occupancy": 0.8520833333333332, "signal/frontier_coverage_25/group_std_mean": 0.2758490860462189, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026480144821107387, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026480144821107387, "signal/frontier_coverage_5/centered_abs_mean": 0.2118411511182785, "signal/frontier_coverage_5/group_bin_occupancy": 0.8520833333333332, "signal/frontier_coverage_5/group_std_mean": 0.2758490860462189, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026480144821107387, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026480144821107387, "signal/frontier_ece_reward/centered_abs_mean": 0.05060374662280083, "signal/frontier_ece_reward/group_bin_occupancy": 0.7100694444444444, "signal/frontier_ece_reward/group_std_mean": 0.06428168565034867, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0050603746436536316, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0050603746436536316, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22136266827583312, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.763888888888889, "signal/frontier_entropy_batch_reward/group_std_mean": 0.29468963146209715, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022136268392205238, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022136268392205238, "step": 95 }, { "calibration/aurc": 0.167972204724354, "calibration/batch_distribution_entropy": 0.9864342846502329, "calibration/batch_entropy_100bins": 0.9650832272289168, "calibration/batch_entropy_10bins": 0.9864342846502329, "calibration/batch_entropy_50bins": 0.9783947242346824, "calibration/batch_uniqueness": 0.953848941281708, "calibration/buffer_distribution_entropy": 0.9315273864669636, "calibration/buffer_entropy_100bins": 0.9301545524945812, "calibration/buffer_entropy_10bins": 0.9315273864669636, "calibration/buffer_entropy_50bins": 0.9434719750608054, "calibration/confidence_entropy": 0.5021417689027668, "calibration/coverage@0%": 0.030512946582513044, "calibration/coverage@1%": 0.030512946582513044, "calibration/coverage@10%": 0.34233461774347035, "calibration/coverage@15%": 0.47939107668565634, "calibration/coverage@20%": 0.7026149703671164, "calibration/coverage@25%": 0.8276201440877331, "calibration/coverage@30%": 0.8976031607885394, "calibration/coverage@5%": 0.11523160748304304, "calibration/ece": 0.17367782749954952, "calibration/mean_confidence": 0.5388179797320996, "calibration/prompt_uniqueness": 0.8750848836612375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008333333333333326, "completions/max_length": 2917.4, "completions/max_terminated_length": 2917.4, "completions/mean_length": 626.88291015625, "completions/mean_terminated_length": 632.1784790039062, "completions/min_length": 0.0, "completions/min_terminated_length": 188.8, "epoch": 0.23999700003749952, "grad_norm": 0.0003905615594703704, "learning_rate": 3.2530120481927713e-06, "loss": -0.0055, "num_tokens": 194551649.0, "reward": 0.9936325430870057, "reward_std": 0.12192367911338806, "rewards/accuracy_reward": 0.6775173544883728, "rewards/brier_reward": 0.7730206847190857, "rewards/confidence_uniqueness_reward": 0.9442116141319274, "rewards/format_reward": 0.9916666746139526, "rewards/frontier_aurc_reward": -0.0014509693486616016, "rewards/frontier_coverage_0": -8.213166147470474e-05, "rewards/frontier_coverage_1": -8.213166147470474e-05, "rewards/frontier_coverage_10": -8.213166147470474e-05, "rewards/frontier_coverage_15": -8.213166147470474e-05, "rewards/frontier_coverage_20": -8.213166147470474e-05, "rewards/frontier_coverage_25": -8.213166147470474e-05, "rewards/frontier_coverage_5": -8.213166147470474e-05, "rewards/frontier_ece_reward": 0.01913320329040289, "rewards/frontier_entropy_batch_reward": -0.14570734947919844, "signal/accuracy_reward/centered_abs_mean": 0.16055229902267457, "signal/accuracy_reward/group_bin_occupancy": 0.19930555555555557, "signal/accuracy_reward/group_std_mean": 0.21070023775100707, "signal/accuracy_reward/group_zero_std_frac": 0.4055555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08027614951133728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08027614951133728, "signal/advantage_abs_mean": 0.090542633831501, "signal/advantage_pre_scale_abs_mean": 0.090542633831501, "signal/advantage_pre_scale_std": 0.14465495347976684, "signal/advantage_std": 0.14465495347976684, "signal/brier_reward/centered_abs_mean": 0.16810146272182463, "signal/brier_reward/group_bin_occupancy": 0.8604166666666666, "signal/brier_reward/group_std_mean": 0.21120634078979492, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016810146719217302, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016810146719217302, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024970437213778496, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8760416666666666, "signal/confidence_uniqueness_reward/group_std_mean": 0.040504425019025805, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024970436468720438, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024970436468720438, "signal/format_reward/centered_abs_mean": 0.01360677070915699, "signal/format_reward/group_bin_occupancy": 0.13923611111111112, "signal/format_reward/group_std_mean": 0.026751523464918138, "signal/format_reward/group_zero_std_frac": 0.8861111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006803385354578495, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006803385354578495, "signal/frontier_aurc_reward/centered_abs_mean": 0.001497122971341014, "signal/frontier_aurc_reward/group_bin_occupancy": 0.679513888888889, "signal/frontier_aurc_reward/group_std_mean": 0.0024236575700342655, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.871403837867547e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.871403837867547e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.22072286307811737, "signal/frontier_coverage_0/group_bin_occupancy": 0.8427083333333332, "signal/frontier_coverage_0/group_std_mean": 0.28572168946266174, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027590358164161443, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027590358164161443, "signal/frontier_coverage_1/centered_abs_mean": 0.22072286307811737, "signal/frontier_coverage_1/group_bin_occupancy": 0.8427083333333332, "signal/frontier_coverage_1/group_std_mean": 0.28572168946266174, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027590358164161443, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027590358164161443, "signal/frontier_coverage_10/centered_abs_mean": 0.22072286307811737, "signal/frontier_coverage_10/group_bin_occupancy": 0.8427083333333332, "signal/frontier_coverage_10/group_std_mean": 0.28572168946266174, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027590358164161443, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027590358164161443, "signal/frontier_coverage_15/centered_abs_mean": 0.22072286307811737, "signal/frontier_coverage_15/group_bin_occupancy": 0.8427083333333332, "signal/frontier_coverage_15/group_std_mean": 0.28572168946266174, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027590358164161443, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027590358164161443, "signal/frontier_coverage_20/centered_abs_mean": 0.22072286307811737, "signal/frontier_coverage_20/group_bin_occupancy": 0.8427083333333332, "signal/frontier_coverage_20/group_std_mean": 0.28572168946266174, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027590358164161443, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027590358164161443, "signal/frontier_coverage_25/centered_abs_mean": 0.22072286307811737, "signal/frontier_coverage_25/group_bin_occupancy": 0.8427083333333332, "signal/frontier_coverage_25/group_std_mean": 0.28572168946266174, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027590358164161443, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027590358164161443, "signal/frontier_coverage_5/centered_abs_mean": 0.22072286307811737, "signal/frontier_coverage_5/group_bin_occupancy": 0.8427083333333332, "signal/frontier_coverage_5/group_std_mean": 0.28572168946266174, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027590358164161443, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027590358164161443, "signal/frontier_ece_reward/centered_abs_mean": 0.04990529865026474, "signal/frontier_ece_reward/group_bin_occupancy": 0.7184027777777777, "signal/frontier_ece_reward/group_std_mean": 0.06357247680425644, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004990530014038086, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004990530014038086, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22153306305408477, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7583333333333333, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2920175909996033, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022153307124972342, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022153307124972342, "step": 100 }, { "epoch": 0.23999700003749952, "eval_calibration/aurc": 0.1696630845347579, "eval_calibration/batch_distribution_entropy": 0.9338246735522308, "eval_calibration/batch_entropy_100bins": 0.706892459061427, "eval_calibration/batch_entropy_10bins": 0.9338246735522308, "eval_calibration/batch_entropy_50bins": 0.7818401869433624, "eval_calibration/batch_uniqueness": 0.8962181321540061, "eval_calibration/buffer_distribution_entropy": 0.9351777755636745, "eval_calibration/buffer_entropy_100bins": 0.9338170230335757, "eval_calibration/buffer_entropy_10bins": 0.9351777755636745, "eval_calibration/buffer_entropy_50bins": 0.9465654691196751, "eval_calibration/confidence_entropy": 0.47687795562464963, "eval_calibration/coverage@0%": 0.21908602150537634, "eval_calibration/coverage@1%": 0.21908602150537634, "eval_calibration/coverage@10%": 0.401377688172043, "eval_calibration/coverage@15%": 0.4847110215053763, "eval_calibration/coverage@20%": 0.6584341397849462, "eval_calibration/coverage@25%": 0.8929771505376344, "eval_calibration/coverage@30%": 0.946236559139785, "eval_calibration/coverage@5%": 0.276377688172043, "eval_calibration/ece": 0.23803624165826145, "eval_calibration/mean_confidence": 0.569122975052372, "eval_calibration/prompt_uniqueness": 0.8962181321540061, "eval_completions/clipped_ratio": 0.010416666666666666, "eval_completions/max_length": 2299.3333333333335, "eval_completions/max_terminated_length": 2299.3333333333335, "eval_completions/mean_length": 617.4854431152344, "eval_completions/mean_terminated_length": 624.0654602050781, "eval_completions/min_length": 45.333333333333336, "eval_completions/min_terminated_length": 213.66666666666666, "eval_loss": 0.0, "eval_num_tokens": 194551649.0, "eval_reward": 0.9322001536687216, "eval_reward_std": 0.23809615274270376, "eval_rewards/accuracy_reward": 0.6710069477558136, "eval_rewards/brier_reward": 0.7738438149293264, "eval_rewards/confidence_uniqueness_reward": 0.8830358386039734, "eval_rewards/format_reward": 0.9869791766007742, "eval_rewards/frontier_aurc_reward": -0.0014650731851967673, "eval_rewards/frontier_coverage_0": 0.00044721147666374844, "eval_rewards/frontier_coverage_1": 0.00044721147666374844, "eval_rewards/frontier_coverage_10": 0.00044721147666374844, "eval_rewards/frontier_coverage_15": 0.00044721147666374844, "eval_rewards/frontier_coverage_20": 0.00044721147666374844, "eval_rewards/frontier_coverage_25": 0.00044721147666374844, "eval_rewards/frontier_coverage_5": 0.00044721147666374844, "eval_rewards/frontier_ece_reward": 0.01968886749818921, "eval_rewards/frontier_entropy_batch_reward": -0.6447059710820516, "eval_runtime": 205.8691, "eval_samples_per_second": 4.857, "eval_signal/accuracy_reward/centered_abs_mean": 0.4316948801279068, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4710538685321808, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2158474400639534, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2158474400639534, "eval_signal/advantage_abs_mean": 0.20248722285032272, "eval_signal/advantage_pre_scale_abs_mean": 0.20248722285032272, "eval_signal/advantage_pre_scale_std": 0.23723148057858148, "eval_signal/advantage_std": 0.23723148057858148, "eval_signal/brier_reward/centered_abs_mean": 0.20928792655467987, "eval_signal/brier_reward/group_bin_occupancy": 0.8854166666666666, "eval_signal/brier_reward/group_std_mean": 0.2662544945875804, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020928792965908844, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.020928792965908844, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.054847310607632004, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.40625, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09344139198462169, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005484731014197071, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005484731014197071, "eval_signal/format_reward/centered_abs_mean": 0.025010850590964157, "eval_signal/format_reward/group_bin_occupancy": 0.17013888888888887, "eval_signal/format_reward/group_std_mean": 0.06767813861370087, "eval_signal/format_reward/group_zero_std_frac": 0.6388889104127884, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.012505425295482079, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.012505425295482079, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0020411182777024806, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6631944444444445, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0037244935131942234, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.551397907761081e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.551397907761081e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.2758402054508527, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_0/group_std_mean": 0.39653781056404114, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00344800246724238, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00344800246724238, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.2758402054508527, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_1/group_std_mean": 0.39653781056404114, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00344800246724238, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00344800246724238, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.2758402054508527, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_10/group_std_mean": 0.39653781056404114, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00344800246724238, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00344800246724238, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.2758402054508527, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_15/group_std_mean": 0.39653781056404114, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00344800246724238, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00344800246724238, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.2758402054508527, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_20/group_std_mean": 0.39653781056404114, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00344800246724238, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00344800246724238, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.2758402054508527, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_25/group_std_mean": 0.39653781056404114, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00344800246724238, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00344800246724238, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.2758402054508527, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_5/group_std_mean": 0.39653781056404114, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00344800246724238, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00344800246724238, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.05408057694633802, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9097222222222223, "eval_signal/frontier_ece_reward/group_std_mean": 0.0697250347584486, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005408057787766059, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005408057787766059, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3219749679168065, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2951388888888889, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.33611299594243366, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032197498405973114, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032197498405973114, "eval_steps_per_second": 0.029, "step": 100 }, { "calibration/aurc": 0.28984463916978553, "calibration/batch_distribution_entropy": 0.9711874364190758, "calibration/batch_entropy_100bins": 0.9559796276599972, "calibration/batch_entropy_10bins": 0.9711874364190758, "calibration/batch_entropy_50bins": 0.967740156453336, "calibration/batch_uniqueness": 0.9505796993365717, "calibration/buffer_distribution_entropy": 0.936615480617325, "calibration/buffer_entropy_100bins": 0.9357953238378386, "calibration/buffer_entropy_10bins": 0.936615480617325, "calibration/buffer_entropy_50bins": 0.9480214843525481, "calibration/confidence_entropy": 0.5001631137185123, "calibration/coverage@0%": 0.01958675450744108, "calibration/coverage@1%": 0.01958675450744108, "calibration/coverage@10%": 0.16792198551812815, "calibration/coverage@15%": 0.18639011882766576, "calibration/coverage@20%": 0.2856551133219956, "calibration/coverage@25%": 0.3307244292133193, "calibration/coverage@30%": 0.48409656797179723, "calibration/coverage@5%": 0.1408980940299875, "calibration/ece": 0.14950855344866051, "calibration/mean_confidence": 0.5798324340804183, "calibration/prompt_uniqueness": 0.8621616643105219, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666675, "completions/max_length": 3329.6, "completions/max_terminated_length": 3329.6, "completions/mean_length": 630.7128662109375, "completions/mean_terminated_length": 637.3713989257812, "completions/min_length": 0.0, "completions/min_terminated_length": 172.6, "epoch": 0.2519968500393745, "grad_norm": 0.0003931356477551162, "learning_rate": 3.1024096385542172e-06, "loss": -0.0083, "num_tokens": 204894325.0, "reward": 0.9878118515014649, "reward_std": 0.12439936995506287, "rewards/accuracy_reward": 0.6724826335906983, "rewards/brier_reward": 0.7758374571800232, "rewards/confidence_uniqueness_reward": 0.9409982562065125, "rewards/format_reward": 0.9895833373069763, "rewards/frontier_aurc_reward": -0.0015477648237720131, "rewards/frontier_coverage_0": -0.00018841465935111047, "rewards/frontier_coverage_1": -0.00018841465935111047, "rewards/frontier_coverage_10": -0.00018841465935111047, "rewards/frontier_coverage_15": -0.00018841465935111047, "rewards/frontier_coverage_20": -0.00018841465935111047, "rewards/frontier_coverage_25": -0.00018841465935111047, "rewards/frontier_coverage_5": -0.00018841465935111047, "rewards/frontier_ece_reward": 0.017811648175120355, "rewards/frontier_entropy_batch_reward": -0.16650085747241974, "signal/accuracy_reward/centered_abs_mean": 0.1506781682372093, "signal/accuracy_reward/group_bin_occupancy": 0.19965277777777776, "signal/accuracy_reward/group_std_mean": 0.20425570011138916, "signal/accuracy_reward/group_zero_std_frac": 0.4027777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07533908411860465, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07533908411860465, "signal/advantage_abs_mean": 0.09059339612722397, "signal/advantage_pre_scale_abs_mean": 0.09059339612722397, "signal/advantage_pre_scale_std": 0.14705823063850404, "signal/advantage_std": 0.14705823063850404, "signal/brier_reward/centered_abs_mean": 0.1585765987634659, "signal/brier_reward/group_bin_occupancy": 0.85, "signal/brier_reward/group_std_mean": 0.20116137266159057, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015857660584151743, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015857660584151743, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028387091308832168, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8545138888888889, "signal/confidence_uniqueness_reward/group_std_mean": 0.046965491771697995, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002838709158822894, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002838709158822894, "signal/format_reward/centered_abs_mean": 0.016764323227107525, "signal/format_reward/group_bin_occupancy": 0.14270833333333335, "signal/format_reward/group_std_mean": 0.03298989050090313, "signal/format_reward/group_zero_std_frac": 0.8583333373069764, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008382161613553762, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008382161613553762, "signal/frontier_aurc_reward/centered_abs_mean": 0.001613885280676186, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6847222222222223, "signal/frontier_aurc_reward/group_std_mean": 0.0026253133080899717, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.017356746364385e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.017356746364385e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19782112836837767, "signal/frontier_coverage_0/group_bin_occupancy": 0.8472222222222221, "signal/frontier_coverage_0/group_std_mean": 0.2575752854347229, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024727642070502044, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024727642070502044, "signal/frontier_coverage_1/centered_abs_mean": 0.19782112836837767, "signal/frontier_coverage_1/group_bin_occupancy": 0.8472222222222221, "signal/frontier_coverage_1/group_std_mean": 0.2575752854347229, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024727642070502044, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024727642070502044, "signal/frontier_coverage_10/centered_abs_mean": 0.19782112836837767, "signal/frontier_coverage_10/group_bin_occupancy": 0.8472222222222221, "signal/frontier_coverage_10/group_std_mean": 0.2575752854347229, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024727642070502044, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024727642070502044, "signal/frontier_coverage_15/centered_abs_mean": 0.19782112836837767, "signal/frontier_coverage_15/group_bin_occupancy": 0.8472222222222221, "signal/frontier_coverage_15/group_std_mean": 0.2575752854347229, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024727642070502044, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024727642070502044, "signal/frontier_coverage_20/centered_abs_mean": 0.19782112836837767, "signal/frontier_coverage_20/group_bin_occupancy": 0.8472222222222221, "signal/frontier_coverage_20/group_std_mean": 0.2575752854347229, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024727642070502044, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024727642070502044, "signal/frontier_coverage_25/centered_abs_mean": 0.19782112836837767, "signal/frontier_coverage_25/group_bin_occupancy": 0.8472222222222221, "signal/frontier_coverage_25/group_std_mean": 0.2575752854347229, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024727642070502044, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024727642070502044, "signal/frontier_coverage_5/centered_abs_mean": 0.19782112836837767, "signal/frontier_coverage_5/group_bin_occupancy": 0.8472222222222221, "signal/frontier_coverage_5/group_std_mean": 0.2575752854347229, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024727642070502044, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024727642070502044, "signal/frontier_ece_reward/centered_abs_mean": 0.04588953480124473, "signal/frontier_ece_reward/group_bin_occupancy": 0.7059027777777779, "signal/frontier_ece_reward/group_std_mean": 0.058351149410009386, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004588953498750925, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004588953498750925, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2355831891298294, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7725694444444444, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3069721281528473, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023558317869901656, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023558317869901656, "step": 105 }, { "calibration/aurc": 0.16567868451536366, "calibration/batch_distribution_entropy": 0.9527322950855959, "calibration/batch_entropy_100bins": 0.947479658399056, "calibration/batch_entropy_10bins": 0.9527322950855959, "calibration/batch_entropy_50bins": 0.9567347662661371, "calibration/batch_uniqueness": 0.9478300016023, "calibration/buffer_distribution_entropy": 0.9392886191298354, "calibration/buffer_entropy_100bins": 0.9397206217856423, "calibration/buffer_entropy_10bins": 0.9392886191298354, "calibration/buffer_entropy_50bins": 0.9508547762112409, "calibration/confidence_entropy": 0.5005231604018499, "calibration/coverage@0%": 0.03288614870450616, "calibration/coverage@1%": 0.03288614870450616, "calibration/coverage@10%": 0.3226409737961885, "calibration/coverage@15%": 0.477594869979861, "calibration/coverage@20%": 0.6694523645087733, "calibration/coverage@25%": 0.7880677437475745, "calibration/coverage@30%": 0.9082701187335092, "calibration/coverage@5%": 0.14515768917448005, "calibration/ece": 0.13138400877437387, "calibration/mean_confidence": 0.6028444186351783, "calibration/prompt_uniqueness": 0.8709859308023828, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010069444444444419, "completions/max_length": 3546.2, "completions/max_terminated_length": 3546.2, "completions/mean_length": 640.7478271484375, "completions/mean_terminated_length": 647.2959106445312, "completions/min_length": 0.0, "completions/min_terminated_length": 192.6, "epoch": 0.2639967000412495, "grad_norm": 0.0004377875302452594, "learning_rate": 2.9518072289156627e-06, "loss": -0.009, "num_tokens": 215384188.0, "reward": 1.001729953289032, "reward_std": 0.1265808016061783, "rewards/accuracy_reward": 0.7092881917953491, "rewards/brier_reward": 0.7818510174751282, "rewards/confidence_uniqueness_reward": 0.9397328972816468, "rewards/format_reward": 0.9899305582046509, "rewards/frontier_aurc_reward": -0.001325283572077751, "rewards/frontier_coverage_0": -0.02074230033904314, "rewards/frontier_coverage_1": -0.02074230033904314, "rewards/frontier_coverage_10": -0.02074230033904314, "rewards/frontier_coverage_15": -0.02074230033904314, "rewards/frontier_coverage_20": -0.02074230033904314, "rewards/frontier_coverage_25": -0.02074230033904314, "rewards/frontier_coverage_5": -0.02074230033904314, "rewards/frontier_ece_reward": 0.01406394112855196, "rewards/frontier_entropy_batch_reward": -0.19612716436386107, "signal/accuracy_reward/centered_abs_mean": 0.15444336235523223, "signal/accuracy_reward/group_bin_occupancy": 0.20381944444444441, "signal/accuracy_reward/group_std_mean": 0.21105689704418182, "signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07722168117761612, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07722168117761612, "signal/advantage_abs_mean": 0.0925620898604393, "signal/advantage_pre_scale_abs_mean": 0.0925620898604393, "signal/advantage_pre_scale_std": 0.15025562345981597, "signal/advantage_std": 0.15025562345981597, "signal/brier_reward/centered_abs_mean": 0.15158471167087556, "signal/brier_reward/group_bin_occupancy": 0.8395833333333332, "signal/brier_reward/group_std_mean": 0.19477559626102448, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015158471278846264, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015158471278846264, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02864648588001728, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8680555555555556, "signal/confidence_uniqueness_reward/group_std_mean": 0.045315783470869064, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002864648727700114, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002864648727700114, "signal/format_reward/centered_abs_mean": 0.01662326380610466, "signal/format_reward/group_bin_occupancy": 0.14097222222222222, "signal/format_reward/group_std_mean": 0.03081248588860035, "signal/format_reward/group_zero_std_frac": 0.8722222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00831163190305233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00831163190305233, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013894882751628757, "signal/frontier_aurc_reward/group_bin_occupancy": 0.679513888888889, "signal/frontier_aurc_reward/group_std_mean": 0.00229809598531574, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7368603403156156e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7368603403156156e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1891954332590103, "signal/frontier_coverage_0/group_bin_occupancy": 0.8295138888888889, "signal/frontier_coverage_0/group_std_mean": 0.24967995285987854, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002364942990243435, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002364942990243435, "signal/frontier_coverage_1/centered_abs_mean": 0.1891954332590103, "signal/frontier_coverage_1/group_bin_occupancy": 0.8295138888888889, "signal/frontier_coverage_1/group_std_mean": 0.24967995285987854, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002364942990243435, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002364942990243435, "signal/frontier_coverage_10/centered_abs_mean": 0.1891954332590103, "signal/frontier_coverage_10/group_bin_occupancy": 0.8295138888888889, "signal/frontier_coverage_10/group_std_mean": 0.24967995285987854, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002364942990243435, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002364942990243435, "signal/frontier_coverage_15/centered_abs_mean": 0.1891954332590103, "signal/frontier_coverage_15/group_bin_occupancy": 0.8295138888888889, "signal/frontier_coverage_15/group_std_mean": 0.24967995285987854, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002364942990243435, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002364942990243435, "signal/frontier_coverage_20/centered_abs_mean": 0.1891954332590103, "signal/frontier_coverage_20/group_bin_occupancy": 0.8295138888888889, "signal/frontier_coverage_20/group_std_mean": 0.24967995285987854, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002364942990243435, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002364942990243435, "signal/frontier_coverage_25/centered_abs_mean": 0.1891954332590103, "signal/frontier_coverage_25/group_bin_occupancy": 0.8295138888888889, "signal/frontier_coverage_25/group_std_mean": 0.24967995285987854, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002364942990243435, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002364942990243435, "signal/frontier_coverage_5/centered_abs_mean": 0.1891954332590103, "signal/frontier_coverage_5/group_bin_occupancy": 0.8295138888888889, "signal/frontier_coverage_5/group_std_mean": 0.24967995285987854, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002364942990243435, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002364942990243435, "signal/frontier_ece_reward/centered_abs_mean": 0.041539561748504636, "signal/frontier_ece_reward/group_bin_occupancy": 0.7034722222222223, "signal/frontier_ece_reward/group_std_mean": 0.05418416783213616, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0041539563797414305, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0041539563797414305, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2623390406370163, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7722222222222223, "signal/frontier_entropy_batch_reward/group_std_mean": 0.335136216878891, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026233907043933868, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026233907043933868, "step": 110 }, { "calibration/aurc": 0.29473056838071987, "calibration/batch_distribution_entropy": 0.9619916322012759, "calibration/batch_entropy_100bins": 0.9549874064799202, "calibration/batch_entropy_10bins": 0.9619916322012759, "calibration/batch_entropy_50bins": 0.9647061223514687, "calibration/batch_uniqueness": 0.9495682126495403, "calibration/buffer_distribution_entropy": 0.9414297817272864, "calibration/buffer_entropy_100bins": 0.9430816398527859, "calibration/buffer_entropy_10bins": 0.9414297817272864, "calibration/buffer_entropy_50bins": 0.9532562123434823, "calibration/confidence_entropy": 0.497030786564154, "calibration/coverage@0%": 0.013913364638495167, "calibration/coverage@1%": 0.013913364638495167, "calibration/coverage@10%": 0.040717969175169524, "calibration/coverage@15%": 0.11740594901471416, "calibration/coverage@20%": 0.3688631313189874, "calibration/coverage@25%": 0.47341993382008873, "calibration/coverage@30%": 0.5718340073342899, "calibration/coverage@5%": 0.013913364638495167, "calibration/ece": 0.18056783838522902, "calibration/mean_confidence": 0.5710495335219846, "calibration/prompt_uniqueness": 0.8647219156071875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014930555555555535, "completions/max_length": 3417.4, "completions/max_terminated_length": 3417.4, "completions/mean_length": 639.9868041992188, "completions/mean_terminated_length": 649.7999755859375, "completions/min_length": 0.0, "completions/min_terminated_length": 185.6, "epoch": 0.27599655004312446, "grad_norm": 0.00040515753789804876, "learning_rate": 2.8012048192771087e-06, "loss": -0.0125, "num_tokens": 225836036.0, "reward": 0.9746179699897766, "reward_std": 0.13332333266735077, "rewards/accuracy_reward": 0.6622395753860474, "rewards/brier_reward": 0.7698838829994201, "rewards/confidence_uniqueness_reward": 0.9336004853248596, "rewards/format_reward": 0.9848090410232544, "rewards/frontier_aurc_reward": -0.0017310404684394598, "rewards/frontier_coverage_0": 0.004687186796218157, "rewards/frontier_coverage_1": 0.004687186796218157, "rewards/frontier_coverage_10": 0.004687186796218157, "rewards/frontier_coverage_15": 0.004687186796218157, "rewards/frontier_coverage_20": 0.004687186796218157, "rewards/frontier_coverage_25": 0.004687186796218157, "rewards/frontier_coverage_5": 0.004687186796218157, "rewards/frontier_ece_reward": 0.016754307225346564, "rewards/frontier_entropy_batch_reward": -0.213187313079834, "signal/accuracy_reward/centered_abs_mean": 0.15972764790058136, "signal/accuracy_reward/group_bin_occupancy": 0.196875, "signal/accuracy_reward/group_std_mean": 0.206351837515831, "signal/accuracy_reward/group_zero_std_frac": 0.4250000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07986382395029068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07986382395029068, "signal/advantage_abs_mean": 0.09935903698205947, "signal/advantage_pre_scale_abs_mean": 0.09935903698205947, "signal/advantage_pre_scale_std": 0.1625169038772583, "signal/advantage_std": 0.1625169038772583, "signal/brier_reward/centered_abs_mean": 0.16147418916225434, "signal/brier_reward/group_bin_occupancy": 0.8649305555555555, "signal/brier_reward/group_std_mean": 0.20425305664539337, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016147419437766077, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016147419437766077, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.036166596412658694, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8284722222222222, "signal/confidence_uniqueness_reward/group_std_mean": 0.05944165885448456, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003616659576073289, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003616659576073289, "signal/format_reward/centered_abs_mean": 0.02408311627805233, "signal/format_reward/group_bin_occupancy": 0.1482638888888889, "signal/format_reward/group_std_mean": 0.04518638737499714, "signal/format_reward/group_zero_std_frac": 0.8138888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012041558139026165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012041558139026165, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017531340941786765, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6899305555555555, "signal/frontier_aurc_reward/group_std_mean": 0.002778572216629982, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1914176249993035e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1914176249993035e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19187724888324736, "signal/frontier_coverage_0/group_bin_occupancy": 0.8548611111111111, "signal/frontier_coverage_0/group_std_mean": 0.25314462184906006, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023984656669199466, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023984656669199466, "signal/frontier_coverage_1/centered_abs_mean": 0.19187724888324736, "signal/frontier_coverage_1/group_bin_occupancy": 0.8548611111111111, "signal/frontier_coverage_1/group_std_mean": 0.25314462184906006, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023984656669199466, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023984656669199466, "signal/frontier_coverage_10/centered_abs_mean": 0.19187724888324736, "signal/frontier_coverage_10/group_bin_occupancy": 0.8548611111111111, "signal/frontier_coverage_10/group_std_mean": 0.25314462184906006, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023984656669199466, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023984656669199466, "signal/frontier_coverage_15/centered_abs_mean": 0.19187724888324736, "signal/frontier_coverage_15/group_bin_occupancy": 0.8548611111111111, "signal/frontier_coverage_15/group_std_mean": 0.25314462184906006, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023984656669199466, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023984656669199466, "signal/frontier_coverage_20/centered_abs_mean": 0.19187724888324736, "signal/frontier_coverage_20/group_bin_occupancy": 0.8548611111111111, "signal/frontier_coverage_20/group_std_mean": 0.25314462184906006, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023984656669199466, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023984656669199466, "signal/frontier_coverage_25/centered_abs_mean": 0.19187724888324736, "signal/frontier_coverage_25/group_bin_occupancy": 0.8548611111111111, "signal/frontier_coverage_25/group_std_mean": 0.25314462184906006, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023984656669199466, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023984656669199466, "signal/frontier_coverage_5/centered_abs_mean": 0.19187724888324736, "signal/frontier_coverage_5/group_bin_occupancy": 0.8548611111111111, "signal/frontier_coverage_5/group_std_mean": 0.25314462184906006, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023984656669199466, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023984656669199466, "signal/frontier_ece_reward/centered_abs_mean": 0.04284475669264794, "signal/frontier_ece_reward/group_bin_occupancy": 0.6947916666666667, "signal/frontier_ece_reward/group_std_mean": 0.054882925003767014, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004284475743770599, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004284475743770599, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2705032885074615, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7972222222222222, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33811612129211427, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027050328627228736, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027050328627228736, "step": 115 }, { "calibration/aurc": 0.2692657849205195, "calibration/batch_distribution_entropy": 0.9659327417988296, "calibration/batch_entropy_100bins": 0.9541108697407532, "calibration/batch_entropy_10bins": 0.9659327417988296, "calibration/batch_entropy_50bins": 0.9666704637716524, "calibration/batch_uniqueness": 0.9494544866872708, "calibration/buffer_distribution_entropy": 0.9450548596655558, "calibration/buffer_entropy_100bins": 0.946823957265584, "calibration/buffer_entropy_10bins": 0.9450548596655558, "calibration/buffer_entropy_50bins": 0.9562487948463927, "calibration/confidence_entropy": 0.4875023680573502, "calibration/coverage@0%": 0.017792988425402438, "calibration/coverage@1%": 0.017792988425402438, "calibration/coverage@10%": 0.2124956955037931, "calibration/coverage@15%": 0.4193426391829937, "calibration/coverage@20%": 0.5009450510714555, "calibration/coverage@25%": 0.5343342250707565, "calibration/coverage@30%": 0.566149934383202, "calibration/coverage@5%": 0.05855147063862448, "calibration/ece": 0.17746895129703658, "calibration/mean_confidence": 0.5736739808286649, "calibration/prompt_uniqueness": 0.865164092609454, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012326388888888906, "completions/max_length": 3293.2, "completions/max_terminated_length": 3293.2, "completions/mean_length": 630.0300415039062, "completions/mean_terminated_length": 637.8402709960938, "completions/min_length": 0.0, "completions/min_terminated_length": 203.6, "epoch": 0.28799640004499943, "grad_norm": 0.0003517817531246692, "learning_rate": 2.6506024096385547e-06, "loss": -0.0098, "num_tokens": 236175838.0, "reward": 0.9898527383804321, "reward_std": 0.1258085072040558, "rewards/accuracy_reward": 0.6803819417953492, "rewards/brier_reward": 0.7743050813674927, "rewards/confidence_uniqueness_reward": 0.9388420104980468, "rewards/format_reward": 0.9875868082046508, "rewards/frontier_aurc_reward": -0.00158976421225816, "rewards/frontier_coverage_0": -0.0026821551844477655, "rewards/frontier_coverage_1": -0.0026821551844477655, "rewards/frontier_coverage_10": -0.0026821551844477655, "rewards/frontier_coverage_15": -0.0026821551844477655, "rewards/frontier_coverage_20": -0.0026821551844477655, "rewards/frontier_coverage_25": -0.0026821551844477655, "rewards/frontier_coverage_5": -0.0026821551844477655, "rewards/frontier_ece_reward": 0.015108131617307664, "rewards/frontier_entropy_batch_reward": -0.16702641248703004, "signal/accuracy_reward/centered_abs_mean": 0.15770399272441865, "signal/accuracy_reward/group_bin_occupancy": 0.196875, "signal/accuracy_reward/group_std_mean": 0.2057257741689682, "signal/accuracy_reward/group_zero_std_frac": 0.425, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07885199636220933, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07885199636220933, "signal/advantage_abs_mean": 0.09386955499649048, "signal/advantage_pre_scale_abs_mean": 0.09386955499649048, "signal/advantage_pre_scale_std": 0.1530741721391678, "signal/advantage_std": 0.1530741721391678, "signal/brier_reward/centered_abs_mean": 0.15571836829185487, "signal/brier_reward/group_bin_occupancy": 0.8451388888888889, "signal/brier_reward/group_std_mean": 0.19793447852134705, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015571837686002255, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015571837686002255, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03023452088236809, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.85, "signal/confidence_uniqueness_reward/group_std_mean": 0.04891353026032448, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030234521254897118, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030234521254897118, "signal/format_reward/centered_abs_mean": 0.018983289785683154, "signal/format_reward/group_bin_occupancy": 0.14340277777777777, "signal/format_reward/group_std_mean": 0.03533447273075581, "signal/format_reward/group_zero_std_frac": 0.8527777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009491644892841577, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009491644892841577, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016145243542268872, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7020833333333334, "signal/frontier_aurc_reward/group_std_mean": 0.0025344877038151028, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0181555009912698e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0181555009912698e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19334494173526764, "signal/frontier_coverage_0/group_bin_occupancy": 0.842013888888889, "signal/frontier_coverage_0/group_std_mean": 0.253934046626091, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002416811836883426, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002416811836883426, "signal/frontier_coverage_1/centered_abs_mean": 0.19334494173526764, "signal/frontier_coverage_1/group_bin_occupancy": 0.842013888888889, "signal/frontier_coverage_1/group_std_mean": 0.253934046626091, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002416811836883426, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002416811836883426, "signal/frontier_coverage_10/centered_abs_mean": 0.19334494173526764, "signal/frontier_coverage_10/group_bin_occupancy": 0.842013888888889, "signal/frontier_coverage_10/group_std_mean": 0.253934046626091, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002416811836883426, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002416811836883426, "signal/frontier_coverage_15/centered_abs_mean": 0.19334494173526764, "signal/frontier_coverage_15/group_bin_occupancy": 0.842013888888889, "signal/frontier_coverage_15/group_std_mean": 0.253934046626091, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002416811836883426, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002416811836883426, "signal/frontier_coverage_20/centered_abs_mean": 0.19334494173526764, "signal/frontier_coverage_20/group_bin_occupancy": 0.842013888888889, "signal/frontier_coverage_20/group_std_mean": 0.253934046626091, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002416811836883426, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002416811836883426, "signal/frontier_coverage_25/centered_abs_mean": 0.19334494173526764, "signal/frontier_coverage_25/group_bin_occupancy": 0.842013888888889, "signal/frontier_coverage_25/group_std_mean": 0.253934046626091, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002416811836883426, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002416811836883426, "signal/frontier_coverage_5/centered_abs_mean": 0.19334494173526764, "signal/frontier_coverage_5/group_bin_occupancy": 0.842013888888889, "signal/frontier_coverage_5/group_std_mean": 0.253934046626091, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002416811836883426, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002416811836883426, "signal/frontier_ece_reward/centered_abs_mean": 0.041843322664499284, "signal/frontier_ece_reward/group_bin_occupancy": 0.6902777777777777, "signal/frontier_ece_reward/group_std_mean": 0.05341664999723435, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004184332210570574, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004184332210570574, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22915047109127046, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7739583333333333, "signal/frontier_entropy_batch_reward/group_std_mean": 0.29721260666847227, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02291504740715027, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02291504740715027, "step": 120 }, { "calibration/aurc": 0.19007498936195838, "calibration/batch_distribution_entropy": 0.9497641139330535, "calibration/batch_entropy_100bins": 0.9439166973372888, "calibration/batch_entropy_10bins": 0.9497641139330535, "calibration/batch_entropy_50bins": 0.9538831122680052, "calibration/batch_uniqueness": 0.9468067326996552, "calibration/buffer_distribution_entropy": 0.94693078276862, "calibration/buffer_entropy_100bins": 0.9496853046137248, "calibration/buffer_entropy_10bins": 0.94693078276862, "calibration/buffer_entropy_50bins": 0.9582811996332786, "calibration/confidence_entropy": 0.4982449646222564, "calibration/coverage@0%": 0.0068008289703315895, "calibration/coverage@1%": 0.0068008289703315895, "calibration/coverage@10%": 0.3090450479930192, "calibration/coverage@15%": 0.3981652644399026, "calibration/coverage@20%": 0.5950416503253889, "calibration/coverage@25%": 0.7859338837229998, "calibration/coverage@30%": 0.8696369763562011, "calibration/coverage@5%": 0.16477966841186736, "calibration/ece": 0.13885856134762334, "calibration/mean_confidence": 0.6056185988614311, "calibration/prompt_uniqueness": 0.860661265331734, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00980902777777779, "completions/max_length": 3500.2, "completions/max_terminated_length": 3500.2, "completions/mean_length": 624.0658935546875, "completions/mean_terminated_length": 630.3100341796875, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.2999962500468744, "grad_norm": 0.00044128746958449483, "learning_rate": 2.5e-06, "loss": -0.0094, "num_tokens": 246482741.0, "reward": 0.9921006441116333, "reward_std": 0.12561330199241638, "rewards/accuracy_reward": 0.6873263835906982, "rewards/brier_reward": 0.7992384910583497, "rewards/confidence_uniqueness_reward": 0.9376503348350524, "rewards/format_reward": 0.9897569417953491, "rewards/frontier_aurc_reward": -0.001279058470390737, "rewards/frontier_coverage_0": 0.006591923534870148, "rewards/frontier_coverage_1": 0.006591923534870148, "rewards/frontier_coverage_10": 0.006591923534870148, "rewards/frontier_coverage_15": 0.006591923534870148, "rewards/frontier_coverage_20": 0.006591923534870148, "rewards/frontier_coverage_25": 0.006591923534870148, "rewards/frontier_coverage_5": 0.006591923534870148, "rewards/frontier_ece_reward": 0.01775702629238367, "rewards/frontier_entropy_batch_reward": -0.22466442584991456, "signal/accuracy_reward/centered_abs_mean": 0.15937500298023224, "signal/accuracy_reward/group_bin_occupancy": 0.19687499999999997, "signal/accuracy_reward/group_std_mean": 0.20706891417503356, "signal/accuracy_reward/group_zero_std_frac": 0.42499999403953553, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07968750149011612, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07968750149011612, "signal/advantage_abs_mean": 0.09454565495252609, "signal/advantage_pre_scale_abs_mean": 0.09454565495252609, "signal/advantage_pre_scale_std": 0.15264492034912108, "signal/advantage_std": 0.15264492034912108, "signal/brier_reward/centered_abs_mean": 0.1370186984539032, "signal/brier_reward/group_bin_occupancy": 0.8465277777777779, "signal/brier_reward/group_std_mean": 0.17555441856384277, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013701869174838066, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013701869174838066, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030255821347236634, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8583333333333334, "signal/confidence_uniqueness_reward/group_std_mean": 0.047763481736183167, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030255821999162435, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030255821999162435, "signal/format_reward/centered_abs_mean": 0.01732855923473835, "signal/format_reward/group_bin_occupancy": 0.14166666666666666, "signal/format_reward/group_std_mean": 0.03218508400022983, "signal/format_reward/group_zero_std_frac": 0.8666666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008664279617369175, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008664279617369175, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014055859064683318, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6861111111111111, "signal/frontier_aurc_reward/group_std_mean": 0.002288359007798135, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.756982401275309e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.756982401275309e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1769299864768982, "signal/frontier_coverage_0/group_bin_occupancy": 0.8444444444444444, "signal/frontier_coverage_0/group_std_mean": 0.23164838552474976, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002211624849587679, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002211624849587679, "signal/frontier_coverage_1/centered_abs_mean": 0.1769299864768982, "signal/frontier_coverage_1/group_bin_occupancy": 0.8444444444444444, "signal/frontier_coverage_1/group_std_mean": 0.23164838552474976, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002211624849587679, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002211624849587679, "signal/frontier_coverage_10/centered_abs_mean": 0.1769299864768982, "signal/frontier_coverage_10/group_bin_occupancy": 0.8444444444444444, "signal/frontier_coverage_10/group_std_mean": 0.23164838552474976, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002211624849587679, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002211624849587679, "signal/frontier_coverage_15/centered_abs_mean": 0.1769299864768982, "signal/frontier_coverage_15/group_bin_occupancy": 0.8444444444444444, "signal/frontier_coverage_15/group_std_mean": 0.23164838552474976, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002211624849587679, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002211624849587679, "signal/frontier_coverage_20/centered_abs_mean": 0.1769299864768982, "signal/frontier_coverage_20/group_bin_occupancy": 0.8444444444444444, "signal/frontier_coverage_20/group_std_mean": 0.23164838552474976, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002211624849587679, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002211624849587679, "signal/frontier_coverage_25/centered_abs_mean": 0.1769299864768982, "signal/frontier_coverage_25/group_bin_occupancy": 0.8444444444444444, "signal/frontier_coverage_25/group_std_mean": 0.23164838552474976, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002211624849587679, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002211624849587679, "signal/frontier_coverage_5/centered_abs_mean": 0.1769299864768982, "signal/frontier_coverage_5/group_bin_occupancy": 0.8444444444444444, "signal/frontier_coverage_5/group_std_mean": 0.23164838552474976, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002211624849587679, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002211624849587679, "signal/frontier_ece_reward/centered_abs_mean": 0.038429119437932965, "signal/frontier_ece_reward/group_bin_occupancy": 0.6680555555555555, "signal/frontier_ece_reward/group_std_mean": 0.04894906431436539, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0038429120555520057, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0038429120555520057, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2667219638824463, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.79375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3330686569213867, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02667219564318657, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02667219564318657, "step": 125 }, { "calibration/aurc": 0.2570677552976662, "calibration/batch_distribution_entropy": 0.9707298208948314, "calibration/batch_entropy_100bins": 0.9563656907393726, "calibration/batch_entropy_10bins": 0.9707298208948314, "calibration/batch_entropy_50bins": 0.968625931346003, "calibration/batch_uniqueness": 0.9509968355199117, "calibration/buffer_distribution_entropy": 0.9483898179653849, "calibration/buffer_entropy_100bins": 0.9520036670790271, "calibration/buffer_entropy_10bins": 0.9483898179653849, "calibration/buffer_entropy_50bins": 0.9599194290242906, "calibration/confidence_entropy": 0.5047739615640212, "calibration/coverage@0%": 0.020457317272494398, "calibration/coverage@1%": 0.020457317272494398, "calibration/coverage@10%": 0.08590234345050488, "calibration/coverage@15%": 0.2708854907636981, "calibration/coverage@20%": 0.43295266504226493, "calibration/coverage@25%": 0.5562714025652029, "calibration/coverage@30%": 0.6542660724253092, "calibration/coverage@5%": 0.027263599995007482, "calibration/ece": 0.15055879786288034, "calibration/mean_confidence": 0.5460301453849368, "calibration/prompt_uniqueness": 0.8671654591324399, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01692708333333335, "completions/max_length": 3702.0, "completions/max_terminated_length": 3702.0, "completions/mean_length": 650.5223022460938, "completions/mean_terminated_length": 661.8441528320312, "completions/min_length": 0.0, "completions/min_terminated_length": 173.4, "epoch": 0.3119961000487494, "grad_norm": 0.0004091080045327544, "learning_rate": 2.349397590361446e-06, "loss": -0.0137, "num_tokens": 257101558.0, "reward": 0.9774605631828308, "reward_std": 0.134475240111351, "rewards/accuracy_reward": 0.6628472208976746, "rewards/brier_reward": 0.7783628225326538, "rewards/confidence_uniqueness_reward": 0.9335785031318664, "rewards/format_reward": 0.9829861044883728, "rewards/frontier_aurc_reward": -0.0014200884848833083, "rewards/frontier_coverage_0": 0.006945094745606184, "rewards/frontier_coverage_1": 0.006945094745606184, "rewards/frontier_coverage_10": 0.006945094745606184, "rewards/frontier_coverage_15": 0.006945094745606184, "rewards/frontier_coverage_20": 0.006945094745606184, "rewards/frontier_coverage_25": 0.006945094745606184, "rewards/frontier_coverage_5": 0.006945094745606184, "rewards/frontier_ece_reward": 0.013097218424081802, "rewards/frontier_entropy_batch_reward": -0.18549902439117433, "signal/accuracy_reward/centered_abs_mean": 0.16636284589767455, "signal/accuracy_reward/group_bin_occupancy": 0.20243055555555559, "signal/accuracy_reward/group_std_mean": 0.21947809755802156, "signal/accuracy_reward/group_zero_std_frac": 0.38055555820465087, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08318142294883728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08318142294883728, "signal/advantage_abs_mean": 0.10097838938236237, "signal/advantage_pre_scale_abs_mean": 0.10097838938236237, "signal/advantage_pre_scale_std": 0.16073089241981506, "signal/advantage_std": 0.16073089241981506, "signal/brier_reward/centered_abs_mean": 0.1482792615890503, "signal/brier_reward/group_bin_occupancy": 0.8479166666666668, "signal/brier_reward/group_std_mean": 0.1903451293706894, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014827927015721798, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014827927015721798, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03590902425348759, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8461805555555555, "signal/confidence_uniqueness_reward/group_std_mean": 0.05420064702630043, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035909025464206934, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035909025464206934, "signal/format_reward/centered_abs_mean": 0.02457682266831398, "signal/format_reward/group_bin_occupancy": 0.14409722222222224, "signal/format_reward/group_std_mean": 0.04063734821975231, "signal/format_reward/group_zero_std_frac": 0.8472222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01228841133415699, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01228841133415699, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014170024311169981, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7017361111111111, "signal/frontier_aurc_reward/group_std_mean": 0.002285012090578675, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7712531553115694e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7712531553115694e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19146940410137175, "signal/frontier_coverage_0/group_bin_occupancy": 0.8520833333333334, "signal/frontier_coverage_0/group_std_mean": 0.25203768312931063, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023933676537126304, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023933676537126304, "signal/frontier_coverage_1/centered_abs_mean": 0.19146940410137175, "signal/frontier_coverage_1/group_bin_occupancy": 0.8520833333333334, "signal/frontier_coverage_1/group_std_mean": 0.25203768312931063, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023933676537126304, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023933676537126304, "signal/frontier_coverage_10/centered_abs_mean": 0.19146940410137175, "signal/frontier_coverage_10/group_bin_occupancy": 0.8520833333333334, "signal/frontier_coverage_10/group_std_mean": 0.25203768312931063, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023933676537126304, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023933676537126304, "signal/frontier_coverage_15/centered_abs_mean": 0.19146940410137175, "signal/frontier_coverage_15/group_bin_occupancy": 0.8520833333333334, "signal/frontier_coverage_15/group_std_mean": 0.25203768312931063, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023933676537126304, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023933676537126304, "signal/frontier_coverage_20/centered_abs_mean": 0.19146940410137175, "signal/frontier_coverage_20/group_bin_occupancy": 0.8520833333333334, "signal/frontier_coverage_20/group_std_mean": 0.25203768312931063, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023933676537126304, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023933676537126304, "signal/frontier_coverage_25/centered_abs_mean": 0.19146940410137175, "signal/frontier_coverage_25/group_bin_occupancy": 0.8520833333333334, "signal/frontier_coverage_25/group_std_mean": 0.25203768312931063, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023933676537126304, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023933676537126304, "signal/frontier_coverage_5/centered_abs_mean": 0.19146940410137175, "signal/frontier_coverage_5/group_bin_occupancy": 0.8520833333333334, "signal/frontier_coverage_5/group_std_mean": 0.25203768312931063, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023933676537126304, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023933676537126304, "signal/frontier_ece_reward/centered_abs_mean": 0.03642968088388443, "signal/frontier_ece_reward/group_bin_occupancy": 0.6868055555555557, "signal/frontier_ece_reward/group_std_mean": 0.04717910811305046, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036429683677852154, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036429683677852154, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24993859529495238, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7871527777777778, "signal/frontier_entropy_batch_reward/group_std_mean": 0.31895039677619935, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024993859976530076, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024993859976530076, "step": 130 }, { "calibration/aurc": 0.23095194989951517, "calibration/batch_distribution_entropy": 0.9421492482556129, "calibration/batch_entropy_100bins": 0.9440415676294579, "calibration/batch_entropy_10bins": 0.9421492482556129, "calibration/batch_entropy_50bins": 0.9520449592729043, "calibration/batch_uniqueness": 0.9453759538189441, "calibration/buffer_distribution_entropy": 0.950428047537104, "calibration/buffer_entropy_100bins": 0.9543930678343209, "calibration/buffer_entropy_10bins": 0.950428047537104, "calibration/buffer_entropy_50bins": 0.9617481873791439, "calibration/confidence_entropy": 0.47065056430076924, "calibration/coverage@0%": 0.04189883140091326, "calibration/coverage@1%": 0.04189883140091326, "calibration/coverage@10%": 0.3034366940325464, "calibration/coverage@15%": 0.3935276632602748, "calibration/coverage@20%": 0.48246784032256607, "calibration/coverage@25%": 0.5515483076447136, "calibration/coverage@30%": 0.6815676706121045, "calibration/coverage@5%": 0.17324618582756293, "calibration/ece": 0.14784717956359078, "calibration/mean_confidence": 0.6212378872019609, "calibration/prompt_uniqueness": 0.8551414614959321, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011805555555555536, "completions/max_length": 3302.8, "completions/max_terminated_length": 3302.8, "completions/mean_length": 626.293505859375, "completions/mean_terminated_length": 633.8015014648438, "completions/min_length": 0.0, "completions/min_terminated_length": 159.4, "epoch": 0.32399595005062437, "grad_norm": 0.0004363918851595372, "learning_rate": 2.1987951807228917e-06, "loss": -0.0106, "num_tokens": 267409483.0, "reward": 0.9906501173973083, "reward_std": 0.12640073150396347, "rewards/accuracy_reward": 0.6786458253860473, "rewards/brier_reward": 0.7910825252532959, "rewards/confidence_uniqueness_reward": 0.9383931279182434, "rewards/format_reward": 0.9881076216697693, "rewards/frontier_aurc_reward": -0.0013120988383889197, "rewards/frontier_coverage_0": 0.014272965677082538, "rewards/frontier_coverage_1": 0.014272965677082538, "rewards/frontier_coverage_10": 0.014272965677082538, "rewards/frontier_coverage_15": 0.014272965677082538, "rewards/frontier_coverage_20": 0.014272965677082538, "rewards/frontier_coverage_25": 0.014272965677082538, "rewards/frontier_coverage_5": 0.014272965677082538, "rewards/frontier_ece_reward": 0.016592884063720705, "rewards/frontier_entropy_batch_reward": -0.18565942943096161, "signal/accuracy_reward/centered_abs_mean": 0.15636935830116272, "signal/accuracy_reward/group_bin_occupancy": 0.19722222222222224, "signal/accuracy_reward/group_std_mean": 0.20537342131137848, "signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07818467915058136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07818467915058136, "signal/advantage_abs_mean": 0.0925526574254036, "signal/advantage_pre_scale_abs_mean": 0.0925526574254036, "signal/advantage_pre_scale_std": 0.15250625014305114, "signal/advantage_std": 0.15250625014305114, "signal/brier_reward/centered_abs_mean": 0.14853745102882385, "signal/brier_reward/group_bin_occupancy": 0.8354166666666668, "signal/brier_reward/group_std_mean": 0.1917984515428543, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014853744953870773, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014853744953870773, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03174131475389004, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8378472222222222, "signal/confidence_uniqueness_reward/group_std_mean": 0.05302174612879753, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031741314101964234, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031741314101964234, "signal/format_reward/centered_abs_mean": 0.02053493894636631, "signal/format_reward/group_bin_occupancy": 0.14583333333333334, "signal/format_reward/group_std_mean": 0.039642113447189334, "signal/format_reward/group_zero_std_frac": 0.8333333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010267469473183155, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010267469473183155, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014899963280186057, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6937500000000001, "signal/frontier_aurc_reward/group_std_mean": 0.002411051280796528, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.862495373643469e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.862495373643469e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19647094905376433, "signal/frontier_coverage_0/group_bin_occupancy": 0.8375, "signal/frontier_coverage_0/group_std_mean": 0.25708119869232177, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024558869190514088, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024558869190514088, "signal/frontier_coverage_1/centered_abs_mean": 0.19647094905376433, "signal/frontier_coverage_1/group_bin_occupancy": 0.8375, "signal/frontier_coverage_1/group_std_mean": 0.25708119869232177, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024558869190514088, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024558869190514088, "signal/frontier_coverage_10/centered_abs_mean": 0.19647094905376433, "signal/frontier_coverage_10/group_bin_occupancy": 0.8375, "signal/frontier_coverage_10/group_std_mean": 0.25708119869232177, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024558869190514088, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024558869190514088, "signal/frontier_coverage_15/centered_abs_mean": 0.19647094905376433, "signal/frontier_coverage_15/group_bin_occupancy": 0.8375, "signal/frontier_coverage_15/group_std_mean": 0.25708119869232177, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024558869190514088, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024558869190514088, "signal/frontier_coverage_20/centered_abs_mean": 0.19647094905376433, "signal/frontier_coverage_20/group_bin_occupancy": 0.8375, "signal/frontier_coverage_20/group_std_mean": 0.25708119869232177, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024558869190514088, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024558869190514088, "signal/frontier_coverage_25/centered_abs_mean": 0.19647094905376433, "signal/frontier_coverage_25/group_bin_occupancy": 0.8375, "signal/frontier_coverage_25/group_std_mean": 0.25708119869232177, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024558869190514088, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024558869190514088, "signal/frontier_coverage_5/centered_abs_mean": 0.19647094905376433, "signal/frontier_coverage_5/group_bin_occupancy": 0.8375, "signal/frontier_coverage_5/group_std_mean": 0.25708119869232177, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024558869190514088, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024558869190514088, "signal/frontier_ece_reward/centered_abs_mean": 0.03865344226360321, "signal/frontier_ece_reward/group_bin_occupancy": 0.6791666666666667, "signal/frontier_ece_reward/group_std_mean": 0.04832939356565476, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0038653444964438675, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0038653444964438675, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23874907791614533, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.773611111111111, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3072973072528839, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023874907195568083, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023874907195568083, "step": 135 }, { "calibration/aurc": 0.1294049869245013, "calibration/batch_distribution_entropy": 0.9557180391089094, "calibration/batch_entropy_100bins": 0.9497484635160689, "calibration/batch_entropy_10bins": 0.9557180391089094, "calibration/batch_entropy_50bins": 0.9591875124129359, "calibration/batch_uniqueness": 0.9468833336742613, "calibration/buffer_distribution_entropy": 0.9549444448123883, "calibration/buffer_entropy_100bins": 0.9602670469433393, "calibration/buffer_entropy_10bins": 0.9549444448123883, "calibration/buffer_entropy_50bins": 0.9660572803453091, "calibration/confidence_entropy": 0.4833540273370396, "calibration/coverage@0%": 0.05841797685887148, "calibration/coverage@1%": 0.05841797685887148, "calibration/coverage@10%": 0.48837748487367827, "calibration/coverage@15%": 0.6872088995406774, "calibration/coverage@20%": 0.8049094782025777, "calibration/coverage@25%": 0.9049543537246117, "calibration/coverage@30%": 0.9723005208262135, "calibration/coverage@5%": 0.16348598057755775, "calibration/ece": 0.12621167596611338, "calibration/mean_confidence": 0.5960379852244188, "calibration/prompt_uniqueness": 0.8497180192039384, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012586805555555558, "completions/max_length": 3468.0, "completions/max_terminated_length": 3468.0, "completions/mean_length": 624.9230834960938, "completions/mean_terminated_length": 632.8444091796875, "completions/min_length": 0.0, "completions/min_terminated_length": 187.2, "epoch": 0.33599580005249935, "grad_norm": 0.00038522930117323995, "learning_rate": 2.0481927710843377e-06, "loss": -0.0106, "num_tokens": 277712821.0, "reward": 0.9888324856758117, "reward_std": 0.12152263075113297, "rewards/accuracy_reward": 0.6763020873069763, "rewards/brier_reward": 0.7869715809822082, "rewards/confidence_uniqueness_reward": 0.9377409458160401, "rewards/format_reward": 0.9873264074325562, "rewards/frontier_aurc_reward": -0.0012948142597451807, "rewards/frontier_coverage_0": 0.013374109752476215, "rewards/frontier_coverage_1": 0.013374109752476215, "rewards/frontier_coverage_10": 0.013374109752476215, "rewards/frontier_coverage_15": 0.013374109752476215, "rewards/frontier_coverage_20": 0.013374109752476215, "rewards/frontier_coverage_25": 0.013374109752476215, "rewards/frontier_coverage_5": 0.013374109752476215, "rewards/frontier_ece_reward": 0.013652277737855911, "rewards/frontier_entropy_batch_reward": -0.17972289621829987, "signal/accuracy_reward/centered_abs_mean": 0.1446994349360466, "signal/accuracy_reward/group_bin_occupancy": 0.1954861111111111, "signal/accuracy_reward/group_std_mean": 0.19354265332221984, "signal/accuracy_reward/group_zero_std_frac": 0.43611112236976624, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0723497174680233, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0723497174680233, "signal/advantage_abs_mean": 0.08829654604196549, "signal/advantage_pre_scale_abs_mean": 0.08829654604196549, "signal/advantage_pre_scale_std": 0.1477721154689789, "signal/advantage_std": 0.1477721154689789, "signal/brier_reward/centered_abs_mean": 0.1490771532058716, "signal/brier_reward/group_bin_occupancy": 0.8548611111111111, "signal/brier_reward/group_std_mean": 0.1905330777168274, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014907715283334256, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014907715283334256, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.031153790652751923, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.829861111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.05239210352301597, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031153791584074496, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031153791584074496, "signal/format_reward/centered_abs_mean": 0.019932725466787815, "signal/format_reward/group_bin_occupancy": 0.14583333333333331, "signal/format_reward/group_std_mean": 0.03907729685306549, "signal/format_reward/group_zero_std_frac": 0.8333333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009966362733393908, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009966362733393908, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014448148664087056, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6923611111111112, "signal/frontier_aurc_reward/group_std_mean": 0.002320256642997265, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8060186630464158e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8060186630464158e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19429327845573424, "signal/frontier_coverage_0/group_bin_occupancy": 0.8440972222222222, "signal/frontier_coverage_0/group_std_mean": 0.2543476581573486, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00242866612970829, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00242866612970829, "signal/frontier_coverage_1/centered_abs_mean": 0.19429327845573424, "signal/frontier_coverage_1/group_bin_occupancy": 0.8440972222222222, "signal/frontier_coverage_1/group_std_mean": 0.2543476581573486, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00242866612970829, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00242866612970829, "signal/frontier_coverage_10/centered_abs_mean": 0.19429327845573424, "signal/frontier_coverage_10/group_bin_occupancy": 0.8440972222222222, "signal/frontier_coverage_10/group_std_mean": 0.2543476581573486, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00242866612970829, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00242866612970829, "signal/frontier_coverage_15/centered_abs_mean": 0.19429327845573424, "signal/frontier_coverage_15/group_bin_occupancy": 0.8440972222222222, "signal/frontier_coverage_15/group_std_mean": 0.2543476581573486, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00242866612970829, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00242866612970829, "signal/frontier_coverage_20/centered_abs_mean": 0.19429327845573424, "signal/frontier_coverage_20/group_bin_occupancy": 0.8440972222222222, "signal/frontier_coverage_20/group_std_mean": 0.2543476581573486, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00242866612970829, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00242866612970829, "signal/frontier_coverage_25/centered_abs_mean": 0.19429327845573424, "signal/frontier_coverage_25/group_bin_occupancy": 0.8440972222222222, "signal/frontier_coverage_25/group_std_mean": 0.2543476581573486, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00242866612970829, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00242866612970829, "signal/frontier_coverage_5/centered_abs_mean": 0.19429327845573424, "signal/frontier_coverage_5/group_bin_occupancy": 0.8440972222222222, "signal/frontier_coverage_5/group_std_mean": 0.2543476581573486, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00242866612970829, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00242866612970829, "signal/frontier_ece_reward/centered_abs_mean": 0.035150817781686786, "signal/frontier_ece_reward/group_bin_occupancy": 0.6836805555555556, "signal/frontier_ece_reward/group_std_mean": 0.04459300860762596, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035150818061083556, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035150818061083556, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23712966442108155, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.779513888888889, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3046766459941864, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023712967336177827, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023712967336177827, "step": 140 }, { "calibration/aurc": 0.18683794065072812, "calibration/batch_distribution_entropy": 0.9806387486177448, "calibration/batch_entropy_100bins": 0.9596139831547141, "calibration/batch_entropy_10bins": 0.9806387486177448, "calibration/batch_entropy_50bins": 0.9722668151489826, "calibration/batch_uniqueness": 0.9519202784227648, "calibration/buffer_distribution_entropy": 0.965098347204146, "calibration/buffer_entropy_100bins": 0.9711153442674696, "calibration/buffer_entropy_10bins": 0.965098347204146, "calibration/buffer_entropy_50bins": 0.9745830308057396, "calibration/confidence_entropy": 0.49438751115496604, "calibration/coverage@0%": 0.023666604008716417, "calibration/coverage@1%": 0.023666604008716417, "calibration/coverage@10%": 0.34799730209956625, "calibration/coverage@15%": 0.48327203315425693, "calibration/coverage@20%": 0.6064102691678379, "calibration/coverage@25%": 0.7282333990057179, "calibration/coverage@30%": 0.8306165917741957, "calibration/coverage@5%": 0.05673747015044871, "calibration/ece": 0.14025309224842797, "calibration/mean_confidence": 0.5135840354133834, "calibration/prompt_uniqueness": 0.8570269969432882, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0109375, "completions/max_length": 3256.0, "completions/max_terminated_length": 3256.0, "completions/mean_length": 611.480029296875, "completions/mean_terminated_length": 618.2994384765625, "completions/min_length": 0.0, "completions/min_terminated_length": 192.4, "epoch": 0.34799565005437433, "grad_norm": 0.0003621011273935437, "learning_rate": 1.8975903614457832e-06, "loss": -0.0102, "num_tokens": 287821679.0, "reward": 1.004742157459259, "reward_std": 0.11290163099765778, "rewards/accuracy_reward": 0.7052951335906983, "rewards/brier_reward": 0.7879093527793884, "rewards/confidence_uniqueness_reward": 0.9406887650489807, "rewards/format_reward": 0.9889756917953492, "rewards/frontier_aurc_reward": -0.0011246049660257995, "rewards/frontier_coverage_0": -0.0045726167038083075, "rewards/frontier_coverage_1": -0.0045726167038083075, "rewards/frontier_coverage_10": -0.0045726167038083075, "rewards/frontier_coverage_15": -0.0045726167038083075, "rewards/frontier_coverage_20": -0.0045726167038083075, "rewards/frontier_coverage_25": -0.005139388330280781, "rewards/frontier_coverage_5": -0.0045726167038083075, "rewards/frontier_ece_reward": 0.008927960135042667, "rewards/frontier_entropy_batch_reward": -0.1572466716170311, "signal/accuracy_reward/centered_abs_mean": 0.1355523034930229, "signal/accuracy_reward/group_bin_occupancy": 0.19305555555555556, "signal/accuracy_reward/group_std_mean": 0.1837514191865921, "signal/accuracy_reward/group_zero_std_frac": 0.45555557012557985, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06777615174651146, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06777615174651146, "signal/advantage_abs_mean": 0.08125371336936951, "signal/advantage_pre_scale_abs_mean": 0.08125371336936951, "signal/advantage_pre_scale_std": 0.140377277135849, "signal/advantage_std": 0.140377277135849, "signal/brier_reward/centered_abs_mean": 0.14059088230133057, "signal/brier_reward/group_bin_occupancy": 0.8302083333333334, "signal/brier_reward/group_std_mean": 0.18201070427894592, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014059088379144668, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014059088379144668, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.029994430020451546, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8548611111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.04794644489884377, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029994430486112835, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029994430486112835, "signal/format_reward/centered_abs_mean": 0.01914605051279068, "signal/format_reward/group_bin_occupancy": 0.14201388888888888, "signal/format_reward/group_std_mean": 0.03469080775976181, "signal/format_reward/group_zero_std_frac": 0.8638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00957302525639534, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00957302525639534, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013646916137076913, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6895833333333333, "signal/frontier_aurc_reward/group_std_mean": 0.0023204814875498413, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7058645971701482e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7058645971701482e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19198527336120605, "signal/frontier_coverage_0/group_bin_occupancy": 0.821875, "signal/frontier_coverage_0/group_std_mean": 0.2514296382665634, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002399816084653139, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002399816084653139, "signal/frontier_coverage_1/centered_abs_mean": 0.19198527336120605, "signal/frontier_coverage_1/group_bin_occupancy": 0.821875, "signal/frontier_coverage_1/group_std_mean": 0.2514296382665634, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002399816084653139, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002399816084653139, "signal/frontier_coverage_10/centered_abs_mean": 0.19198527336120605, "signal/frontier_coverage_10/group_bin_occupancy": 0.821875, "signal/frontier_coverage_10/group_std_mean": 0.2514296382665634, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002399816084653139, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002399816084653139, "signal/frontier_coverage_15/centered_abs_mean": 0.19198527336120605, "signal/frontier_coverage_15/group_bin_occupancy": 0.821875, "signal/frontier_coverage_15/group_std_mean": 0.2514296382665634, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002399816084653139, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002399816084653139, "signal/frontier_coverage_20/centered_abs_mean": 0.19198527336120605, "signal/frontier_coverage_20/group_bin_occupancy": 0.821875, "signal/frontier_coverage_20/group_std_mean": 0.2514296382665634, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002399816084653139, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002399816084653139, "signal/frontier_coverage_25/centered_abs_mean": 0.19039924442768097, "signal/frontier_coverage_25/group_bin_occupancy": 0.8211805555555557, "signal/frontier_coverage_25/group_std_mean": 0.24940116107463836, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002379990741610527, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002379990741610527, "signal/frontier_coverage_5/centered_abs_mean": 0.19198527336120605, "signal/frontier_coverage_5/group_bin_occupancy": 0.821875, "signal/frontier_coverage_5/group_std_mean": 0.2514296382665634, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002399816084653139, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002399816084653139, "signal/frontier_ece_reward/centered_abs_mean": 0.030152727663517, "signal/frontier_ece_reward/group_bin_occupancy": 0.6826388888888889, "signal/frontier_ece_reward/group_std_mean": 0.03811613321304321, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030152729246765374, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030152729246765374, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2264914721250534, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7663194444444444, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2937849909067154, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022649147361516953, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022649147361516953, "step": 145 }, { "calibration/aurc": 0.168028061806691, "calibration/batch_distribution_entropy": 0.9503921341637899, "calibration/batch_entropy_100bins": 0.9454745464314603, "calibration/batch_entropy_10bins": 0.9503921341637899, "calibration/batch_entropy_50bins": 0.9544974811591391, "calibration/batch_uniqueness": 0.9470963432074356, "calibration/buffer_distribution_entropy": 0.973697032680677, "calibration/buffer_entropy_100bins": 0.9799372478415842, "calibration/buffer_entropy_10bins": 0.973697032680677, "calibration/buffer_entropy_50bins": 0.9815119596059615, "calibration/confidence_entropy": 0.46900084214722726, "calibration/coverage@0%": 0.0677531953645879, "calibration/coverage@1%": 0.08203890965030221, "calibration/coverage@10%": 0.40898672177949313, "calibration/coverage@15%": 0.5754278277850677, "calibration/coverage@20%": 0.6407174776379441, "calibration/coverage@25%": 0.6956564603220498, "calibration/coverage@30%": 0.7747426797957127, "calibration/coverage@5%": 0.3361056663766333, "calibration/ece": 0.1727506317152436, "calibration/mean_confidence": 0.5696691972139503, "calibration/prompt_uniqueness": 0.8616723242905033, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009548611111111115, "completions/max_length": 3496.8, "completions/max_terminated_length": 3496.8, "completions/mean_length": 664.9171875, "completions/mean_terminated_length": 671.283251953125, "completions/min_length": 0.0, "completions/min_terminated_length": 185.4, "epoch": 0.3599955000562493, "grad_norm": 0.00037436833372339606, "learning_rate": 1.7469879518072292e-06, "loss": -0.0077, "num_tokens": 298591861.0, "reward": 0.9980527639389039, "reward_std": 0.12233888059854507, "rewards/accuracy_reward": 0.6992187619209289, "rewards/brier_reward": 0.802030611038208, "rewards/confidence_uniqueness_reward": 0.9381472826004028, "rewards/format_reward": 0.9902777791023254, "rewards/frontier_aurc_reward": -0.001526768645271659, "rewards/frontier_coverage_0": 0.011462044250220061, "rewards/frontier_coverage_1": 0.011462044250220061, "rewards/frontier_coverage_10": 0.011462044250220061, "rewards/frontier_coverage_15": 0.011462044250220061, "rewards/frontier_coverage_20": 0.01288942052051425, "rewards/frontier_coverage_25": 0.03849505893886089, "rewards/frontier_coverage_5": 0.011462044250220061, "rewards/frontier_ece_reward": 0.007876492012292147, "rewards/frontier_entropy_batch_reward": -0.22840518951416017, "signal/accuracy_reward/centered_abs_mean": 0.14632704257965087, "signal/accuracy_reward/group_bin_occupancy": 0.196875, "signal/accuracy_reward/group_std_mean": 0.1965962290763855, "signal/accuracy_reward/group_zero_std_frac": 0.42500001192092896, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07316352128982544, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07316352128982544, "signal/advantage_abs_mean": 0.08877704441547393, "signal/advantage_pre_scale_abs_mean": 0.08877704441547393, "signal/advantage_pre_scale_std": 0.14888640940189363, "signal/advantage_std": 0.14888640940189363, "signal/brier_reward/centered_abs_mean": 0.14254556894302367, "signal/brier_reward/group_bin_occupancy": 0.8364583333333334, "signal/brier_reward/group_std_mean": 0.18436427116394044, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014254557155072688, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014254557155072688, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.029425183311104774, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8427083333333332, "signal/confidence_uniqueness_reward/group_std_mean": 0.05025056228041649, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002942518377676606, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002942518377676606, "signal/format_reward/centered_abs_mean": 0.01655815988779068, "signal/format_reward/group_bin_occupancy": 0.14479166666666668, "signal/format_reward/group_std_mean": 0.034861961379647255, "signal/format_reward/group_zero_std_frac": 0.8416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00827907994389534, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00827907994389534, "signal/frontier_aurc_reward/centered_abs_mean": 0.00209680434782058, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6986111111111111, "signal/frontier_aurc_reward/group_std_mean": 0.003620346961542964, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6210054784314707e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6210054784314707e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18070359230041505, "signal/frontier_coverage_0/group_bin_occupancy": 0.8350694444444444, "signal/frontier_coverage_0/group_std_mean": 0.23828611075878142, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002258794941008091, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002258794941008091, "signal/frontier_coverage_1/centered_abs_mean": 0.18070359230041505, "signal/frontier_coverage_1/group_bin_occupancy": 0.8350694444444444, "signal/frontier_coverage_1/group_std_mean": 0.23828611075878142, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002258794941008091, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002258794941008091, "signal/frontier_coverage_10/centered_abs_mean": 0.18070359230041505, "signal/frontier_coverage_10/group_bin_occupancy": 0.8350694444444444, "signal/frontier_coverage_10/group_std_mean": 0.23828611075878142, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002258794941008091, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002258794941008091, "signal/frontier_coverage_15/centered_abs_mean": 0.18070359230041505, "signal/frontier_coverage_15/group_bin_occupancy": 0.8350694444444444, "signal/frontier_coverage_15/group_std_mean": 0.23828611075878142, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002258794941008091, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002258794941008091, "signal/frontier_coverage_20/centered_abs_mean": 0.17324572205543518, "signal/frontier_coverage_20/group_bin_occupancy": 0.8274305555555556, "signal/frontier_coverage_20/group_std_mean": 0.22867600619792938, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021655716467648745, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021655716467648745, "signal/frontier_coverage_25/centered_abs_mean": 0.08664727360010147, "signal/frontier_coverage_25/group_bin_occupancy": 0.8840277777777779, "signal/frontier_coverage_25/group_std_mean": 0.1133154422044754, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010830909595824778, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010830909595824778, "signal/frontier_coverage_5/centered_abs_mean": 0.18070359230041505, "signal/frontier_coverage_5/group_bin_occupancy": 0.8350694444444444, "signal/frontier_coverage_5/group_std_mean": 0.23828611075878142, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002258794941008091, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002258794941008091, "signal/frontier_ece_reward/centered_abs_mean": 0.025348235666751862, "signal/frontier_ece_reward/group_bin_occupancy": 0.7229166666666668, "signal/frontier_ece_reward/group_std_mean": 0.032069115340709685, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0025348236784338953, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0025348236784338953, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2727514892816544, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7552083333333334, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34558807611465453, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027275149524211884, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027275149524211884, "step": 150 }, { "epoch": 0.3599955000562493, "eval_calibration/aurc": 0.14901822974262594, "eval_calibration/batch_distribution_entropy": 0.9304030834834719, "eval_calibration/batch_entropy_100bins": 0.6979890618590382, "eval_calibration/batch_entropy_10bins": 0.9304030834834719, "eval_calibration/batch_entropy_50bins": 0.7766607817845022, "eval_calibration/batch_uniqueness": 0.8930084974900759, "eval_calibration/buffer_distribution_entropy": 0.97765497693518, "eval_calibration/buffer_entropy_100bins": 0.9841955955582375, "eval_calibration/buffer_entropy_10bins": 0.97765497693518, "eval_calibration/buffer_entropy_50bins": 0.9847609370459999, "eval_calibration/confidence_entropy": 0.4936314506498249, "eval_calibration/coverage@0%": 0.26278001792114697, "eval_calibration/coverage@1%": 0.26278001792114697, "eval_calibration/coverage@10%": 0.45868055555555554, "eval_calibration/coverage@15%": 0.6025649641577061, "eval_calibration/coverage@20%": 0.7132840501792114, "eval_calibration/coverage@25%": 0.9083893369175627, "eval_calibration/coverage@30%": 0.9567988351254479, "eval_calibration/coverage@5%": 0.3044466845878136, "eval_calibration/ece": 0.2226041519685852, "eval_calibration/mean_confidence": 0.5655246136044548, "eval_calibration/prompt_uniqueness": 0.8930084974900759, "eval_completions/clipped_ratio": 0.010416666666666666, "eval_completions/max_length": 2470.5, "eval_completions/max_terminated_length": 2470.5, "eval_completions/mean_length": 634.5142517089844, "eval_completions/mean_terminated_length": 641.2839864095052, "eval_completions/min_length": 52.5, "eval_completions/min_terminated_length": 234.16666666666666, "eval_loss": 0.0, "eval_num_tokens": 298591861.0, "eval_reward": 0.9375461836655935, "eval_reward_std": 0.23832263300816217, "eval_rewards/accuracy_reward": 0.6762152711550394, "eval_rewards/brier_reward": 0.782086193561554, "eval_rewards/confidence_uniqueness_reward": 0.8867801527182261, "eval_rewards/format_reward": 0.987847218910853, "eval_rewards/frontier_aurc_reward": -0.0018945778623068084, "eval_rewards/frontier_coverage_0": 0.008154223828266064, "eval_rewards/frontier_coverage_1": 0.008154223828266064, "eval_rewards/frontier_coverage_10": 0.008154223828266064, "eval_rewards/frontier_coverage_15": 0.008154223828266064, "eval_rewards/frontier_coverage_20": 0.013963257893919945, "eval_rewards/frontier_coverage_25": 0.05141168336073557, "eval_rewards/frontier_coverage_5": 0.008154223828266064, "eval_rewards/frontier_ece_reward": 0.004938475166757901, "eval_rewards/frontier_entropy_batch_reward": -0.6316870252291361, "eval_runtime": 214.6923, "eval_samples_per_second": 4.658, "eval_signal/accuracy_reward/centered_abs_mean": 0.4254014740387599, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4673873384793599, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21270073701937994, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21270073701937994, "eval_signal/advantage_abs_mean": 0.20312496026357016, "eval_signal/advantage_pre_scale_abs_mean": 0.20312496026357016, "eval_signal/advantage_pre_scale_std": 0.23702458292245865, "eval_signal/advantage_std": 0.23702458292245865, "eval_signal/brier_reward/centered_abs_mean": 0.202142134308815, "eval_signal/brier_reward/group_bin_occupancy": 0.8888888888888888, "eval_signal/brier_reward/group_std_mean": 0.2583857501546542, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020214214610556763, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.020214214610556763, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05369566256801287, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3923611111111111, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08944027374188106, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005369566303367416, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005369566303367416, "eval_signal/format_reward/centered_abs_mean": 0.023328992693374555, "eval_signal/format_reward/group_bin_occupancy": 0.16666666666666666, "eval_signal/format_reward/group_std_mean": 0.06276767483601968, "eval_signal/format_reward/group_zero_std_frac": 0.6666666865348816, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.011664496346687278, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.011664496346687278, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0031803955983680985, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.5972222222222222, "eval_signal/frontier_aurc_reward/group_std_mean": 0.007048736986083289, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9754943524409704e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9754943524409704e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.28077225387096405, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9374999999999999, "eval_signal/frontier_coverage_0/group_std_mean": 0.39534174899260205, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0035096531501039863, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0035096531501039863, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.28077225387096405, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9374999999999999, "eval_signal/frontier_coverage_1/group_std_mean": 0.39534174899260205, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035096531501039863, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035096531501039863, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.28077225387096405, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9374999999999999, "eval_signal/frontier_coverage_10/group_std_mean": 0.39534174899260205, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035096531501039863, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035096531501039863, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.28077225387096405, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9374999999999999, "eval_signal/frontier_coverage_15/group_std_mean": 0.39534174899260205, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035096531501039863, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035096531501039863, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.21713952968517938, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.8993055555555557, "eval_signal/frontier_coverage_20/group_std_mean": 0.3139382104078929, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027142442607631287, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027142442607631287, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.08837362627188365, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.954861111111111, "eval_signal/frontier_coverage_25/group_std_mean": 0.1131880668302377, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011046703827256958, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011046703827256958, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.28077225387096405, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9374999999999999, "eval_signal/frontier_coverage_5/group_std_mean": 0.39534174899260205, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035096531501039863, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035096531501039863, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.03151553000013033, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9375, "eval_signal/frontier_ece_reward/group_std_mean": 0.042221867789824806, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0031515529456858835, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0031515529456858835, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3152608970801036, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2916666666666667, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.3325229287147522, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03152609150856733, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03152609150856733, "eval_steps_per_second": 0.028, "step": 150 }, { "calibration/aurc": 0.12418416862657974, "calibration/batch_distribution_entropy": 0.9738042618432547, "calibration/batch_entropy_100bins": 0.959685408987745, "calibration/batch_entropy_10bins": 0.9738042618432547, "calibration/batch_entropy_50bins": 0.9715188105067746, "calibration/batch_uniqueness": 0.9511053370700557, "calibration/buffer_distribution_entropy": 0.9796740268447254, "calibration/buffer_entropy_100bins": 0.9863859099776396, "calibration/buffer_entropy_10bins": 0.9796740268447254, "calibration/buffer_entropy_50bins": 0.986449386820567, "calibration/confidence_entropy": 0.4959676103167527, "calibration/coverage@0%": 0.05926240634931017, "calibration/coverage@1%": 0.05926240634931017, "calibration/coverage@10%": 0.6127707884216798, "calibration/coverage@15%": 0.7128341997211541, "calibration/coverage@20%": 0.8080721228110945, "calibration/coverage@25%": 0.8990786433912484, "calibration/coverage@30%": 0.9890339425587467, "calibration/coverage@5%": 0.3137256346668646, "calibration/ece": 0.21717115584354518, "calibration/mean_confidence": 0.5605943705821324, "calibration/prompt_uniqueness": 0.8582839084278222, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009027777777777768, "completions/max_length": 3711.8, "completions/max_terminated_length": 3711.8, "completions/mean_length": 620.5934936523438, "completions/mean_terminated_length": 626.3040405273438, "completions/min_length": 0.0, "completions/min_terminated_length": 187.2, "epoch": 0.3719953500581243, "grad_norm": 0.00040308100869879127, "learning_rate": 1.5963855421686747e-06, "loss": -0.0072, "num_tokens": 308848810.0, "reward": 1.0218815565109254, "reward_std": 0.12359268218278885, "rewards/accuracy_reward": 0.7443576335906983, "rewards/brier_reward": 0.7995624542236328, "rewards/confidence_uniqueness_reward": 0.9407005667686462, "rewards/format_reward": 0.9903645873069763, "rewards/frontier_aurc_reward": -0.0011058273608796298, "rewards/frontier_coverage_0": -0.020049982517957688, "rewards/frontier_coverage_1": -0.020049982517957688, "rewards/frontier_coverage_10": -0.020049982517957688, "rewards/frontier_coverage_15": -0.020049982517957688, "rewards/frontier_coverage_20": 0.007909675501286984, "rewards/frontier_coverage_25": 0.07766608744859696, "rewards/frontier_coverage_5": -0.020049982517957688, "rewards/frontier_ece_reward": 0.0013356797680899035, "rewards/frontier_entropy_batch_reward": -0.19442155659198762, "signal/accuracy_reward/centered_abs_mean": 0.1566785991191864, "signal/accuracy_reward/group_bin_occupancy": 0.19791666666666666, "signal/accuracy_reward/group_std_mean": 0.20641724467277528, "signal/accuracy_reward/group_zero_std_frac": 0.4166666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0783392995595932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0783392995595932, "signal/advantage_abs_mean": 0.09009798169136048, "signal/advantage_pre_scale_abs_mean": 0.09009798169136048, "signal/advantage_pre_scale_std": 0.15116022229194642, "signal/advantage_std": 0.15116022229194642, "signal/brier_reward/centered_abs_mean": 0.14188904762268068, "signal/brier_reward/group_bin_occupancy": 0.8482638888888889, "signal/brier_reward/group_std_mean": 0.18152420222759247, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014188905246555805, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014188905246555805, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028706640005111694, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8447916666666666, "signal/confidence_uniqueness_reward/group_std_mean": 0.04965458139777183, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002870664047077298, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002870664047077298, "signal/format_reward/centered_abs_mean": 0.01727973110973835, "signal/format_reward/group_bin_occupancy": 0.14513888888888887, "signal/format_reward/group_std_mean": 0.03610437363386154, "signal/format_reward/group_zero_std_frac": 0.8388888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008639865554869175, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008639865554869175, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016173893585801125, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7194444444444444, "signal/frontier_aurc_reward/group_std_mean": 0.002777449763379991, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.021736818278441e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.021736818278441e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19061762392520903, "signal/frontier_coverage_0/group_bin_occupancy": 0.8347222222222221, "signal/frontier_coverage_0/group_std_mean": 0.2524797976016998, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002382720448076725, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002382720448076725, "signal/frontier_coverage_1/centered_abs_mean": 0.19061762392520903, "signal/frontier_coverage_1/group_bin_occupancy": 0.8347222222222221, "signal/frontier_coverage_1/group_std_mean": 0.2524797976016998, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002382720448076725, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002382720448076725, "signal/frontier_coverage_10/centered_abs_mean": 0.19061762392520903, "signal/frontier_coverage_10/group_bin_occupancy": 0.8347222222222221, "signal/frontier_coverage_10/group_std_mean": 0.2524797976016998, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002382720448076725, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002382720448076725, "signal/frontier_coverage_15/centered_abs_mean": 0.19061762392520903, "signal/frontier_coverage_15/group_bin_occupancy": 0.8347222222222221, "signal/frontier_coverage_15/group_std_mean": 0.2524797976016998, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002382720448076725, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002382720448076725, "signal/frontier_coverage_20/centered_abs_mean": 0.10665316879749298, "signal/frontier_coverage_20/group_bin_occupancy": 0.8378472222222222, "signal/frontier_coverage_20/group_std_mean": 0.1441801980137825, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013331646099686623, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013331646099686623, "signal/frontier_coverage_25/centered_abs_mean": 0.07222038358449936, "signal/frontier_coverage_25/group_bin_occupancy": 0.9309027777777776, "signal/frontier_coverage_25/group_std_mean": 0.0917926698923111, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009027547785080969, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009027547785080969, "signal/frontier_coverage_5/centered_abs_mean": 0.19061762392520903, "signal/frontier_coverage_5/group_bin_occupancy": 0.8347222222222221, "signal/frontier_coverage_5/group_std_mean": 0.2524797976016998, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002382720448076725, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002382720448076725, "signal/frontier_ece_reward/centered_abs_mean": 0.023065327480435372, "signal/frontier_ece_reward/group_bin_occupancy": 0.7246527777777778, "signal/frontier_ece_reward/group_std_mean": 0.02965252809226513, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0023065326735377313, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0023065326735377313, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24839998483657838, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7788194444444445, "signal/frontier_entropy_batch_reward/group_std_mean": 0.31682642698287966, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02484000064432621, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02484000064432621, "step": 155 }, { "calibration/aurc": 0.13573148767459628, "calibration/batch_distribution_entropy": 0.9496131093908463, "calibration/batch_entropy_100bins": 0.9478886953307164, "calibration/batch_entropy_10bins": 0.9496131093908463, "calibration/batch_entropy_50bins": 0.9558848076576348, "calibration/batch_uniqueness": 0.9477837762673808, "calibration/buffer_distribution_entropy": 0.9833397575756475, "calibration/buffer_entropy_100bins": 0.9902063640434747, "calibration/buffer_entropy_10bins": 0.9833397575756475, "calibration/buffer_entropy_50bins": 0.9895285524682471, "calibration/confidence_entropy": 0.5064340839226602, "calibration/coverage@0%": 0.06199548520452567, "calibration/coverage@1%": 0.100016318537859, "calibration/coverage@10%": 0.4863422228857656, "calibration/coverage@15%": 0.7070152931318631, "calibration/coverage@20%": 0.8079925379000178, "calibration/coverage@25%": 0.8801128926701571, "calibration/coverage@30%": 0.9193717277486911, "calibration/coverage@5%": 0.3584050596726108, "calibration/ece": 0.1611499914834087, "calibration/mean_confidence": 0.6053540170302975, "calibration/prompt_uniqueness": 0.8627381388571653, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013194444444444443, "completions/max_length": 3444.2, "completions/max_terminated_length": 3444.2, "completions/mean_length": 622.8822998046875, "completions/mean_terminated_length": 631.2034301757812, "completions/min_length": 0.0, "completions/min_terminated_length": 193.8, "epoch": 0.38399520005999926, "grad_norm": 0.0003652535378932953, "learning_rate": 1.4457831325301204e-06, "loss": -0.0111, "num_tokens": 319111710.0, "reward": 0.988642418384552, "reward_std": 0.1208130583167076, "rewards/accuracy_reward": 0.6771701335906982, "rewards/brier_reward": 0.7907926917076111, "rewards/confidence_uniqueness_reward": 0.9372875452041626, "rewards/format_reward": 0.9866319298744202, "rewards/frontier_aurc_reward": -0.0019396688556298613, "rewards/frontier_coverage_0": 0.009741135686635972, "rewards/frontier_coverage_1": 0.009741135686635972, "rewards/frontier_coverage_10": 0.009741135686635972, "rewards/frontier_coverage_15": 0.01013163048774004, "rewards/frontier_coverage_20": 0.027610554732382296, "rewards/frontier_coverage_25": 0.08821047395467758, "rewards/frontier_coverage_5": 0.009741135686635972, "rewards/frontier_ece_reward": 0.0028510759511846118, "rewards/frontier_entropy_batch_reward": -0.18388957977294923, "signal/accuracy_reward/centered_abs_mean": 0.14061957597732544, "signal/accuracy_reward/group_bin_occupancy": 0.19201388888888887, "signal/accuracy_reward/group_std_mean": 0.1869141399860382, "signal/accuracy_reward/group_zero_std_frac": 0.4638888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07030978798866272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07030978798866272, "signal/advantage_abs_mean": 0.08863120973110199, "signal/advantage_pre_scale_abs_mean": 0.08863120973110199, "signal/advantage_pre_scale_std": 0.15098720490932466, "signal/advantage_std": 0.15098720490932466, "signal/brier_reward/centered_abs_mean": 0.14210671186447144, "signal/brier_reward/group_bin_occupancy": 0.8583333333333334, "signal/brier_reward/group_std_mean": 0.18185594975948333, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014210670255124569, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014210670255124569, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.033150676265358926, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.846875, "signal/confidence_uniqueness_reward/group_std_mean": 0.052501931041479113, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003315067803487182, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003315067803487182, "signal/format_reward/centered_abs_mean": 0.02215711772441864, "signal/format_reward/group_bin_occupancy": 0.14409722222222224, "signal/format_reward/group_std_mean": 0.03936988487839699, "signal/format_reward/group_zero_std_frac": 0.8472222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01107855886220932, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01107855886220932, "signal/frontier_aurc_reward/centered_abs_mean": 0.002279521874152124, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6902777777777778, "signal/frontier_aurc_reward/group_std_mean": 0.0038658153265714646, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8494023717939852e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8494023717939852e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.17956892549991607, "signal/frontier_coverage_0/group_bin_occupancy": 0.8472222222222221, "signal/frontier_coverage_0/group_std_mean": 0.23461733758449554, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002244611643254757, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002244611643254757, "signal/frontier_coverage_1/centered_abs_mean": 0.17956892549991607, "signal/frontier_coverage_1/group_bin_occupancy": 0.8472222222222221, "signal/frontier_coverage_1/group_std_mean": 0.23461733758449554, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002244611643254757, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002244611643254757, "signal/frontier_coverage_10/centered_abs_mean": 0.17956892549991607, "signal/frontier_coverage_10/group_bin_occupancy": 0.8472222222222221, "signal/frontier_coverage_10/group_std_mean": 0.23461733758449554, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002244611643254757, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002244611643254757, "signal/frontier_coverage_15/centered_abs_mean": 0.17778740525245668, "signal/frontier_coverage_15/group_bin_occupancy": 0.8458333333333332, "signal/frontier_coverage_15/group_std_mean": 0.2324183076620102, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00222234264947474, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00222234264947474, "signal/frontier_coverage_20/centered_abs_mean": 0.06870782449841499, "signal/frontier_coverage_20/group_bin_occupancy": 0.9017361111111111, "signal/frontier_coverage_20/group_std_mean": 0.0908221110701561, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008588478667661548, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008588478667661548, "signal/frontier_coverage_25/centered_abs_mean": 0.0829654261469841, "signal/frontier_coverage_25/group_bin_occupancy": 0.9145833333333334, "signal/frontier_coverage_25/group_std_mean": 0.10622318387031555, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010370678268373013, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010370678268373013, "signal/frontier_coverage_5/centered_abs_mean": 0.17956892549991607, "signal/frontier_coverage_5/group_bin_occupancy": 0.8472222222222221, "signal/frontier_coverage_5/group_std_mean": 0.23461733758449554, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002244611643254757, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002244611643254757, "signal/frontier_ece_reward/centered_abs_mean": 0.02158619686961174, "signal/frontier_ece_reward/group_bin_occupancy": 0.7381944444444445, "signal/frontier_ece_reward/group_std_mean": 0.027624867483973505, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021586197894066573, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021586197894066573, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23778702020645143, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7749999999999999, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3028127193450928, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023778701573610304, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023778701573610304, "step": 160 }, { "calibration/aurc": 0.15272836679211094, "calibration/batch_distribution_entropy": 0.9627599417273558, "calibration/batch_entropy_100bins": 0.9522571385844725, "calibration/batch_entropy_10bins": 0.9627599417273558, "calibration/batch_entropy_50bins": 0.9613007206497416, "calibration/batch_uniqueness": 0.9484824117838434, "calibration/buffer_distribution_entropy": 0.9855117090133282, "calibration/buffer_entropy_100bins": 0.9919619031900998, "calibration/buffer_entropy_10bins": 0.9855117090133282, "calibration/buffer_entropy_50bins": 0.9911634713514934, "calibration/confidence_entropy": 0.4857673257774547, "calibration/coverage@0%": 0.049065118846368376, "calibration/coverage@1%": 0.07951131307209017, "calibration/coverage@10%": 0.5366995054961048, "calibration/coverage@15%": 0.6177436479560819, "calibration/coverage@20%": 0.6741639757050029, "calibration/coverage@25%": 0.7182050523398605, "calibration/coverage@30%": 0.8612321694819425, "calibration/coverage@5%": 0.28688341146053287, "calibration/ece": 0.1691076617981678, "calibration/mean_confidence": 0.5515990495421639, "calibration/prompt_uniqueness": 0.8613246886758482, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01640625000000002, "completions/max_length": 3573.0, "completions/max_terminated_length": 3573.0, "completions/mean_length": 642.81328125, "completions/mean_terminated_length": 653.6934326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 206.4, "epoch": 0.39599505006187424, "grad_norm": 0.00042818221845664084, "learning_rate": 1.2951807228915664e-06, "loss": -0.012, "num_tokens": 329655991.0, "reward": 0.9806491017341614, "reward_std": 0.12677238285541534, "rewards/accuracy_reward": 0.6590277671813964, "rewards/brier_reward": 0.7924768686294555, "rewards/confidence_uniqueness_reward": 0.9346436381340026, "rewards/format_reward": 0.983506953716278, "rewards/frontier_aurc_reward": -0.001722504827193916, "rewards/frontier_coverage_0": 0.028906658757478, "rewards/frontier_coverage_1": 0.028906658757478, "rewards/frontier_coverage_10": 0.028906658757478, "rewards/frontier_coverage_15": 0.031030337116681038, "rewards/frontier_coverage_20": 0.0429856464266777, "rewards/frontier_coverage_25": 0.10405687540769577, "rewards/frontier_coverage_5": 0.028906658757478, "rewards/frontier_ece_reward": 0.003673038515262306, "rewards/frontier_entropy_batch_reward": -0.17347353994846343, "signal/accuracy_reward/centered_abs_mean": 0.14384765326976776, "signal/accuracy_reward/group_bin_occupancy": 0.19444444444444445, "signal/accuracy_reward/group_std_mean": 0.1913081645965576, "signal/accuracy_reward/group_zero_std_frac": 0.44444444179534914, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07192382663488388, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07192382663488388, "signal/advantage_abs_mean": 0.09211225062608719, "signal/advantage_pre_scale_abs_mean": 0.09211225062608719, "signal/advantage_pre_scale_std": 0.15569303929805756, "signal/advantage_std": 0.15569303929805756, "signal/brier_reward/centered_abs_mean": 0.1462089329957962, "signal/brier_reward/group_bin_occupancy": 0.8246527777777779, "signal/brier_reward/group_std_mean": 0.18975663781166077, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014620893821120261, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014620893821120261, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03635745905339718, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.820486111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.05932655856013298, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003635745914652944, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003635745914652944, "signal/format_reward/centered_abs_mean": 0.02596571184694767, "signal/format_reward/group_bin_occupancy": 0.1486111111111111, "signal/format_reward/group_std_mean": 0.04700228720903397, "signal/format_reward/group_zero_std_frac": 0.8111111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012982855923473835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012982855923473835, "signal/frontier_aurc_reward/centered_abs_mean": 0.002158830175176263, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6850694444444445, "signal/frontier_aurc_reward/group_std_mean": 0.0039873755071312186, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6985378281096927e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6985378281096927e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19089278280735017, "signal/frontier_coverage_0/group_bin_occupancy": 0.8347222222222224, "signal/frontier_coverage_0/group_std_mean": 0.2473309278488159, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023861598689109086, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023861598689109086, "signal/frontier_coverage_1/centered_abs_mean": 0.19089278280735017, "signal/frontier_coverage_1/group_bin_occupancy": 0.8347222222222224, "signal/frontier_coverage_1/group_std_mean": 0.2473309278488159, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023861598689109086, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023861598689109086, "signal/frontier_coverage_10/centered_abs_mean": 0.19089278280735017, "signal/frontier_coverage_10/group_bin_occupancy": 0.8347222222222224, "signal/frontier_coverage_10/group_std_mean": 0.2473309278488159, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023861598689109086, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023861598689109086, "signal/frontier_coverage_15/centered_abs_mean": 0.1793476462364197, "signal/frontier_coverage_15/group_bin_occupancy": 0.8326388888888889, "signal/frontier_coverage_15/group_std_mean": 0.2329329788684845, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002241845661774278, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002241845661774278, "signal/frontier_coverage_20/centered_abs_mean": 0.06690727174282074, "signal/frontier_coverage_20/group_bin_occupancy": 0.9097222222222221, "signal/frontier_coverage_20/group_std_mean": 0.08647293150424958, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008363408851437271, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008363408851437271, "signal/frontier_coverage_25/centered_abs_mean": 0.0914057046175003, "signal/frontier_coverage_25/group_bin_occupancy": 0.9003472222222222, "signal/frontier_coverage_25/group_std_mean": 0.1181455373764038, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011425713310018182, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011425713310018182, "signal/frontier_coverage_5/centered_abs_mean": 0.19089278280735017, "signal/frontier_coverage_5/group_bin_occupancy": 0.8347222222222224, "signal/frontier_coverage_5/group_std_mean": 0.2473309278488159, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023861598689109086, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023861598689109086, "signal/frontier_ece_reward/centered_abs_mean": 0.022229710966348647, "signal/frontier_ece_reward/group_bin_occupancy": 0.7388888888888889, "signal/frontier_ece_reward/group_std_mean": 0.02815890610218048, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0022229711525142194, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0022229711525142194, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23444273173809052, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.775, "signal/frontier_entropy_batch_reward/group_std_mean": 0.30144866108894347, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02344427481293678, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02344427481293678, "step": 165 }, { "calibration/aurc": 0.1350255119426556, "calibration/batch_distribution_entropy": 0.9467153136348341, "calibration/batch_entropy_100bins": 0.9448354422616114, "calibration/batch_entropy_10bins": 0.9467153136348341, "calibration/batch_entropy_50bins": 0.9524946276164183, "calibration/batch_uniqueness": 0.9455724587067573, "calibration/buffer_distribution_entropy": 0.9863591468776306, "calibration/buffer_entropy_100bins": 0.9924262206930579, "calibration/buffer_entropy_10bins": 0.9863591468776306, "calibration/buffer_entropy_50bins": 0.9916857227941385, "calibration/confidence_entropy": 0.4943049964540047, "calibration/coverage@0%": 0.10312881092467825, "calibration/coverage@1%": 0.10312881092467825, "calibration/coverage@10%": 0.46204870130918313, "calibration/coverage@15%": 0.5477847318292854, "calibration/coverage@20%": 0.7477854206561981, "calibration/coverage@25%": 0.833632965941813, "calibration/coverage@30%": 0.9413540434344725, "calibration/coverage@5%": 0.3250324397385173, "calibration/ece": 0.13538983337137994, "calibration/mean_confidence": 0.6091723019901613, "calibration/prompt_uniqueness": 0.8639295314719903, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011979166666666652, "completions/max_length": 3396.8, "completions/max_terminated_length": 3396.8, "completions/mean_length": 623.0584350585938, "completions/mean_terminated_length": 630.6281982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 185.2, "epoch": 0.4079949000637492, "grad_norm": 0.00041778094600886106, "learning_rate": 1.1445783132530121e-06, "loss": -0.0105, "num_tokens": 339922808.0, "reward": 1.0096432447433472, "reward_std": 0.1250714048743248, "rewards/accuracy_reward": 0.7217013955116272, "rewards/brier_reward": 0.8008638501167298, "rewards/confidence_uniqueness_reward": 0.9373578429222107, "rewards/format_reward": 0.9880208373069763, "rewards/frontier_aurc_reward": -0.00128103963797912, "rewards/frontier_coverage_0": -0.011224100925028324, "rewards/frontier_coverage_1": -0.011224100925028324, "rewards/frontier_coverage_10": -0.011125411931425333, "rewards/frontier_coverage_15": 0.000582283828407526, "rewards/frontier_coverage_20": 0.05009397864341736, "rewards/frontier_coverage_25": 0.1337550863623619, "rewards/frontier_coverage_5": -0.011224100925028324, "rewards/frontier_ece_reward": -0.0010974591568810865, "rewards/frontier_entropy_batch_reward": -0.2065970182418823, "signal/accuracy_reward/centered_abs_mean": 0.14814453125, "signal/accuracy_reward/group_bin_occupancy": 0.19479166666666667, "signal/accuracy_reward/group_std_mean": 0.19610781967639923, "signal/accuracy_reward/group_zero_std_frac": 0.44166667461395265, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.074072265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.074072265625, "signal/advantage_abs_mean": 0.0921988844871521, "signal/advantage_pre_scale_abs_mean": 0.0921988844871521, "signal/advantage_pre_scale_std": 0.15291462242603301, "signal/advantage_std": 0.15291462242603301, "signal/brier_reward/centered_abs_mean": 0.14049543142318727, "signal/brier_reward/group_bin_occupancy": 0.8541666666666666, "signal/brier_reward/group_std_mean": 0.17984696626663207, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01404954344034195, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01404954344034195, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0325088482350111, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.834375, "signal/confidence_uniqueness_reward/group_std_mean": 0.05347518250346184, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032508848700672386, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032508848700672386, "signal/format_reward/centered_abs_mean": 0.020540364272892474, "signal/format_reward/group_bin_occupancy": 0.1454861111111111, "signal/format_reward/group_std_mean": 0.039224734902381896, "signal/format_reward/group_zero_std_frac": 0.8361111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010270182136446237, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010270182136446237, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017802180489525199, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6770833333333333, "signal/frontier_aurc_reward/group_std_mean": 0.003222810197621584, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2252726193983107e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2252726193983107e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1828035831451416, "signal/frontier_coverage_0/group_bin_occupancy": 0.8583333333333334, "signal/frontier_coverage_0/group_std_mean": 0.2377503514289856, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022850447334349156, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022850447334349156, "signal/frontier_coverage_1/centered_abs_mean": 0.1828035831451416, "signal/frontier_coverage_1/group_bin_occupancy": 0.8583333333333334, "signal/frontier_coverage_1/group_std_mean": 0.2377503514289856, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022850447334349156, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022850447334349156, "signal/frontier_coverage_10/centered_abs_mean": 0.18264002799987794, "signal/frontier_coverage_10/group_bin_occupancy": 0.8586805555555556, "signal/frontier_coverage_10/group_std_mean": 0.2375439763069153, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022830002941191196, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022830002941191196, "signal/frontier_coverage_15/centered_abs_mean": 0.15461563766002656, "signal/frontier_coverage_15/group_bin_occupancy": 0.8496527777777778, "signal/frontier_coverage_15/group_std_mean": 0.20210520327091216, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019326955080032349, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019326955080032349, "signal/frontier_coverage_20/centered_abs_mean": 0.06322543397545814, "signal/frontier_coverage_20/group_bin_occupancy": 0.9326388888888889, "signal/frontier_coverage_20/group_std_mean": 0.08029639273881913, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007903179153800011, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007903179153800011, "signal/frontier_coverage_25/centered_abs_mean": 0.101860611140728, "signal/frontier_coverage_25/group_bin_occupancy": 0.9097222222222221, "signal/frontier_coverage_25/group_std_mean": 0.1299730733036995, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012732576811686157, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012732576811686157, "signal/frontier_coverage_5/centered_abs_mean": 0.1828035831451416, "signal/frontier_coverage_5/group_bin_occupancy": 0.8583333333333334, "signal/frontier_coverage_5/group_std_mean": 0.2377503514289856, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022850447334349156, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022850447334349156, "signal/frontier_ece_reward/centered_abs_mean": 0.02175499051809311, "signal/frontier_ece_reward/group_bin_occupancy": 0.7347222222222223, "signal/frontier_ece_reward/group_std_mean": 0.02739621587097645, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021754990331828592, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021754990331828592, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25911190211772916, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7975694444444444, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3297302842140198, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02591119073331356, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02591119073331356, "step": 170 }, { "calibration/aurc": 0.11596802537585997, "calibration/batch_distribution_entropy": 0.9772162270629645, "calibration/batch_entropy_100bins": 0.9590083538748349, "calibration/batch_entropy_10bins": 0.9772162270629645, "calibration/batch_entropy_50bins": 0.9696121620848036, "calibration/batch_uniqueness": 0.9508310218025986, "calibration/buffer_distribution_entropy": 0.985610419410986, "calibration/buffer_entropy_100bins": 0.9920422218752784, "calibration/buffer_entropy_10bins": 0.985610419410986, "calibration/buffer_entropy_50bins": 0.9912291646995455, "calibration/confidence_entropy": 0.4961519915892487, "calibration/coverage@0%": 0.08600478646452267, "calibration/coverage@1%": 0.08600478646452267, "calibration/coverage@10%": 0.4732605731669655, "calibration/coverage@15%": 0.7201678681063644, "calibration/coverage@20%": 0.8435295212461836, "calibration/coverage@25%": 0.9484174474094432, "calibration/coverage@30%": 0.9905013192612138, "calibration/coverage@5%": 0.30627206208368407, "calibration/ece": 0.18982087542522702, "calibration/mean_confidence": 0.5509683002177224, "calibration/prompt_uniqueness": 0.8649400485298095, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013888888888888905, "completions/max_length": 3743.8, "completions/max_terminated_length": 3743.8, "completions/mean_length": 650.516845703125, "completions/mean_terminated_length": 659.7272583007813, "completions/min_length": 0.0, "completions/min_terminated_length": 172.4, "epoch": 0.4199947500656242, "grad_norm": 0.0004142906400375068, "learning_rate": 9.93975903614458e-07, "loss": -0.0114, "num_tokens": 350524730.0, "reward": 1.0014106631278992, "reward_std": 0.12218387722969055, "rewards/accuracy_reward": 0.7021701455116272, "rewards/brier_reward": 0.7892498970031738, "rewards/confidence_uniqueness_reward": 0.9369961380958557, "rewards/format_reward": 0.9857638835906982, "rewards/frontier_aurc_reward": -0.001478305645287037, "rewards/frontier_coverage_0": -0.006911272555589676, "rewards/frontier_coverage_1": -0.006911272555589676, "rewards/frontier_coverage_10": -0.006758286617696285, "rewards/frontier_coverage_15": 0.0018855141475796699, "rewards/frontier_coverage_20": 0.05244411379098892, "rewards/frontier_coverage_25": 0.13218926042318344, "rewards/frontier_coverage_5": -0.006911272555589676, "rewards/frontier_ece_reward": -0.00075130017939955, "rewards/frontier_entropy_batch_reward": -0.1707520604133606, "signal/accuracy_reward/centered_abs_mean": 0.1440049946308136, "signal/accuracy_reward/group_bin_occupancy": 0.19618055555555555, "signal/accuracy_reward/group_std_mean": 0.19517480432987214, "signal/accuracy_reward/group_zero_std_frac": 0.43055556416511537, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0720024973154068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0720024973154068, "signal/advantage_abs_mean": 0.08900740891695022, "signal/advantage_pre_scale_abs_mean": 0.08900740891695022, "signal/advantage_pre_scale_std": 0.15330225825309754, "signal/advantage_std": 0.15330225825309754, "signal/brier_reward/centered_abs_mean": 0.14266086518764495, "signal/brier_reward/group_bin_occupancy": 0.8486111111111111, "signal/brier_reward/group_std_mean": 0.18242388367652893, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01426608581095934, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01426608581095934, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03368383906781673, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8496527777777777, "signal/confidence_uniqueness_reward/group_std_mean": 0.052182822674512866, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003368384018540382, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003368384018540382, "signal/format_reward/centered_abs_mean": 0.022667100839316844, "signal/format_reward/group_bin_occupancy": 0.14340277777777777, "signal/format_reward/group_std_mean": 0.03894899114966392, "signal/format_reward/group_zero_std_frac": 0.8527777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011333550419658422, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011333550419658422, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019595161313191055, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6739583333333332, "signal/frontier_aurc_reward/group_std_mean": 0.003604071820154786, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.449395142321009e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.449395142321009e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18813599050045013, "signal/frontier_coverage_0/group_bin_occupancy": 0.8409722222222221, "signal/frontier_coverage_0/group_std_mean": 0.24627106189727782, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023517000023275613, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023517000023275613, "signal/frontier_coverage_1/centered_abs_mean": 0.18813599050045013, "signal/frontier_coverage_1/group_bin_occupancy": 0.8409722222222221, "signal/frontier_coverage_1/group_std_mean": 0.24627106189727782, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023517000023275613, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023517000023275613, "signal/frontier_coverage_10/centered_abs_mean": 0.18753766417503356, "signal/frontier_coverage_10/group_bin_occupancy": 0.8399305555555555, "signal/frontier_coverage_10/group_std_mean": 0.24552586376667024, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002344220783561468, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002344220783561468, "signal/frontier_coverage_15/centered_abs_mean": 0.14854539334774017, "signal/frontier_coverage_15/group_bin_occupancy": 0.834375, "signal/frontier_coverage_15/group_std_mean": 0.19552876353263854, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018568174680694937, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018568174680694937, "signal/frontier_coverage_20/centered_abs_mean": 0.06628052592277527, "signal/frontier_coverage_20/group_bin_occupancy": 0.9263888888888889, "signal/frontier_coverage_20/group_std_mean": 0.08446042537689209, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008285066462121904, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008285066462121904, "signal/frontier_coverage_25/centered_abs_mean": 0.10350020378828048, "signal/frontier_coverage_25/group_bin_occupancy": 0.8947916666666668, "signal/frontier_coverage_25/group_std_mean": 0.13349340260028839, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012937525752931833, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012937525752931833, "signal/frontier_coverage_5/centered_abs_mean": 0.18813599050045013, "signal/frontier_coverage_5/group_bin_occupancy": 0.8409722222222221, "signal/frontier_coverage_5/group_std_mean": 0.24627106189727782, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023517000023275613, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023517000023275613, "signal/frontier_ece_reward/centered_abs_mean": 0.021230778843164443, "signal/frontier_ece_reward/group_bin_occupancy": 0.7291666666666667, "signal/frontier_ece_reward/group_std_mean": 0.02696690522134304, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002123078005388379, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002123078005388379, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2259067177772522, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.775, "signal/frontier_entropy_batch_reward/group_std_mean": 0.28947545886039733, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022590672224760057, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022590672224760057, "step": 175 }, { "calibration/aurc": 0.10141793628292742, "calibration/batch_distribution_entropy": 0.97102851134233, "calibration/batch_entropy_100bins": 0.9559282496757466, "calibration/batch_entropy_10bins": 0.97102851134233, "calibration/batch_entropy_50bins": 0.9685106670847736, "calibration/batch_uniqueness": 0.9508384189223701, "calibration/buffer_distribution_entropy": 0.9859982317210336, "calibration/buffer_entropy_100bins": 0.9922246706460343, "calibration/buffer_entropy_10bins": 0.9859982317210336, "calibration/buffer_entropy_50bins": 0.9914588219746386, "calibration/confidence_entropy": 0.49898035500245025, "calibration/coverage@0%": 0.06338339682072068, "calibration/coverage@1%": 0.13005006348738732, "calibration/coverage@10%": 0.5578450042867328, "calibration/coverage@15%": 0.8041507799553624, "calibration/coverage@20%": 0.9102453475329633, "calibration/coverage@25%": 0.9551202557445988, "calibration/coverage@30%": 0.9805774278215222, "calibration/coverage@5%": 0.2749898127789967, "calibration/ece": 0.1917915387935666, "calibration/mean_confidence": 0.5808151819529532, "calibration/prompt_uniqueness": 0.8569296263730571, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014409722222222232, "completions/max_length": 3805.4, "completions/max_terminated_length": 3805.4, "completions/mean_length": 637.946533203125, "completions/mean_terminated_length": 647.2637451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 199.8, "epoch": 0.4319946000674992, "grad_norm": 0.0003819975827354938, "learning_rate": 8.433734939759036e-07, "loss": -0.0126, "num_tokens": 360973842.0, "reward": 1.0002532005310059, "reward_std": 0.12911737263202666, "rewards/accuracy_reward": 0.7065972208976745, "rewards/brier_reward": 0.7953470587730408, "rewards/confidence_uniqueness_reward": 0.9346136093139649, "rewards/format_reward": 0.9855902910232544, "rewards/frontier_aurc_reward": -0.0019531417870894074, "rewards/frontier_coverage_0": -0.002460658084601164, "rewards/frontier_coverage_1": -0.002460658084601164, "rewards/frontier_coverage_10": -0.002041639015078545, "rewards/frontier_coverage_15": 0.008680144883692264, "rewards/frontier_coverage_20": 0.06160227060317993, "rewards/frontier_coverage_25": 0.1450663238763809, "rewards/frontier_coverage_5": -0.002460658084601164, "rewards/frontier_ece_reward": -0.0004450877895578742, "rewards/frontier_entropy_batch_reward": -0.21341712474823, "signal/accuracy_reward/centered_abs_mean": 0.15309244394302368, "signal/accuracy_reward/group_bin_occupancy": 0.19340277777777778, "signal/accuracy_reward/group_std_mean": 0.19739371538162231, "signal/accuracy_reward/group_zero_std_frac": 0.45277778506278993, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07654622197151184, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07654622197151184, "signal/advantage_abs_mean": 0.09620479941368103, "signal/advantage_pre_scale_abs_mean": 0.09620479941368103, "signal/advantage_pre_scale_std": 0.16078065931797028, "signal/advantage_std": 0.16078065931797028, "signal/brier_reward/centered_abs_mean": 0.13989888727664948, "signal/brier_reward/group_bin_occupancy": 0.835763888888889, "signal/brier_reward/group_std_mean": 0.18042805790901184, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013989889249205589, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013989889249205589, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03554730340838432, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8277777777777778, "signal/confidence_uniqueness_reward/group_std_mean": 0.05846796631813049, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003554730489850044, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003554730489850044, "signal/format_reward/centered_abs_mean": 0.02422960065305233, "signal/format_reward/group_bin_occupancy": 0.1482638888888889, "signal/format_reward/group_std_mean": 0.04518317058682442, "signal/format_reward/group_zero_std_frac": 0.8138888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012114800326526166, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012114800326526166, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024452964775264264, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6625, "signal/frontier_aurc_reward/group_std_mean": 0.004462533164769411, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.056620480492711e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.056620480492711e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18128202557563783, "signal/frontier_coverage_0/group_bin_occupancy": 0.8354166666666668, "signal/frontier_coverage_0/group_std_mean": 0.23500166237354278, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002266025450080633, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002266025450080633, "signal/frontier_coverage_1/centered_abs_mean": 0.18128202557563783, "signal/frontier_coverage_1/group_bin_occupancy": 0.8354166666666668, "signal/frontier_coverage_1/group_std_mean": 0.23500166237354278, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002266025450080633, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002266025450080633, "signal/frontier_coverage_10/centered_abs_mean": 0.17996532022953032, "signal/frontier_coverage_10/group_bin_occupancy": 0.8336805555555555, "signal/frontier_coverage_10/group_std_mean": 0.23336804807186126, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002249566651880741, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002249566651880741, "signal/frontier_coverage_15/centered_abs_mean": 0.11691176444292069, "signal/frontier_coverage_15/group_bin_occupancy": 0.8347222222222224, "signal/frontier_coverage_15/group_std_mean": 0.15411899387836456, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00146139704156667, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00146139704156667, "signal/frontier_coverage_20/centered_abs_mean": 0.06802540868520737, "signal/frontier_coverage_20/group_bin_occupancy": 0.928125, "signal/frontier_coverage_20/group_std_mean": 0.08644652813673019, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008503176271915436, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008503176271915436, "signal/frontier_coverage_25/centered_abs_mean": 0.11125858575105667, "signal/frontier_coverage_25/group_bin_occupancy": 0.8972222222222221, "signal/frontier_coverage_25/group_std_mean": 0.1438766151666641, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013907323591411114, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013907323591411114, "signal/frontier_coverage_5/centered_abs_mean": 0.18128202557563783, "signal/frontier_coverage_5/group_bin_occupancy": 0.8354166666666668, "signal/frontier_coverage_5/group_std_mean": 0.23500166237354278, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002266025450080633, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002266025450080633, "signal/frontier_ece_reward/centered_abs_mean": 0.020656683668494224, "signal/frontier_ece_reward/group_bin_occupancy": 0.7170138888888888, "signal/frontier_ece_reward/group_std_mean": 0.02589767798781395, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020656683016568424, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020656683016568424, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2591008573770523, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7864583333333333, "signal/frontier_entropy_batch_reward/group_std_mean": 0.32451775670051575, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025910085812211037, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025910085812211037, "step": 180 }, { "calibration/aurc": 0.1956443021676986, "calibration/batch_distribution_entropy": 0.9721898543770948, "calibration/batch_entropy_100bins": 0.9582683246927992, "calibration/batch_entropy_10bins": 0.9721898543770948, "calibration/batch_entropy_50bins": 0.9690488174101344, "calibration/batch_uniqueness": 0.9514666032368121, "calibration/buffer_distribution_entropy": 0.9854666800775806, "calibration/buffer_entropy_100bins": 0.9919698091057843, "calibration/buffer_entropy_10bins": 0.9854666800775806, "calibration/buffer_entropy_50bins": 0.991146072484287, "calibration/confidence_entropy": 0.5068397741253167, "calibration/coverage@0%": 0.013618119773481246, "calibration/coverage@1%": 0.013618119773481246, "calibration/coverage@10%": 0.1095949729010646, "calibration/coverage@15%": 0.5379352359546358, "calibration/coverage@20%": 0.6796718053797843, "calibration/coverage@25%": 0.8897106640947919, "calibration/coverage@30%": 0.9510526315789474, "calibration/coverage@5%": 0.02723068521850743, "calibration/ece": 0.2253786384553338, "calibration/mean_confidence": 0.5595226877564153, "calibration/prompt_uniqueness": 0.8615784712587196, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012152777777777745, "completions/max_length": 3242.0, "completions/max_terminated_length": 3242.0, "completions/mean_length": 643.6135498046875, "completions/mean_terminated_length": 651.6073974609375, "completions/min_length": 0.0, "completions/min_terminated_length": 184.8, "epoch": 0.44399445006937416, "grad_norm": 0.00043083218042738736, "learning_rate": 6.927710843373495e-07, "loss": -0.0099, "num_tokens": 371478318.0, "reward": 0.9993168830871582, "reward_std": 0.1252796620130539, "rewards/accuracy_reward": 0.6919270873069763, "rewards/brier_reward": 0.7822542548179626, "rewards/confidence_uniqueness_reward": 0.940218985080719, "rewards/format_reward": 0.9877604126930237, "rewards/frontier_aurc_reward": -0.001636920589953661, "rewards/frontier_coverage_0": -0.009225619398057461, "rewards/frontier_coverage_1": -0.009225619398057461, "rewards/frontier_coverage_10": -0.00855890940874815, "rewards/frontier_coverage_15": 0.014033466950058937, "rewards/frontier_coverage_20": 0.05618218407034874, "rewards/frontier_coverage_25": 0.12737512439489365, "rewards/frontier_coverage_5": -0.009225619398057461, "rewards/frontier_ece_reward": -0.0031481004785746335, "rewards/frontier_entropy_batch_reward": -0.14455921649932862, "signal/accuracy_reward/centered_abs_mean": 0.15904405415058137, "signal/accuracy_reward/group_bin_occupancy": 0.19548611111111108, "signal/accuracy_reward/group_std_mean": 0.20459264814853667, "signal/accuracy_reward/group_zero_std_frac": 0.43611111044883727, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07952202707529069, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07952202707529069, "signal/advantage_abs_mean": 0.0955008551478386, "signal/advantage_pre_scale_abs_mean": 0.0955008551478386, "signal/advantage_pre_scale_std": 0.1540976881980896, "signal/advantage_std": 0.1540976881980896, "signal/brier_reward/centered_abs_mean": 0.14529342353343963, "signal/brier_reward/group_bin_occupancy": 0.8604166666666666, "signal/brier_reward/group_std_mean": 0.18353629410266875, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014529342763125896, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014529342763125896, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03009340800344944, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8524305555555556, "signal/confidence_uniqueness_reward/group_std_mean": 0.046408722549676894, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030093408189713956, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030093408189713956, "signal/format_reward/centered_abs_mean": 0.01939561627805233, "signal/format_reward/group_bin_occupancy": 0.14131944444444441, "signal/format_reward/group_std_mean": 0.03335440866649151, "signal/format_reward/group_zero_std_frac": 0.8694444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009697808139026164, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009697808139026164, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020065686898306013, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6559027777777777, "signal/frontier_aurc_reward/group_std_mean": 0.0037283867597579954, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.508210891392082e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.508210891392082e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1982041120529175, "signal/frontier_coverage_0/group_bin_occupancy": 0.8604166666666666, "signal/frontier_coverage_0/group_std_mean": 0.2535953104496002, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024775514844805, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024775514844805, "signal/frontier_coverage_1/centered_abs_mean": 0.1982041120529175, "signal/frontier_coverage_1/group_bin_occupancy": 0.8604166666666666, "signal/frontier_coverage_1/group_std_mean": 0.2535953104496002, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024775514844805, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024775514844805, "signal/frontier_coverage_10/centered_abs_mean": 0.195833483338356, "signal/frontier_coverage_10/group_bin_occupancy": 0.8583333333333332, "signal/frontier_coverage_10/group_std_mean": 0.2506587952375412, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024479186162352563, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024479186162352563, "signal/frontier_coverage_15/centered_abs_mean": 0.10796615332365037, "signal/frontier_coverage_15/group_bin_occupancy": 0.8600694444444444, "signal/frontier_coverage_15/group_std_mean": 0.14121497869491578, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001349576935172081, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001349576935172081, "signal/frontier_coverage_20/centered_abs_mean": 0.06482557505369187, "signal/frontier_coverage_20/group_bin_occupancy": 0.91875, "signal/frontier_coverage_20/group_std_mean": 0.0837198704481125, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008103197091259062, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008103197091259062, "signal/frontier_coverage_25/centered_abs_mean": 0.10678046792745591, "signal/frontier_coverage_25/group_bin_occupancy": 0.8875, "signal/frontier_coverage_25/group_std_mean": 0.1394643157720566, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001334755914285779, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001334755914285779, "signal/frontier_coverage_5/centered_abs_mean": 0.1982041120529175, "signal/frontier_coverage_5/group_bin_occupancy": 0.8604166666666666, "signal/frontier_coverage_5/group_std_mean": 0.2535953104496002, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024775514844805, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024775514844805, "signal/frontier_ece_reward/centered_abs_mean": 0.02167145274579525, "signal/frontier_ece_reward/group_bin_occupancy": 0.7135416666666667, "signal/frontier_ece_reward/group_std_mean": 0.026740428060293198, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021671453956514596, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021671453956514596, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2169477492570877, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7670138888888889, "signal/frontier_entropy_batch_reward/group_std_mean": 0.28880282044410704, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02169477492570877, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02169477492570877, "step": 185 }, { "calibration/aurc": 0.16203958912445476, "calibration/batch_distribution_entropy": 0.9564638809055342, "calibration/batch_entropy_100bins": 0.9504243671299063, "calibration/batch_entropy_10bins": 0.9564638809055342, "calibration/batch_entropy_50bins": 0.9588082852441188, "calibration/batch_uniqueness": 0.9485751236738553, "calibration/buffer_distribution_entropy": 0.9855318683814028, "calibration/buffer_entropy_100bins": 0.9920046840716885, "calibration/buffer_entropy_10bins": 0.9855318683814028, "calibration/buffer_entropy_50bins": 0.9911690583473515, "calibration/confidence_entropy": 0.5012853403916044, "calibration/coverage@0%": 0.05780618281758908, "calibration/coverage@1%": 0.05780618281758908, "calibration/coverage@10%": 0.31838293083869723, "calibration/coverage@15%": 0.4790459460921778, "calibration/coverage@20%": 0.5617222445744519, "calibration/coverage@25%": 0.9087071261402514, "calibration/coverage@30%": 0.9716345096745822, "calibration/coverage@5%": 0.1443786218306891, "calibration/ece": 0.17111073876394964, "calibration/mean_confidence": 0.6004809067734467, "calibration/prompt_uniqueness": 0.8665386519176552, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009461805555555536, "completions/max_length": 3465.6, "completions/max_terminated_length": 3465.6, "completions/mean_length": 627.6765625, "completions/mean_terminated_length": 633.6748168945312, "completions/min_length": 0.0, "completions/min_terminated_length": 181.6, "epoch": 0.45599430007124914, "grad_norm": 0.0004201448755338788, "learning_rate": 5.421686746987952e-07, "loss": -0.0078, "num_tokens": 381792096.0, "reward": 1.016054892539978, "reward_std": 0.12401713877916336, "rewards/accuracy_reward": 0.7311631917953492, "rewards/brier_reward": 0.800844419002533, "rewards/confidence_uniqueness_reward": 0.9397600173950196, "rewards/format_reward": 0.9903645753860474, "rewards/frontier_aurc_reward": -0.0016162074403837322, "rewards/frontier_coverage_0": -0.015276820957660675, "rewards/frontier_coverage_1": -0.015276820957660675, "rewards/frontier_coverage_10": -0.014098763652145862, "rewards/frontier_coverage_15": 0.01533528920263052, "rewards/frontier_coverage_20": 0.07684787213802338, "rewards/frontier_coverage_25": 0.1640935003757477, "rewards/frontier_coverage_5": -0.015273858606815339, "rewards/frontier_ece_reward": -0.0036209038575179876, "rewards/frontier_entropy_batch_reward": -0.20841516852378844, "signal/accuracy_reward/centered_abs_mean": 0.15256619155406953, "signal/accuracy_reward/group_bin_occupancy": 0.19826388888888888, "signal/accuracy_reward/group_std_mean": 0.20278047025203705, "signal/accuracy_reward/group_zero_std_frac": 0.4138888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07628309577703477, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07628309577703477, "signal/advantage_abs_mean": 0.09019981622695923, "signal/advantage_pre_scale_abs_mean": 0.09019981622695923, "signal/advantage_pre_scale_std": 0.15100550949573516, "signal/advantage_std": 0.15100550949573516, "signal/brier_reward/centered_abs_mean": 0.13891661763191224, "signal/brier_reward/group_bin_occupancy": 0.8350694444444444, "signal/brier_reward/group_std_mean": 0.17901506423950195, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01389166172593832, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01389166172593832, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028540104255080224, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8395833333333333, "signal/confidence_uniqueness_reward/group_std_mean": 0.04933372884988785, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028540104161947966, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028540104161947966, "signal/format_reward/centered_abs_mean": 0.01725802943110466, "signal/format_reward/group_bin_occupancy": 0.14479166666666668, "signal/format_reward/group_std_mean": 0.03579398356378079, "signal/format_reward/group_zero_std_frac": 0.8416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00862901471555233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00862901471555233, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020792306633666156, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6552083333333333, "signal/frontier_aurc_reward/group_std_mean": 0.0039239289239048954, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5990382710006088e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5990382710006088e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18381263613700866, "signal/frontier_coverage_0/group_bin_occupancy": 0.8267361111111111, "signal/frontier_coverage_0/group_std_mean": 0.23998367488384248, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022976579144597053, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022976579144597053, "signal/frontier_coverage_1/centered_abs_mean": 0.18381263613700866, "signal/frontier_coverage_1/group_bin_occupancy": 0.8267361111111111, "signal/frontier_coverage_1/group_std_mean": 0.23998367488384248, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022976579144597053, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022976579144597053, "signal/frontier_coverage_10/centered_abs_mean": 0.1811767816543579, "signal/frontier_coverage_10/group_bin_occupancy": 0.8246527777777779, "signal/frontier_coverage_10/group_std_mean": 0.2366650640964508, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022647099569439886, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022647099569439886, "signal/frontier_coverage_15/centered_abs_mean": 0.09005323797464371, "signal/frontier_coverage_15/group_bin_occupancy": 0.8552083333333333, "signal/frontier_coverage_15/group_std_mean": 0.11996055394411087, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011256654281169177, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011256654281169177, "signal/frontier_coverage_20/centered_abs_mean": 0.07242253422737122, "signal/frontier_coverage_20/group_bin_occupancy": 0.923611111111111, "signal/frontier_coverage_20/group_std_mean": 0.09198382496833801, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009052816778421402, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009052816778421402, "signal/frontier_coverage_25/centered_abs_mean": 0.11942882239818572, "signal/frontier_coverage_25/group_bin_occupancy": 0.8909722222222222, "signal/frontier_coverage_25/group_std_mean": 0.15319141745567322, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014928602380678059, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014928602380678059, "signal/frontier_coverage_5/centered_abs_mean": 0.18380914330482484, "signal/frontier_coverage_5/group_bin_occupancy": 0.8267361111111111, "signal/frontier_coverage_5/group_std_mean": 0.23997901380062103, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002297614235430956, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002297614235430956, "signal/frontier_ece_reward/centered_abs_mean": 0.02063850834965706, "signal/frontier_ece_reward/group_bin_occupancy": 0.6902777777777778, "signal/frontier_ece_reward/group_std_mean": 0.02570592537522316, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002063850755803287, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002063850755803287, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2538691431283951, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7777777777777779, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3231283605098724, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025386914610862732, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025386914610862732, "step": 190 }, { "calibration/aurc": 0.1961372764660094, "calibration/batch_distribution_entropy": 0.9787626043754896, "calibration/batch_entropy_100bins": 0.9610797936801457, "calibration/batch_entropy_10bins": 0.9787626043754896, "calibration/batch_entropy_50bins": 0.9740262499384797, "calibration/batch_uniqueness": 0.952937991093231, "calibration/buffer_distribution_entropy": 0.9854658504578813, "calibration/buffer_entropy_100bins": 0.9919799868819045, "calibration/buffer_entropy_10bins": 0.9854658504578813, "calibration/buffer_entropy_50bins": 0.9911334418833307, "calibration/confidence_entropy": 0.4938269497363332, "calibration/coverage@0%": 0.017848624480017818, "calibration/coverage@1%": 0.017848624480017818, "calibration/coverage@10%": 0.3216058794764497, "calibration/coverage@15%": 0.44077104008081436, "calibration/coverage@20%": 0.5248942584977563, "calibration/coverage@25%": 0.7557016752611363, "calibration/coverage@30%": 0.8214413911949145, "calibration/coverage@5%": 0.14017045767584957, "calibration/ece": 0.17108793119108695, "calibration/mean_confidence": 0.5567763002783004, "calibration/prompt_uniqueness": 0.8632525025551473, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012152777777777768, "completions/max_length": 3528.2, "completions/max_terminated_length": 3528.2, "completions/mean_length": 654.6660522460937, "completions/mean_terminated_length": 662.805517578125, "completions/min_length": 0.0, "completions/min_terminated_length": 188.2, "epoch": 0.46799415007312406, "grad_norm": 0.0004228286852594465, "learning_rate": 3.91566265060241e-07, "loss": -0.0096, "num_tokens": 392414713.0, "reward": 0.9901637196540832, "reward_std": 0.12346882373094559, "rewards/accuracy_reward": 0.6758680582046509, "rewards/brier_reward": 0.7898530125617981, "rewards/confidence_uniqueness_reward": 0.9386414170265198, "rewards/format_reward": 0.9878472208976745, "rewards/frontier_aurc_reward": -0.0017654816154390573, "rewards/frontier_coverage_0": 0.008542282739654183, "rewards/frontier_coverage_1": 0.008542282739654183, "rewards/frontier_coverage_10": 0.00909471595659852, "rewards/frontier_coverage_15": 0.0248194869607687, "rewards/frontier_coverage_20": 0.07406894192099571, "rewards/frontier_coverage_25": 0.14769483357667923, "rewards/frontier_coverage_5": 0.008551673218607902, "rewards/frontier_ece_reward": -0.0010886201984249056, "rewards/frontier_entropy_batch_reward": -0.1792888253927231, "signal/accuracy_reward/centered_abs_mean": 0.14333767145872117, "signal/accuracy_reward/group_bin_occupancy": 0.196875, "signal/accuracy_reward/group_std_mean": 0.1954125940799713, "signal/accuracy_reward/group_zero_std_frac": 0.425, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07166883572936059, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07166883572936059, "signal/advantage_abs_mean": 0.0889064148068428, "signal/advantage_pre_scale_abs_mean": 0.0889064148068428, "signal/advantage_pre_scale_std": 0.14890001118183135, "signal/advantage_std": 0.14890001118183135, "signal/brier_reward/centered_abs_mean": 0.14416175484657287, "signal/brier_reward/group_bin_occupancy": 0.8413194444444445, "signal/brier_reward/group_std_mean": 0.18464682400226592, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01441617514938116, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01441617514938116, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03026603311300278, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8399305555555555, "signal/confidence_uniqueness_reward/group_std_mean": 0.050313469022512436, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003026603301987052, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003026603301987052, "signal/format_reward/centered_abs_mean": 0.019162326864898205, "signal/format_reward/group_bin_occupancy": 0.14479166666666665, "signal/format_reward/group_std_mean": 0.036996308341622354, "signal/format_reward/group_zero_std_frac": 0.8416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009581163432449103, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009581163432449103, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021472658263519406, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6767361111111111, "signal/frontier_aurc_reward/group_std_mean": 0.0038169473875313996, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.684082428459078e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.684082428459078e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1832861989736557, "signal/frontier_coverage_0/group_bin_occupancy": 0.845486111111111, "signal/frontier_coverage_0/group_std_mean": 0.24093341827392578, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022910774918273092, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022910774918273092, "signal/frontier_coverage_1/centered_abs_mean": 0.1832861989736557, "signal/frontier_coverage_1/group_bin_occupancy": 0.845486111111111, "signal/frontier_coverage_1/group_std_mean": 0.24093341827392578, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022910774918273092, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022910774918273092, "signal/frontier_coverage_10/centered_abs_mean": 0.18060127198696135, "signal/frontier_coverage_10/group_bin_occupancy": 0.84375, "signal/frontier_coverage_10/group_std_mean": 0.2375454157590866, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022575160022825004, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022575160022825004, "signal/frontier_coverage_15/centered_abs_mean": 0.08088361173868179, "signal/frontier_coverage_15/group_bin_occupancy": 0.867013888888889, "signal/frontier_coverage_15/group_std_mean": 0.10819252133369446, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010110451141372323, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010110451141372323, "signal/frontier_coverage_20/centered_abs_mean": 0.07415172904729843, "signal/frontier_coverage_20/group_bin_occupancy": 0.9243055555555555, "signal/frontier_coverage_20/group_std_mean": 0.09439714550971985, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009268965688534081, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009268965688534081, "signal/frontier_coverage_25/centered_abs_mean": 0.11941829919815064, "signal/frontier_coverage_25/group_bin_occupancy": 0.8850694444444445, "signal/frontier_coverage_25/group_std_mean": 0.15383650064468385, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014927288517355918, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014927288517355918, "signal/frontier_coverage_5/centered_abs_mean": 0.18320149779319764, "signal/frontier_coverage_5/group_bin_occupancy": 0.845486111111111, "signal/frontier_coverage_5/group_std_mean": 0.24082353413105012, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002290018741041422, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002290018741041422, "signal/frontier_ece_reward/centered_abs_mean": 0.01998976320028305, "signal/frontier_ece_reward/group_bin_occupancy": 0.6802083333333333, "signal/frontier_ece_reward/group_std_mean": 0.02510824017226696, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001998976385220885, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001998976385220885, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23103305995464324, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7687499999999999, "signal/frontier_entropy_batch_reward/group_std_mean": 0.30060619711875913, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023103305697441102, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023103305697441102, "step": 195 }, { "calibration/aurc": 0.14460988695922733, "calibration/batch_distribution_entropy": 0.9495513680360439, "calibration/batch_entropy_100bins": 0.948140998369011, "calibration/batch_entropy_10bins": 0.9495513680360439, "calibration/batch_entropy_50bins": 0.9562598495698762, "calibration/batch_uniqueness": 0.9474501559632424, "calibration/buffer_distribution_entropy": 0.9853301528282403, "calibration/buffer_entropy_100bins": 0.9919136390750427, "calibration/buffer_entropy_10bins": 0.9853301528282403, "calibration/buffer_entropy_50bins": 0.9910459470294087, "calibration/confidence_entropy": 0.5112296963776538, "calibration/coverage@0%": 0.032981623612241454, "calibration/coverage@1%": 0.032981623612241454, "calibration/coverage@10%": 0.46781779581637994, "calibration/coverage@15%": 0.5944394938795089, "calibration/coverage@20%": 0.7251206752410088, "calibration/coverage@25%": 0.9559466293867314, "calibration/coverage@30%": 0.9758530183727034, "calibration/coverage@5%": 0.08477990939682992, "calibration/ece": 0.18029392485104195, "calibration/mean_confidence": 0.6085970431868855, "calibration/prompt_uniqueness": 0.867308580953482, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009548611111111138, "completions/max_length": 3404.4, "completions/max_terminated_length": 3404.4, "completions/mean_length": 633.7380249023438, "completions/mean_terminated_length": 639.8541870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 195.2, "epoch": 0.47999400007499904, "grad_norm": 0.00048563332529738545, "learning_rate": 2.409638554216868e-07, "loss": -0.0069, "num_tokens": 402783183.0, "reward": 1.0019099831581115, "reward_std": 0.12241675555706025, "rewards/accuracy_reward": 0.7002604126930236, "rewards/brier_reward": 0.7927653312683105, "rewards/confidence_uniqueness_reward": 0.9402285814285278, "rewards/format_reward": 0.9902777671813965, "rewards/frontier_aurc_reward": -0.0022210155380889773, "rewards/frontier_coverage_0": -0.0035953870275989173, "rewards/frontier_coverage_1": -0.0035953870275989173, "rewards/frontier_coverage_10": -0.0024497059057466686, "rewards/frontier_coverage_15": 0.02303452733904123, "rewards/frontier_coverage_20": 0.07562950998544693, "rewards/frontier_coverage_25": 0.15210793316364288, "rewards/frontier_coverage_5": -0.003559676537406631, "rewards/frontier_ece_reward": -0.003188342018984258, "rewards/frontier_entropy_batch_reward": -0.19281545877456666, "signal/accuracy_reward/centered_abs_mean": 0.1425075948238373, "signal/accuracy_reward/group_bin_occupancy": 0.190625, "signal/accuracy_reward/group_std_mean": 0.18748272955417633, "signal/accuracy_reward/group_zero_std_frac": 0.47500001192092894, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07125379741191865, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07125379741191865, "signal/advantage_abs_mean": 0.09010151475667953, "signal/advantage_pre_scale_abs_mean": 0.09010151475667953, "signal/advantage_pre_scale_std": 0.14910052120685577, "signal/advantage_std": 0.14910052120685577, "signal/brier_reward/centered_abs_mean": 0.14125451743602752, "signal/brier_reward/group_bin_occupancy": 0.8416666666666668, "signal/brier_reward/group_std_mean": 0.1821454256772995, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014125452749431134, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014125452749431134, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028338390961289407, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.84375, "signal/confidence_uniqueness_reward/group_std_mean": 0.04806696176528931, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002833839226514101, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002833839226514101, "signal/format_reward/centered_abs_mean": 0.01692708358168602, "signal/format_reward/group_bin_occupancy": 0.14375, "signal/format_reward/group_std_mean": 0.03435967043042183, "signal/format_reward/group_zero_std_frac": 0.85, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00846354179084301, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00846354179084301, "signal/frontier_aurc_reward/centered_abs_mean": 0.002712964592501521, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6565972222222222, "signal/frontier_aurc_reward/group_std_mean": 0.005087446887046099, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.391205755178817e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.391205755178817e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.17483938336372376, "signal/frontier_coverage_0/group_bin_occupancy": 0.8496527777777777, "signal/frontier_coverage_0/group_std_mean": 0.2283846229314804, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00218549226410687, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00218549226410687, "signal/frontier_coverage_1/centered_abs_mean": 0.17483938336372376, "signal/frontier_coverage_1/group_bin_occupancy": 0.8496527777777777, "signal/frontier_coverage_1/group_std_mean": 0.2283846229314804, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00218549226410687, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00218549226410687, "signal/frontier_coverage_10/centered_abs_mean": 0.17156257033348082, "signal/frontier_coverage_10/group_bin_occupancy": 0.8479166666666667, "signal/frontier_coverage_10/group_std_mean": 0.22431055903434755, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002144532185047865, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002144532185047865, "signal/frontier_coverage_15/centered_abs_mean": 0.07161953896284104, "signal/frontier_coverage_15/group_bin_occupancy": 0.88125, "signal/frontier_coverage_15/group_std_mean": 0.09553508013486862, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008952441858127713, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008952441858127713, "signal/frontier_coverage_20/centered_abs_mean": 0.07612911015748977, "signal/frontier_coverage_20/group_bin_occupancy": 0.9177083333333332, "signal/frontier_coverage_20/group_std_mean": 0.0976193055510521, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000951613939832896, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000951613939832896, "signal/frontier_coverage_25/centered_abs_mean": 0.12673709094524382, "signal/frontier_coverage_25/group_bin_occupancy": 0.9010416666666666, "signal/frontier_coverage_25/group_std_mean": 0.16289995312690736, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015842135995626449, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015842135995626449, "signal/frontier_coverage_5/centered_abs_mean": 0.1747281402349472, "signal/frontier_coverage_5/group_bin_occupancy": 0.85, "signal/frontier_coverage_5/group_std_mean": 0.22824438512325287, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021841016598045824, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021841016598045824, "signal/frontier_ece_reward/centered_abs_mean": 0.01971760131418705, "signal/frontier_ece_reward/group_bin_occupancy": 0.6673611111111111, "signal/frontier_ece_reward/group_std_mean": 0.024473632127046584, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001971760136075318, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001971760136075318, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2508280843496323, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7565972222222223, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3208978533744812, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02508280873298645, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02508280873298645, "step": 200 }, { "epoch": 0.47999400007499904, "eval_calibration/aurc": 0.1313149807340917, "eval_calibration/batch_distribution_entropy": 0.9010256730205485, "eval_calibration/batch_entropy_100bins": 0.705057482475523, "eval_calibration/batch_entropy_10bins": 0.9010256730205485, "eval_calibration/batch_entropy_50bins": 0.7709351303877786, "eval_calibration/batch_uniqueness": 0.8971089956208811, "eval_calibration/buffer_distribution_entropy": 0.9850070003261059, "eval_calibration/buffer_entropy_100bins": 0.991778938093653, "eval_calibration/buffer_entropy_10bins": 0.9850070003261059, "eval_calibration/buffer_entropy_50bins": 0.9908763990301034, "eval_calibration/confidence_entropy": 0.4905625738638028, "eval_calibration/coverage@0%": 0.245127688172043, "eval_calibration/coverage@1%": 0.245127688172043, "eval_calibration/coverage@10%": 0.526377688172043, "eval_calibration/coverage@15%": 0.651377688172043, "eval_calibration/coverage@20%": 0.814516129032258, "eval_calibration/coverage@25%": 0.9578293010752689, "eval_calibration/coverage@30%": 0.9947916666666666, "eval_calibration/coverage@5%": 0.245127688172043, "eval_calibration/ece": 0.22274469707586766, "eval_calibration/mean_confidence": 0.5956210189997321, "eval_calibration/prompt_uniqueness": 0.8971089956208811, "eval_completions/clipped_ratio": 0.011284722222222229, "eval_completions/max_length": 2300.8333333333335, "eval_completions/max_terminated_length": 2300.8333333333335, "eval_completions/mean_length": 645.4890747070312, "eval_completions/mean_terminated_length": 652.927968343099, "eval_completions/min_length": 50.0, "eval_completions/min_terminated_length": 223.0, "eval_loss": 0.0, "eval_num_tokens": 402783183.0, "eval_reward": 0.9437916080156962, "eval_reward_std": 0.243720144033432, "eval_rewards/accuracy_reward": 0.6909722288449606, "eval_rewards/brier_reward": 0.7851507067680359, "eval_rewards/confidence_uniqueness_reward": 0.8838565051555634, "eval_rewards/format_reward": 0.9869791666666666, "eval_rewards/frontier_aurc_reward": -0.0024154275791564337, "eval_rewards/frontier_coverage_0": -0.0033070078740517297, "eval_rewards/frontier_coverage_1": -0.0033070078740517297, "eval_rewards/frontier_coverage_10": -0.0025324359691391387, "eval_rewards/frontier_coverage_15": 0.024358487998445828, "eval_rewards/frontier_coverage_20": 0.07857182746132214, "eval_rewards/frontier_coverage_25": 0.1538891519109408, "eval_rewards/frontier_coverage_5": -0.003267340362071991, "eval_rewards/frontier_ece_reward": -0.002330610683808724, "eval_rewards/frontier_entropy_batch_reward": -0.6487665772438049, "eval_runtime": 207.9871, "eval_samples_per_second": 4.808, "eval_signal/accuracy_reward/centered_abs_mean": 0.4146050314108531, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.46133896211783093, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20730251570542654, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20730251570542654, "eval_signal/advantage_abs_mean": 0.20695754885673523, "eval_signal/advantage_pre_scale_abs_mean": 0.20695754885673523, "eval_signal/advantage_pre_scale_std": 0.24312934776147208, "eval_signal/advantage_std": 0.24312934776147208, "eval_signal/brier_reward/centered_abs_mean": 0.19702969988187155, "eval_signal/brier_reward/group_bin_occupancy": 0.8784722222222223, "eval_signal/brier_reward/group_std_mean": 0.2518209119637807, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019702970360716183, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019702970360716183, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05498677988847097, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.38888888888888884, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08798779795567195, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005498677957803011, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005498677957803011, "eval_signal/format_reward/centered_abs_mean": 0.024576822761446238, "eval_signal/format_reward/group_bin_occupancy": 0.1597222222222222, "eval_signal/format_reward/group_std_mean": 0.05818357535948356, "eval_signal/format_reward/group_zero_std_frac": 0.7222222487131754, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.012288411380723119, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.012288411380723119, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004131359804887325, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.5694444444444444, "eval_signal/frontier_aurc_reward/group_std_mean": 0.009391291804301241, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.16420004714746e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.16420004714746e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.25250349193811417, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9027777777777778, "eval_signal/frontier_coverage_0/group_std_mean": 0.3640611221392949, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003156293804446856, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003156293804446856, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.25250349193811417, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9027777777777778, "eval_signal/frontier_coverage_1/group_std_mean": 0.3640611221392949, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003156293804446856, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003156293804446856, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.24736239512761435, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9027777777777778, "eval_signal/frontier_coverage_10/group_std_mean": 0.3576079159975052, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030920300244664154, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030920300244664154, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.08761641258994739, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.8854166666666666, "eval_signal/frontier_coverage_15/group_std_mean": 0.13278244932492575, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010952051864781727, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010952051864781727, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.1130032017827034, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9305555555555555, "eval_signal/frontier_coverage_20/group_std_mean": 0.14612068235874176, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014125400533278782, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014125400533278782, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.2178284153342247, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9270833333333334, "eval_signal/frontier_coverage_25/group_std_mean": 0.2713290477792422, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027228551916778088, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027228551916778088, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.2523079713185628, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9027777777777778, "eval_signal/frontier_coverage_5/group_std_mean": 0.36381277441978455, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031538497811804214, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031538497811804214, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.02772780228406191, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8506944444444445, "eval_signal/frontier_ece_reward/group_std_mean": 0.03577593838175138, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00277278032929947, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00277278032929947, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.322118878364563, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.28472222222222227, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.3369586815436681, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03221188889195522, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03221188889195522, "eval_steps_per_second": 0.029, "step": 200 }, { "calibration/aurc": 0.19286525379114844, "calibration/batch_distribution_entropy": 0.9508638400230053, "calibration/batch_entropy_100bins": 0.9472046153786987, "calibration/batch_entropy_10bins": 0.9508638400230053, "calibration/batch_entropy_50bins": 0.9586811454627515, "calibration/batch_uniqueness": 0.9478697952898288, "calibration/buffer_distribution_entropy": 0.985099606750224, "calibration/buffer_entropy_100bins": 0.9918358877130297, "calibration/buffer_entropy_10bins": 0.985099606750224, "calibration/buffer_entropy_50bins": 0.9909422179315424, "calibration/confidence_entropy": 0.4997781015447198, "calibration/coverage@0%": 0.01370268496669971, "calibration/coverage@1%": 0.01370268496669971, "calibration/coverage@10%": 0.08365004677399088, "calibration/coverage@15%": 0.39644238421691963, "calibration/coverage@20%": 0.611257871294732, "calibration/coverage@25%": 0.8797210315410284, "calibration/coverage@30%": 0.939168679577219, "calibration/coverage@5%": 0.01370268496669971, "calibration/ece": 0.14749019364217641, "calibration/mean_confidence": 0.606638199869165, "calibration/prompt_uniqueness": 0.8656049290865866, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008767361111111116, "completions/max_length": 3214.0, "completions/max_terminated_length": 3214.0, "completions/mean_length": 652.1061767578125, "completions/mean_terminated_length": 657.898583984375, "completions/min_length": 0.0, "completions/min_terminated_length": 175.6, "epoch": 0.491993850076874, "grad_norm": 0.0003685148840304464, "learning_rate": 9.036144578313253e-08, "loss": -0.0055, "num_tokens": 413361398.0, "reward": 1.0259872198104858, "reward_std": 0.11690017282962799, "rewards/accuracy_reward": 0.7453993082046508, "rewards/brier_reward": 0.7930923223495483, "rewards/confidence_uniqueness_reward": 0.9420808672904968, "rewards/format_reward": 0.9911458373069764, "rewards/frontier_aurc_reward": -0.0016583121148869395, "rewards/frontier_coverage_0": -0.030156330950558186, "rewards/frontier_coverage_1": -0.030156330950558186, "rewards/frontier_coverage_10": -0.028441790863871573, "rewards/frontier_coverage_15": 0.021205396763980387, "rewards/frontier_coverage_20": 0.08911058455705642, "rewards/frontier_coverage_25": 0.17736924588680267, "rewards/frontier_coverage_5": -0.0300977423787117, "rewards/frontier_ece_reward": -0.006220728810876608, "rewards/frontier_entropy_batch_reward": -0.17270275056362153, "signal/accuracy_reward/centered_abs_mean": 0.14183485507965088, "signal/accuracy_reward/group_bin_occupancy": 0.1951388888888889, "signal/accuracy_reward/group_std_mean": 0.19232783019542693, "signal/accuracy_reward/group_zero_std_frac": 0.4388888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07091742753982544, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07091742753982544, "signal/advantage_abs_mean": 0.0845324456691742, "signal/advantage_pre_scale_abs_mean": 0.0845324456691742, "signal/advantage_pre_scale_std": 0.1428141325712204, "signal/advantage_std": 0.1428141325712204, "signal/brier_reward/centered_abs_mean": 0.1418829470872879, "signal/brier_reward/group_bin_occupancy": 0.84375, "signal/brier_reward/group_std_mean": 0.18193072974681854, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014188294671475888, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014188294671475888, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026093775033950807, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.865625, "signal/confidence_uniqueness_reward/group_std_mean": 0.04311029836535454, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026093775872141124, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026093775872141124, "signal/format_reward/centered_abs_mean": 0.014822048880159856, "signal/format_reward/group_bin_occupancy": 0.140625, "signal/format_reward/group_std_mean": 0.029389195144176483, "signal/format_reward/group_zero_std_frac": 0.875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007411024440079928, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007411024440079928, "signal/frontier_aurc_reward/centered_abs_mean": 0.002050434215925634, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6666666666666667, "signal/frontier_aurc_reward/group_std_mean": 0.003803052147850394, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.563042944530025e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.563042944530025e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18497320711612703, "signal/frontier_coverage_0/group_bin_occupancy": 0.8427083333333334, "signal/frontier_coverage_0/group_std_mean": 0.24077284038066865, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002312165219336748, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002312165219336748, "signal/frontier_coverage_1/centered_abs_mean": 0.18497320711612703, "signal/frontier_coverage_1/group_bin_occupancy": 0.8427083333333334, "signal/frontier_coverage_1/group_std_mean": 0.24077284038066865, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002312165219336748, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002312165219336748, "signal/frontier_coverage_10/centered_abs_mean": 0.18142623901367189, "signal/frontier_coverage_10/group_bin_occupancy": 0.8402777777777777, "signal/frontier_coverage_10/group_std_mean": 0.23639352917671203, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022678279783576727, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022678279783576727, "signal/frontier_coverage_15/centered_abs_mean": 0.07308610081672669, "signal/frontier_coverage_15/group_bin_occupancy": 0.8972222222222221, "signal/frontier_coverage_15/group_std_mean": 0.09623065441846848, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009135762811638415, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009135762811638415, "signal/frontier_coverage_20/centered_abs_mean": 0.07907227426767349, "signal/frontier_coverage_20/group_bin_occupancy": 0.9204861111111112, "signal/frontier_coverage_20/group_std_mean": 0.10045547783374786, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009884034050628542, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009884034050628542, "signal/frontier_coverage_25/centered_abs_mean": 0.12448778450489044, "signal/frontier_coverage_25/group_bin_occupancy": 0.8934027777777779, "signal/frontier_coverage_25/group_std_mean": 0.1596580684185028, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015560972038656472, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015560972038656472, "signal/frontier_coverage_5/centered_abs_mean": 0.18486446142196655, "signal/frontier_coverage_5/group_bin_occupancy": 0.8427083333333334, "signal/frontier_coverage_5/group_std_mean": 0.24063428342342377, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002310805721208453, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002310805721208453, "signal/frontier_ece_reward/centered_abs_mean": 0.020203196629881858, "signal/frontier_ece_reward/group_bin_occupancy": 0.6336805555555556, "signal/frontier_ece_reward/group_std_mean": 0.02513127215206623, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020203196443617346, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020203196443617346, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22744437754154206, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7774305555555555, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2937332093715668, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022744438052177428, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022744438052177428, "step": 205 }, { "calibration/aurc": 0.10604995661886611, "calibration/batch_distribution_entropy": 0.9459823056432164, "calibration/batch_entropy_100bins": 0.9445751943961431, "calibration/batch_entropy_10bins": 0.9459823056432164, "calibration/batch_entropy_50bins": 0.9524671961811779, "calibration/batch_uniqueness": 0.945795930081056, "calibration/buffer_distribution_entropy": 0.9843894309209239, "calibration/buffer_entropy_100bins": 0.991489315771584, "calibration/buffer_entropy_10bins": 0.9843894309209239, "calibration/buffer_entropy_50bins": 0.9905221962633863, "calibration/confidence_entropy": 0.49619946668569376, "calibration/coverage@0%": 0.0810229902207145, "calibration/coverage@1%": 0.0810229902207145, "calibration/coverage@10%": 0.5336122948397576, "calibration/coverage@15%": 0.7811456966373963, "calibration/coverage@20%": 0.8943865740740741, "calibration/coverage@25%": 0.9825562169312169, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.28413915588222977, "calibration/ece": 0.13840996405365147, "calibration/mean_confidence": 0.6308651120728337, "calibration/prompt_uniqueness": 0.8631049911748535, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00868055555555558, "completions/max_length": 3621.0, "completions/max_terminated_length": 3621.0, "completions/mean_length": 655.096923828125, "completions/mean_terminated_length": 660.8446044921875, "completions/min_length": 0.0, "completions/min_terminated_length": 188.33333333333334, "epoch": 0.49919376007799904, "num_tokens": 419755476.0, "reward": 1.001288930575053, "reward_std": 0.11777538061141968, "rewards/accuracy_reward": 0.6956018408139547, "rewards/brier_reward": 0.7960908611615499, "rewards/confidence_uniqueness_reward": 0.94153360525767, "rewards/format_reward": 0.9911747574806213, "rewards/frontier_aurc_reward": -0.0013014324552689989, "rewards/frontier_coverage_0": 0.00015948344177256027, "rewards/frontier_coverage_1": 0.00015948344177256027, "rewards/frontier_coverage_10": 0.0008107475781192383, "rewards/frontier_coverage_15": 0.027395144725839298, "rewards/frontier_coverage_20": 0.08610829710960388, "rewards/frontier_coverage_25": 0.1655142605304718, "rewards/frontier_coverage_5": 0.0002017094132800897, "rewards/frontier_ece_reward": -0.0024814563415323696, "rewards/frontier_entropy_batch_reward": -0.19101824859778085, "signal/accuracy_reward/centered_abs_mean": 0.14246961971124014, "signal/accuracy_reward/group_bin_occupancy": 0.1909722222222222, "signal/accuracy_reward/group_std_mean": 0.1864061305920283, "signal/accuracy_reward/group_zero_std_frac": 0.4722222089767456, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07123480985562007, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07123480985562007, "signal/advantage_abs_mean": 0.08667557189861934, "signal/advantage_pre_scale_abs_mean": 0.08667557189861934, "signal/advantage_pre_scale_std": 0.1438957303762436, "signal/advantage_std": 0.1438957303762436, "signal/brier_reward/centered_abs_mean": 0.14015839993953705, "signal/brier_reward/group_bin_occupancy": 0.8518518518518517, "signal/brier_reward/group_std_mean": 0.17842622101306915, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014015840366482735, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014015840366482735, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027561215683817863, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8454861111111112, "signal/confidence_uniqueness_reward/group_std_mean": 0.04839188729723295, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002756121257940928, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002756121257940928, "signal/format_reward/centered_abs_mean": 0.01621274556964636, "signal/format_reward/group_bin_occupancy": 0.14467592592592593, "signal/format_reward/group_std_mean": 0.034930519138773285, "signal/format_reward/group_zero_std_frac": 0.8425925970077515, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00810637278482318, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00810637278482318, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016874617819363873, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6921296296296297, "signal/frontier_aurc_reward/group_std_mean": 0.0032867664316048226, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1093272759268682e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1093272759268682e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19459756712118784, "signal/frontier_coverage_0/group_bin_occupancy": 0.8425925925925926, "signal/frontier_coverage_0/group_std_mean": 0.24988562365372977, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024324696666250625, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024324696666250625, "signal/frontier_coverage_1/centered_abs_mean": 0.19459756712118784, "signal/frontier_coverage_1/group_bin_occupancy": 0.8425925925925926, "signal/frontier_coverage_1/group_std_mean": 0.24988562365372977, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024324696666250625, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024324696666250625, "signal/frontier_coverage_10/centered_abs_mean": 0.19071337580680847, "signal/frontier_coverage_10/group_bin_occupancy": 0.8431712962962963, "signal/frontier_coverage_10/group_std_mean": 0.2450977216164271, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002383917337283492, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002383917337283492, "signal/frontier_coverage_15/centered_abs_mean": 0.07185898224512736, "signal/frontier_coverage_15/group_bin_occupancy": 0.9074074074074074, "signal/frontier_coverage_15/group_std_mean": 0.09349055091540019, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008982373401522636, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008982373401522636, "signal/frontier_coverage_20/centered_abs_mean": 0.07390168060859044, "signal/frontier_coverage_20/group_bin_occupancy": 0.9201388888888888, "signal/frontier_coverage_20/group_std_mean": 0.09523183355728786, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009237710037268698, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009237710037268698, "signal/frontier_coverage_25/centered_abs_mean": 0.11694104224443436, "signal/frontier_coverage_25/group_bin_occupancy": 0.8831018518518517, "signal/frontier_coverage_25/group_std_mean": 0.15213672816753387, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014617630513384938, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014617630513384938, "signal/frontier_coverage_5/centered_abs_mean": 0.19448575377464294, "signal/frontier_coverage_5/group_bin_occupancy": 0.8425925925925926, "signal/frontier_coverage_5/group_std_mean": 0.24974611898263296, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024310719842712083, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024310719842712083, "signal/frontier_ece_reward/centered_abs_mean": 0.02061399631202221, "signal/frontier_ece_reward/group_bin_occupancy": 0.6296296296296297, "signal/frontier_ece_reward/group_std_mean": 0.025443398704131443, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00206139978642265, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00206139978642265, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24270604054133096, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7702546296296297, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3108425835768382, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0242706040541331, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0242706040541331, "step": 208, "total_flos": 0.0, "train_loss": -0.009165088099857362, "train_runtime": 38159.999, "train_samples_per_second": 0.393, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 208, "num_input_tokens_seen": 419755476, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }